From 0de0e198bc7191a0e46cf71f66fec4d07ca91396 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 25 Apr 2017 12:58:31 -0700 Subject: [PATCH 001/249] ACPI / sysfs: fix acpi_get_table() leak / acpi-sysfs denial of service Reading an ACPI table through the /sys/firmware/acpi/tables interface more than 65,536 times leads to the following log message: ACPI Error: Table ffff88033595eaa8, Validation count is zero after increment (20170119/tbutils-423) ...and the table being unavailable until the next reboot. Add the missing acpi_put_table() so the table ->validation_count is decremented after each read. Reported-by: Anush Seetharaman Fixes: 174cc7187e6f "ACPICA: Tables: Back port acpi_get_table_with_size() ..." Signed-off-by: Dan Williams Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index cf05ae973381..5180fef9eb49 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -333,14 +333,17 @@ static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj, container_of(bin_attr, struct acpi_table_attr, attr); struct acpi_table_header *table_header = NULL; acpi_status status; + ssize_t rc; status = acpi_get_table(table_attr->name, table_attr->instance, &table_header); if (ACPI_FAILURE(status)) return -ENODEV; - return memory_read_from_buffer(buf, count, &offset, - table_header, table_header->length); + rc = memory_read_from_buffer(buf, count, &offset, table_header, + table_header->length); + acpi_put_table(table_header); + return rc; } static int acpi_table_attr_init(struct kobject *tables_obj, From 2ac97f0f6654da14312d125005c77a6010e0ea38 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 25 Apr 2017 11:29:56 -0700 Subject: [PATCH 002/249] HID: wacom: Have wacom_tpc_irq guard against possible NULL dereference The following Smatch complaint was generated in response to commit 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device"): drivers/hid/wacom_wac.c:1586 wacom_tpc_irq() error: we previously assumed 'wacom->touch_input' could be null (see line 1577) The 'touch_input' and 'pen_input' variables point to the 'struct input_dev' used for relaying touch and pen events to userspace, respectively. If a device does not have a touch interface or pen interface, the associated input variable is NULL. The 'wacom_tpc_irq()' function is responsible for forwarding input reports to a more-specific IRQ handler function. An unknown report could theoretically be mistaken as e.g. a touch report on a device which does not have a touch interface. This can be prevented by only calling the pen/touch functions are called when the pen/touch pointers are valid. Fixes: 2a6cdbd ("HID: wacom: Introduce new 'touch_input' device") Signed-off-by: Jason Gerecke Reviewed-by: Ping Cheng Cc: stable@vger.kernel.org Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 47 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4b225fb19a16..e274c9dc32f3 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1571,37 +1571,38 @@ static int wacom_tpc_irq(struct wacom_wac *wacom, size_t len) { unsigned char *data = wacom->data; - if (wacom->pen_input) + if (wacom->pen_input) { dev_dbg(wacom->pen_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - else if (wacom->touch_input) + + if (len == WACOM_PKGLEN_PENABLED || + data[0] == WACOM_REPORT_PENABLED) + return wacom_tpc_pen(wacom); + } + else if (wacom->touch_input) { dev_dbg(wacom->touch_input->dev.parent, "%s: received report #%d\n", __func__, data[0]); - switch (len) { - case WACOM_PKGLEN_TPC1FG: - return wacom_tpc_single_touch(wacom, len); - - case WACOM_PKGLEN_TPC2FG: - return wacom_tpc_mt_touch(wacom); - - case WACOM_PKGLEN_PENABLED: - return wacom_tpc_pen(wacom); - - default: - switch (data[0]) { - case WACOM_REPORT_TPC1FG: - case WACOM_REPORT_TPCHID: - case WACOM_REPORT_TPCST: - case WACOM_REPORT_TPC1FGE: + switch (len) { + case WACOM_PKGLEN_TPC1FG: return wacom_tpc_single_touch(wacom, len); - case WACOM_REPORT_TPCMT: - case WACOM_REPORT_TPCMT2: - return wacom_mt_touch(wacom); + case WACOM_PKGLEN_TPC2FG: + return wacom_tpc_mt_touch(wacom); - case WACOM_REPORT_PENABLED: - return wacom_tpc_pen(wacom); + default: + switch (data[0]) { + case WACOM_REPORT_TPC1FG: + case WACOM_REPORT_TPCHID: + case WACOM_REPORT_TPCST: + case WACOM_REPORT_TPC1FGE: + return wacom_tpc_single_touch(wacom, len); + + case WACOM_REPORT_TPCMT: + case WACOM_REPORT_TPCMT2: + return wacom_mt_touch(wacom); + + } } } From f4b65b9563216b3e01a5cc844c3ba68901d9b195 Mon Sep 17 00:00:00 2001 From: Che-Liang Chiou Date: Fri, 7 Apr 2017 10:12:36 +0200 Subject: [PATCH 003/249] HID: magicmouse: Set multi-touch keybits for Magic Mouse The driver emits multi-touch events for Magic Trackpad as well as Magic Mouse, but it does not set keybits that are related to multi-touch event for Magic Mouse; so set these keybits. The keybits that are not set cause trouble because user programs often probe these keybits for self-configuration and thus they cannot operate properly if the keybits are not set. One of such troubles is that libevdev will not be able to emit correct touch count, causing gestures library failed to do fling stop. Signed-off-by: Che-Liang Chiou Signed-off-by: Thierry Escande Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 20b40ad26325..1d6c997b3001 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -349,6 +349,7 @@ static int magicmouse_raw_event(struct hid_device *hdev, if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { magicmouse_emit_buttons(msc, clicks & 3); + input_mt_report_pointer_emulation(input, true); input_report_rel(input, REL_X, x); input_report_rel(input, REL_Y, y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ @@ -388,16 +389,16 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd __clear_bit(BTN_RIGHT, input->keybit); __clear_bit(BTN_MIDDLE, input->keybit); __set_bit(BTN_MOUSE, input->keybit); - __set_bit(BTN_TOOL_FINGER, input->keybit); - __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); - __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); - __set_bit(BTN_TOOL_QUADTAP, input->keybit); - __set_bit(BTN_TOOL_QUINTTAP, input->keybit); - __set_bit(BTN_TOUCH, input->keybit); - __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); } + __set_bit(BTN_TOOL_FINGER, input->keybit); + __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); + __set_bit(BTN_TOOL_TRIPLETAP, input->keybit); + __set_bit(BTN_TOOL_QUADTAP, input->keybit); + __set_bit(BTN_TOOL_QUINTTAP, input->keybit); + __set_bit(BTN_TOUCH, input->keybit); + __set_bit(INPUT_PROP_POINTER, input->propbit); __set_bit(EV_ABS, input->evbit); From 0bb7a37f8d15e5fb5d21776875f9fbc74e10753a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Elio=20Petten=C3=B2?= Date: Wed, 26 Apr 2017 17:37:04 +0100 Subject: [PATCH 004/249] HID: elecom: extend to fix the descriptor for DEFT trackballs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ELECOM DEFT trackballs report only five buttons, when the device actually has 8. Change the descriptor so that the HID driver can see all of them. For completeness and future reference, I included a side-by-side diff of the part of the descriptor that is being edited. Cc: Jiri Kosina Cc: Benjamin Tissoires Cc: Yuxuan Shui Signed-off-by: Diego Elio Pettenò Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 6 ++-- drivers/hid/hid-core.c | 2 ++ drivers/hid/hid-elecom.c | 62 ++++++++++++++++++++++++++++++++++------ drivers/hid/hid-ids.h | 2 ++ 4 files changed, 61 insertions(+), 11 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index fe40e5e499dd..687705c50794 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -275,10 +275,12 @@ config HID_EMS_FF - Trio Linker Plus II config HID_ELECOM - tristate "ELECOM BM084 bluetooth mouse" + tristate "ELECOM HID devices" depends on HID ---help--- - Support for the ELECOM BM084 (bluetooth mouse). + Support for ELECOM devices: + - BM084 Bluetooth Mouse + - DEFT Trackball (Wired and wireless) config HID_ELO tristate "ELO USB 4000/4500 touchscreen" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 37084b645785..38d041510e1d 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1891,6 +1891,8 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_WN) }, { HID_USB_DEVICE(USB_VENDOR_ID_DREAM_CHEEKY, USB_DEVICE_ID_DREAM_CHEEKY_FA) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0009) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, 0x0030) }, { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) }, diff --git a/drivers/hid/hid-elecom.c b/drivers/hid/hid-elecom.c index 6e3848a8d8dd..e2c7465df69f 100644 --- a/drivers/hid/hid-elecom.c +++ b/drivers/hid/hid-elecom.c @@ -1,10 +1,8 @@ /* - * HID driver for Elecom BM084 (bluetooth mouse). - * Removes a non-existing horizontal wheel from - * the HID descriptor. - * (This module is based on "hid-ortek".) - * + * HID driver for ELECOM devices. * Copyright (c) 2010 Richard Nauber + * Copyright (c) 2016 Yuxuan Shui + * Copyright (c) 2017 Diego Elio Pettenò */ /* @@ -23,15 +21,61 @@ static __u8 *elecom_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { - hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n"); - rdesc[47] = 0x00; + switch (hdev->product) { + case USB_DEVICE_ID_ELECOM_BM084: + /* The BM084 Bluetooth mouse includes a non-existing horizontal + * wheel in the HID descriptor. */ + if (*rsize >= 48 && rdesc[46] == 0x05 && rdesc[47] == 0x0c) { + hid_info(hdev, "Fixing up Elecom BM084 report descriptor\n"); + rdesc[47] = 0x00; + } + break; + case USB_DEVICE_ID_ELECOM_DEFT_WIRED: + case USB_DEVICE_ID_ELECOM_DEFT_WIRELESS: + /* The DEFT trackball has eight buttons, but its descriptor only + * reports five, disabling the three Fn buttons on the top of + * the mouse. + * + * Apply the following diff to the descriptor: + * + * Collection (Physical), Collection (Physical), + * Report ID (1), Report ID (1), + * Report Count (5), -> Report Count (8), + * Report Size (1), Report Size (1), + * Usage Page (Button), Usage Page (Button), + * Usage Minimum (01h), Usage Minimum (01h), + * Usage Maximum (05h), -> Usage Maximum (08h), + * Logical Minimum (0), Logical Minimum (0), + * Logical Maximum (1), Logical Maximum (1), + * Input (Variable), Input (Variable), + * Report Count (1), -> Report Count (0), + * Report Size (3), Report Size (3), + * Input (Constant), Input (Constant), + * Report Size (16), Report Size (16), + * Report Count (2), Report Count (2), + * Usage Page (Desktop), Usage Page (Desktop), + * Usage (X), Usage (X), + * Usage (Y), Usage (Y), + * Logical Minimum (-32768), Logical Minimum (-32768), + * Logical Maximum (32767), Logical Maximum (32767), + * Input (Variable, Relative), Input (Variable, Relative), + * End Collection, End Collection, + */ + if (*rsize == 213 && rdesc[13] == 5 && rdesc[21] == 5) { + hid_info(hdev, "Fixing up Elecom DEFT Fn buttons\n"); + rdesc[13] = 8; /* Button/Variable Report Count */ + rdesc[21] = 8; /* Button/Variable Usage Maximum */ + rdesc[29] = 0; /* Button/Constant Report Count */ + } + break; } return rdesc; } static const struct hid_device_id elecom_devices[] = { - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084)}, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRED) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_DEFT_WIRELESS) }, { } }; MODULE_DEVICE_TABLE(hid, elecom_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 643390ba749d..8e8a1baee090 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -358,6 +358,8 @@ #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 +#define USB_DEVICE_ID_ELECOM_DEFT_WIRED 0x00fe +#define USB_DEVICE_ID_ELECOM_DEFT_WIRELESS 0x00ff #define USB_VENDOR_ID_DREAM_CHEEKY 0x1d34 #define USB_DEVICE_ID_DREAM_CHEEKY_WN 0x0004 From eb8df543e444492328f506adffc7dfe94111f1bd Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:44 +0200 Subject: [PATCH 005/249] dmaengine: mv_xor_v2: handle mv_xor_v2_prep_sw_desc() error properly The mv_xor_v2_prep_sw_desc() is called from a few different places in the driver, but we never take into account the fact that it might return NULL. This commit fixes that, ensuring that we don't panic if there are no more descriptors available. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index a28a01fcba67..e9280207ac19 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -389,6 +389,8 @@ mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, __func__, len, &src, &dest, flags); sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; sw_desc->async_tx.flags = flags; @@ -443,6 +445,8 @@ mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, __func__, src_cnt, len, &dest, flags); sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; sw_desc->async_tx.flags = flags; @@ -491,6 +495,8 @@ mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) container_of(chan, struct mv_xor_v2_device, dmachan); sw_desc = mv_xor_v2_prep_sw_desc(xor_dev); + if (!sw_desc) + return NULL; /* set the HW descriptor */ hw_descriptor = &sw_desc->hw_desc; From 2aab4e18152cd30cb5d2f4c27629fc8a04aed979 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:45 +0200 Subject: [PATCH 006/249] dmaengine: mv_xor_v2: properly handle wrapping in the array of HW descriptors mv_xor_v2_tasklet() is looping over completed HW descriptors. Before the loop, it initializes 'next_pending_hw_desc' to the first HW descriptor to handle, and then the loop simply increments this point, without taking care of wrapping when we reach the last HW descriptor. The 'pending_ptr' index was being wrapped back to 0 at the end, but it wasn't used in each iteration of the loop to calculate next_pending_hw_desc. This commit fixes that, and makes next_pending_hw_desc a variable local to the loop itself. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index e9280207ac19..bdfb36ecff81 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -560,7 +560,6 @@ static void mv_xor_v2_tasklet(unsigned long data) { struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data; int pending_ptr, num_of_pending, i; - struct mv_xor_v2_descriptor *next_pending_hw_desc = NULL; struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL; dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__); @@ -568,17 +567,10 @@ static void mv_xor_v2_tasklet(unsigned long data) /* get the pending descriptors parameters */ num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr); - /* next HW descriptor */ - next_pending_hw_desc = xor_dev->hw_desq_virt + pending_ptr; - /* loop over free descriptors */ for (i = 0; i < num_of_pending; i++) { - - if (pending_ptr > MV_XOR_V2_DESC_NUM) - pending_ptr = 0; - - if (next_pending_sw_desc != NULL) - next_pending_hw_desc++; + struct mv_xor_v2_descriptor *next_pending_hw_desc = + xor_dev->hw_desq_virt + pending_ptr; /* get the SW descriptor related to the HW descriptor */ next_pending_sw_desc = @@ -614,6 +606,8 @@ static void mv_xor_v2_tasklet(unsigned long data) /* increment the next descriptor */ pending_ptr++; + if (pending_ptr >= MV_XOR_V2_DESC_NUM) + pending_ptr = 0; } if (num_of_pending != 0) { From bc473da1ed726c975ad47f8d7d27631de11356d8 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:46 +0200 Subject: [PATCH 007/249] dmaengine: mv_xor_v2: do not use descriptors not acked by async_tx Descriptors that have not been acknowledged by the async_tx layer should not be re-used, so this commit adjusts the implementation of mv_xor_v2_prep_sw_desc() to skip descriptors for which async_tx_test_ack() is false. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index bdfb36ecff81..cb60e7c4aa16 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -344,6 +344,7 @@ static struct mv_xor_v2_sw_desc * mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev) { struct mv_xor_v2_sw_desc *sw_desc; + bool found = false; /* Lock the channel */ spin_lock_bh(&xor_dev->lock); @@ -355,19 +356,23 @@ mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev) return NULL; } - /* get a free SW descriptor from the SW DESQ */ - sw_desc = list_first_entry(&xor_dev->free_sw_desc, - struct mv_xor_v2_sw_desc, free_list); + list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) { + if (async_tx_test_ack(&sw_desc->async_tx)) { + found = true; + break; + } + } + + if (!found) { + spin_unlock_bh(&xor_dev->lock); + return NULL; + } + list_del(&sw_desc->free_list); /* Release the channel */ spin_unlock_bh(&xor_dev->lock); - /* set the async tx descriptor */ - dma_async_tx_descriptor_init(&sw_desc->async_tx, &xor_dev->dmachan); - sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit; - async_tx_ack(&sw_desc->async_tx); - return sw_desc; } @@ -785,8 +790,15 @@ static int mv_xor_v2_probe(struct platform_device *pdev) /* add all SW descriptors to the free list */ for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) { - xor_dev->sw_desq[i].idx = i; - list_add(&xor_dev->sw_desq[i].free_list, + struct mv_xor_v2_sw_desc *sw_desc = + xor_dev->sw_desq + i; + sw_desc->idx = i; + dma_async_tx_descriptor_init(&sw_desc->async_tx, + &xor_dev->dmachan); + sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit; + async_tx_ack(&sw_desc->async_tx); + + list_add(&sw_desc->free_list, &xor_dev->free_sw_desc); } From ab2c5f0a77fe49bdb6e307b397496373cb47d2c2 Mon Sep 17 00:00:00 2001 From: Hanna Hawa Date: Fri, 5 May 2017 11:57:47 +0200 Subject: [PATCH 008/249] dmaengine: mv_xor_v2: enable XOR engine after its configuration The engine was enabled prior to its configuration, which isn't correct. This patch relocates the activation of the XOR engine, to be after the configuration of the XOR engine. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Hanna Hawa Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index cb60e7c4aa16..211b8c0e3cfb 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -653,9 +653,6 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev) writel((xor_dev->hw_desq & 0xFFFF00000000) >> 32, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF); - /* enable the DMA engine */ - writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); - /* * This is a temporary solution, until we activate the * SMMU. Set the attributes for reading & writing data buffers @@ -699,6 +696,9 @@ static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev) reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL; writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE); + /* enable the DMA engine */ + writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF); + return 0; } From 44d5887a8bf1e86915c8ff647337cb138149da82 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:48 +0200 Subject: [PATCH 009/249] dmaengine: mv_xor_v2: fix tx_submit() implementation The mv_xor_v2_tx_submit() gets the next available HW descriptor by calling mv_xor_v2_get_desq_write_ptr(), which reads a HW register telling the next available HW descriptor. This was working fine when HW descriptors were issued for processing directly in tx_submit(). However, as part of the review process of the driver, a change was requested to move the actual kick-off of HW descriptors processing to ->issue_pending(). Due to this, reading the HW register to know the next available HW descriptor no longer works. So instead of using this HW register, we implemented a software index pointing to the next available HW descriptor. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 211b8c0e3cfb..4684eceea759 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -161,6 +161,7 @@ struct mv_xor_v2_device { struct mv_xor_v2_sw_desc *sw_desq; int desc_size; unsigned int npendings; + unsigned int hw_queue_idx; }; /** @@ -213,18 +214,6 @@ static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev, } } -/* - * Return the next available index in the DESQ. - */ -static int mv_xor_v2_get_desq_write_ptr(struct mv_xor_v2_device *xor_dev) -{ - /* read the index for the next available descriptor in the DESQ */ - u32 reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ALLOC_OFF); - - return ((reg >> MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT) - & MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK); -} - /* * notify the engine of new descriptors, and update the available index. */ @@ -306,7 +295,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) static dma_cookie_t mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx) { - int desq_ptr; void *dest_hw_desc; dma_cookie_t cookie; struct mv_xor_v2_sw_desc *sw_desc = @@ -322,15 +310,15 @@ mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_bh(&xor_dev->lock); cookie = dma_cookie_assign(tx); - /* get the next available slot in the DESQ */ - desq_ptr = mv_xor_v2_get_desq_write_ptr(xor_dev); - /* copy the HW descriptor from the SW descriptor to the DESQ */ - dest_hw_desc = xor_dev->hw_desq_virt + desq_ptr; + dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx; memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size); xor_dev->npendings++; + xor_dev->hw_queue_idx++; + if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM) + xor_dev->hw_queue_idx = 0; spin_unlock_bh(&xor_dev->lock); From 9dd4f319bac25334a869d9276b19eac9e478fd33 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:49 +0200 Subject: [PATCH 010/249] dmaengine: mv_xor_v2: remove interrupt coalescing The current implementation of interrupt coalescing doesn't work, because it doesn't configure the coalescing timer, which is needed to make sure we get an interrupt at some point. As a fix for stable, we simply remove the interrupt coalescing functionality. It will be re-introduced properly in a future commit. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 4684eceea759..b133fe29d788 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -246,22 +246,6 @@ static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev) return MV_XOR_V2_EXT_DESC_SIZE; } -/* - * Set the IMSG threshold - */ -static inline -void mv_xor_v2_set_imsg_thrd(struct mv_xor_v2_device *xor_dev, int thrd_val) -{ - u32 reg; - - reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); - - reg &= (~MV_XOR_V2_DMA_IMSG_THRD_MASK << MV_XOR_V2_DMA_IMSG_THRD_SHIFT); - reg |= (thrd_val << MV_XOR_V2_DMA_IMSG_THRD_SHIFT); - - writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF); -} - static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) { struct mv_xor_v2_device *xor_dev = data; @@ -277,12 +261,6 @@ static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data) if (!ndescs) return IRQ_NONE; - /* - * Update IMSG threshold, to disable new IMSG interrupts until - * end of the tasklet - */ - mv_xor_v2_set_imsg_thrd(xor_dev, MV_XOR_V2_DESC_NUM); - /* schedule a tasklet to handle descriptors callbacks */ tasklet_schedule(&xor_dev->irq_tasklet); @@ -607,9 +585,6 @@ static void mv_xor_v2_tasklet(unsigned long data) /* free the descriptores */ mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending); } - - /* Update IMSG threshold, to enable new IMSG interrupts */ - mv_xor_v2_set_imsg_thrd(xor_dev, 0); } /* From b2d3c270f9f2fb82518ac500a9849c3aaf503852 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 5 May 2017 11:57:50 +0200 Subject: [PATCH 011/249] dmaengine: mv_xor_v2: set DMA mask to 40 bits The XORv2 engine on Armada 7K/8K can only access the first 40 bits of the physical address space, so the DMA mask must be set accordingly. Fixes: 19a340b1a820 ("dmaengine: mv_xor_v2: new driver") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Vinod Koul --- drivers/dma/mv_xor_v2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index b133fe29d788..f3e211f8f6c5 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -693,6 +693,10 @@ static int mv_xor_v2_probe(struct platform_device *pdev) platform_set_drvdata(pdev, xor_dev); + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; + xor_dev->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) return -EPROBE_DEFER; From 72d42504bd7faa6de1c1644b5e46652933e040a6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 May 2017 13:34:29 +0200 Subject: [PATCH 012/249] ovl: select EXPORTFS We get a link error when EXPORTFS is not enabled: ERROR: "exportfs_encode_fh" [fs/overlayfs/overlay.ko] undefined! ERROR: "exportfs_decode_fh" [fs/overlayfs/overlay.ko] undefined! This adds a Kconfig 'select' statement for overlayfs, the same way that it is done for the other users of exportfs. Fixes: 3a1e819b4e80 ("ovl: store file handle of lower inode on copy up") Signed-off-by: Arnd Bergmann Signed-off-by: Miklos Szeredi --- fs/overlayfs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 0daac5112f7a..c0c9683934b7 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,5 +1,6 @@ config OVERLAY_FS tristate "Overlay filesystem support" + select EXPORTFS help An overlay filesystem combines two filesystems - an 'upper' filesystem and a 'lower' filesystem. When a name exists in both filesystems, the From 98883f1b5415ea9dce60d5178877d15f4faa10b8 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Tue, 9 May 2017 11:50:26 -0500 Subject: [PATCH 013/249] ibmvscsis: Clear left-over abort_cmd pointers With the addition of ibmvscsis->abort_cmd pointer within commit 25e78531268e ("ibmvscsis: Do not send aborted task response"), make sure to explicitly NULL these pointers when clearing DELAY_SEND flag. Do this for two cases, when getting the new new ibmvscsis descriptor in ibmvscsis_get_free_cmd() and before posting the response completion in ibmvscsis_send_messages(). Signed-off-by: Bryant G. Ly Reviewed-by: Michael Cyr Cc: # v4.8+ Signed-off-by: Nicholas Bellinger --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index d390325c99ec..ee64241865e6 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1170,6 +1170,8 @@ static struct ibmvscsis_cmd *ibmvscsis_get_free_cmd(struct scsi_info *vscsi) cmd = list_first_entry_or_null(&vscsi->free_cmd, struct ibmvscsis_cmd, list); if (cmd) { + if (cmd->abort_cmd) + cmd->abort_cmd = NULL; cmd->flags &= ~(DELAY_SEND); list_del(&cmd->list); cmd->iue = iue; @@ -1774,6 +1776,7 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi) if (cmd->abort_cmd) { retry = true; cmd->abort_cmd->flags &= ~(DELAY_SEND); + cmd->abort_cmd = NULL; } /* From 75dbf2d36f6b122ad3c1070fe4bf95f71bbff321 Mon Sep 17 00:00:00 2001 From: "Bryant G. Ly" Date: Wed, 10 May 2017 14:35:47 -0500 Subject: [PATCH 014/249] ibmvscsis: Fix the incorrect req_lim_delta The current code is not correctly calculating the req_lim_delta. We want to make sure vscsi->credit is always incremented when we do not send a response for the scsi op. Thus for the case where there is a successfully aborted task we need to make sure the vscsi->credit is incremented. v2 - Moves the original location of the vscsi->credit increment to a better spot. Since if we increment credit, the next command we send back will have increased req_lim_delta. But we probably shouldn't be doing that until the aborted cmd is actually released. Otherwise the client will think that it can send a new command, and we could find ourselves short of command elements. Not likely, but could happen. This patch depends on both: commit 25e78531268e ("ibmvscsis: Do not send aborted task response") commit 98883f1b5415 ("ibmvscsis: Clear left-over abort_cmd pointers") Signed-off-by: Bryant G. Ly Reviewed-by: Michael Cyr Cc: # v4.8+ Signed-off-by: Nicholas Bellinger --- drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c index ee64241865e6..abf6026645dd 100644 --- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c +++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c @@ -1791,6 +1791,25 @@ static void ibmvscsis_send_messages(struct scsi_info *vscsi) list_del(&cmd->list); ibmvscsis_free_cmd_resources(vscsi, cmd); + /* + * With a successfully aborted op + * through LIO we want to increment the + * the vscsi credit so that when we dont + * send a rsp to the original scsi abort + * op (h_send_crq), but the tm rsp to + * the abort is sent, the credit is + * correctly sent with the abort tm rsp. + * We would need 1 for the abort tm rsp + * and 1 credit for the aborted scsi op. + * Thus we need to increment here. + * Also we want to increment the credit + * here because we want to make sure + * cmd is actually released first + * otherwise the client will think it + * it can send a new cmd, and we could + * find ourselves short of cmd elements. + */ + vscsi->credit += 1; } else { iue = cmd->iue; @@ -2965,10 +2984,7 @@ static long srp_build_response(struct scsi_info *vscsi, rsp->opcode = SRP_RSP; - if (vscsi->credit > 0 && vscsi->state == SRP_PROCESSING) - rsp->req_lim_delta = cpu_to_be32(vscsi->credit); - else - rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); + rsp->req_lim_delta = cpu_to_be32(1 + vscsi->credit); rsp->tag = cmd->rsp.tag; rsp->flags = 0; From 4ff83daa0200affe1894bd33d17bac404e3d78d4 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 11 May 2017 01:07:24 -0700 Subject: [PATCH 015/249] target: Re-add check to reject control WRITEs with overflow data During v4.3 when the overflow/underflow check was relaxed by commit c72c525022: commit c72c5250224d475614a00c1d7e54a67f77cd3410 Author: Roland Dreier Date: Wed Jul 22 15:08:18 2015 -0700 target: allow underflow/overflow for PR OUT etc. commands to allow underflow/overflow for Windows compliance + FCP, a consequence was to allow control CDBs to process overflow data for iscsi-target with immediate data as well. As per Roland's original change, continue to allow underflow cases for control CDBs to make Windows compliance + FCP happy, but until overflow for control CDBs is supported tree-wide, explicitly reject all control WRITEs with overflow following pre v4.3.y logic. Reported-by: Bart Van Assche Cc: Roland Dreier Cc: # v4.3+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 37f57357d4a0..6025935036c9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1160,15 +1160,28 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size) if (cmd->unknown_data_length) { cmd->data_length = size; } else if (size != cmd->data_length) { - pr_warn("TARGET_CORE[%s]: Expected Transfer Length:" + pr_warn_ratelimited("TARGET_CORE[%s]: Expected Transfer Length:" " %u does not match SCSI CDB Length: %u for SAM Opcode:" " 0x%02x\n", cmd->se_tfo->get_fabric_name(), cmd->data_length, size, cmd->t_task_cdb[0]); - if (cmd->data_direction == DMA_TO_DEVICE && - cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { - pr_err("Rejecting underflow/overflow WRITE data\n"); - return TCM_INVALID_CDB_FIELD; + if (cmd->data_direction == DMA_TO_DEVICE) { + if (cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) { + pr_err_ratelimited("Rejecting underflow/overflow" + " for WRITE data CDB\n"); + return TCM_INVALID_CDB_FIELD; + } + /* + * Some fabric drivers like iscsi-target still expect to + * always reject overflow writes. Reject this case until + * full fabric driver level support for overflow writes + * is introduced tree-wide. + */ + if (size > cmd->data_length) { + pr_err_ratelimited("Rejecting overflow for" + " WRITE control CDB\n"); + return TCM_INVALID_CDB_FIELD; + } } /* * Reject READ_* or WRITE_* with overflow/underflow for From 9a445bbb1607d9f14556a532453dd86d1b7e381e Mon Sep 17 00:00:00 2001 From: Hiroyuki Yokoyama Date: Mon, 15 May 2017 17:49:52 +0900 Subject: [PATCH 016/249] dmaengine: usb-dmac: Fix DMAOR AE bit definition This patch fixes the register definition of AE (Address Error flag) bit. Fixes: 0c1c8ff32fa2 ("dmaengine: usb-dmac: Add Renesas USB DMA Controller (USB-DMAC) driver") Cc: # v4.1+ Signed-off-by: Hiroyuki Yokoyama [Shimoda: add Fixes and Cc tags in the commit log] Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Signed-off-by: Vinod Koul --- drivers/dma/sh/usb-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c index 72c649713ace..31a145154e9f 100644 --- a/drivers/dma/sh/usb-dmac.c +++ b/drivers/dma/sh/usb-dmac.c @@ -117,7 +117,7 @@ struct usb_dmac { #define USB_DMASWR 0x0008 #define USB_DMASWR_SWR (1 << 0) #define USB_DMAOR 0x0060 -#define USB_DMAOR_AE (1 << 2) +#define USB_DMAOR_AE (1 << 1) #define USB_DMAOR_DME (1 << 0) #define USB_DMASAR 0x0000 From 6ceec6953efe7f371218082ddee1fe022a13e8ab Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 16 May 2017 16:54:53 +0200 Subject: [PATCH 017/249] MAINTAINERS: update RTC mailing list The RTC subsystem mailing list is moving to vger. Signed-off-by: Alexandre Belloni --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f7d568b8f133..b3863ec142d7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10447,7 +10447,7 @@ S: Orphan PXA RTC DRIVER M: Robert Jarzmik -L: rtc-linux@googlegroups.com +L: linux-rtc@vger.kernel.org S: Maintained QAT DRIVER @@ -10754,7 +10754,7 @@ X: kernel/torture.c REAL TIME CLOCK (RTC) SUBSYSTEM M: Alessandro Zummo M: Alexandre Belloni -L: rtc-linux@googlegroups.com +L: linux-rtc@vger.kernel.org Q: http://patchwork.ozlabs.org/project/rtc-linux/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git S: Maintained From 9512a16b0e1217bbef73d276a67c28b5fbb46512 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 16 May 2017 15:57:42 -0400 Subject: [PATCH 018/249] nfsd: Revert "nfsd: check for oversized NFSv2/v3 arguments" This reverts commit 51f567777799 "nfsd: check for oversized NFSv2/v3 arguments", which breaks support for NFSv3 ACLs. That patch was actually an earlier draft of a fix for the problem that was eventually fixed by e6838a29ecb "nfsd: check for oversized NFSv2/v3 arguments". But somehow I accidentally left this earlier draft in the branch that was part of my 2.12 pull request. Reported-by: Eryu Guan Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3xdr.c | 23 ++++++----------------- fs/nfsd/nfsxdr.c | 13 +++---------- include/linux/sunrpc/svc.h | 3 ++- 3 files changed, 11 insertions(+), 28 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 12feac6ee2fd..452334694a5d 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -334,11 +334,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, if (!p) return 0; p = xdr_decode_hyper(p, &args->offset); + args->count = ntohl(*p++); - - if (!xdr_argsize_check(rqstp, p)) - return 0; - len = min(args->count, max_blocksize); /* set up the kvec */ @@ -352,7 +349,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, v++; } args->vlen = v; - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -544,11 +541,9 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, p = decode_fh(p, &args->fh); if (!p) return 0; - if (!xdr_argsize_check(rqstp, p)) - return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -574,14 +569,10 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); - - if (!xdr_argsize_check(rqstp, p)) - return 0; - args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -599,9 +590,6 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->dircount = ntohl(*p++); args->count = ntohl(*p++); - if (!xdr_argsize_check(rqstp, p)) - return 0; - len = args->count = min(args->count, max_blocksize); while (len > 0) { struct page *p = *(rqstp->rq_next_page++); @@ -609,7 +597,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->buffer = page_address(p); len -= PAGE_SIZE; } - return 1; + + return xdr_argsize_check(rqstp, p); } int diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 6a4947a3f4fa..de07ff625777 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -257,9 +257,6 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, len = args->count = ntohl(*p++); p++; /* totalcount - unused */ - if (!xdr_argsize_check(rqstp, p)) - return 0; - len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); /* set up somewhere to store response. @@ -275,7 +272,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, v++; } args->vlen = v; - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -365,11 +362,9 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readli p = decode_fh(p, &args->fh); if (!p) return 0; - if (!xdr_argsize_check(rqstp, p)) - return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } int @@ -407,11 +402,9 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, args->cookie = ntohl(*p++); args->count = ntohl(*p++); args->count = min_t(u32, args->count, PAGE_SIZE); - if (!xdr_argsize_check(rqstp, p)) - return 0; args->buffer = page_address(*(rqstp->rq_next_page++)); - return 1; + return xdr_argsize_check(rqstp, p); } /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 94631026f79c..11cef5a7bc87 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -336,7 +336,8 @@ xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) { char *cp = (char *)p; struct kvec *vec = &rqstp->rq_arg.head[0]; - return cp == (char *)vec->iov_base + vec->iov_len; + return cp >= (char*)vec->iov_base + && cp <= (char*)vec->iov_base + vec->iov_len; } static inline int From 4681ee21d62cfed4364e09ec50ee8e88185dd628 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 18 May 2017 11:49:39 +0300 Subject: [PATCH 019/249] drm/i915: Do not sync RCU during shrinking Due to the complex dependencies between workqueues and RCU, which are not easily detected by lockdep, do not synchronize RCU during shrinking. On low-on-memory systems (mem=1G for example), the RCU sync leads to all system workqueus freezing and unrelated lockdep splats are displayed according to reports. GIT bisecting done by J. R. Okajima points to the commit where RCU syncing was extended. RCU sync gains us very little benefit in real life scenarios where the amount of memory used by object backing storage is dominant over the metadata under RCU, so drop it altogether. " Yeeeaah, if core could just, go ahead and reclaim RCU queues, that'd be great. " - Chris Wilson, 2016 (0eafec6d3244) v2: More information to commit message. v3: Remove "grep _rcu_" escapee from i915_gem_shrink_all (Andrea) Fixes: c053b5a506d3 ("drm/i915: Don't call synchronize_rcu_expedited under struct_mutex") Suggested-by: Chris Wilson Reported-by: J. R. Okajima Signed-off-by: Joonas Lahtinen Reviewed-by: Chris Wilson Tested-by: Hugh Dickins Tested-by: Andrea Arcangeli Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: J. R. Okajima Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jani Nikula Cc: # v4.11+ (cherry picked from commit 73cc0b9aa9afa5ba65d92e46ded61d29430d72a4) Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1495097379-573-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 129ed303a6c4..57d9f7f4ef15 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -59,9 +59,6 @@ static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock) return; mutex_unlock(&dev->struct_mutex); - - /* expedite the RCU grace period to free some request slabs */ - synchronize_rcu_expedited(); } static bool any_vma_pinned(struct drm_i915_gem_object *obj) @@ -274,8 +271,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) I915_SHRINK_ACTIVE); intel_runtime_pm_put(dev_priv); - synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */ - return freed; } From 171d8b9363725e122b164e6b9ef2acf2f751e387 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 16 May 2017 09:55:14 +0100 Subject: [PATCH 020/249] drm/i915: use vma->size for appgtt allocate_va_range For the aliasing ppgtt we clear the va range up to vma->size, but seem to allocate up to vma->node.size, which is a little inconsistent given that vma->node.size >= vma->size. Not that is really matters all that much since we preallocate anyway, but for consistency just use vma->size. Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT") Signed-off-by: Matthew Auld Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170516085514.5853-1-matthew.auld@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit d567232cbd9ec2a289ddffea4013b7265bbcc3d5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a0563e18d753..50b8f1139ff9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2313,7 +2313,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, appgtt->base.allocate_va_range) { ret = appgtt->base.allocate_va_range(&appgtt->base, vma->node.start, - vma->node.size); + vma->size); if (ret) goto err_pages; } From d8db7ae4eeebb278aa68a231a003e7102f635a4d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 18 May 2017 13:06:44 +0200 Subject: [PATCH 021/249] drm/i915: Fix new -Wint-in-bool-context gcc compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit fixes the following compiler warning: drivers/gpu/drm/i915/intel_dsi.c: In function ‘intel_dsi_prepare’: drivers/gpu/drm/i915/intel_dsi.c:1487:23: warning: ?: using integer constants in boolean context [-Wint-in-bool-context] PORT_A ? PORT_C : PORT_A), Fixes: f4c3a88e5f04 ("drm/i915: Tighten mmio arrays for MIPI_PORT") Signed-off-by: Hans de Goede Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170518110644.9902-1-hdegoede@redhat.com (cherry picked from commit 0ad4dc887d4168448e8c801aa4edd8fe1e0bd534) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5a7c63e64381..65b837e96fe6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8280,7 +8280,7 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) (((port) == PORT_A) ? a : c) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) #define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) From 8137ae26d25303e7b5cfb418fd28b976461e5b6e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 16 May 2017 08:45:46 +0300 Subject: [PATCH 022/249] ovl: fix creds leak in copy up error path Fixes: 42f269b92540 ("ovl: rearrange code in ovl_copy_up_locked()") Cc: # v4.11 Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9008ab9fbd2e..061a8448e6c4 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -343,12 +343,13 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, temp = ovl_do_tmpfile(upperdir, stat->mode); else temp = ovl_lookup_temp(workdir, dentry); - err = PTR_ERR(temp); - if (IS_ERR(temp)) - goto out1; - err = 0; - if (!tmpfile) + if (IS_ERR(temp)) { + err = PTR_ERR(temp); + temp = NULL; + } + + if (!err && !tmpfile) err = ovl_create_real(wdir, temp, &cattr, NULL, true); if (new_creds) { From 82b749b2c65e9d108c1c5598dc0a5f436b525f42 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 17 May 2017 00:12:40 +0300 Subject: [PATCH 023/249] ovl: check on mount time if upper fs supports setting xattr xattr are needed by overlayfs for setting opaque dir, redirect dir and copy up origin. Check at mount time by trying to set the overlay.opaque xattr on the workdir and if that fails issue a warning message. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 3 +++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/super.c | 13 +++++++++++++ fs/overlayfs/util.c | 21 +++++++++++++++++++++ 4 files changed, 38 insertions(+) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index caa36cb9c46d..ce7c3aba61e4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -279,3 +279,6 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index b2023ddb8532..ad86c0a302eb 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -28,6 +28,7 @@ struct ovl_fs { /* creds of process who forced instantiation of super block */ const struct cred *creator_cred; bool tmpfile; + bool noxattr; wait_queue_head_t copyup_wq; /* sb common to all layers */ struct super_block *same_sb; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 9828b7de8999..f1647626a882 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -891,6 +891,19 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) dput(temp); else pr_warn("overlayfs: upper fs does not support tmpfile.\n"); + + /* + * Check if upper/work fs supports trusted.overlay.* + * xattr + */ + err = ovl_do_setxattr(ufs->workdir, OVL_XATTR_OPAQUE, + "0", 1, 0); + if (err) { + ufs->noxattr = true; + pr_warn("overlayfs: upper fs does not support xattr.\n"); + } else { + vfs_removexattr(ufs->workdir, OVL_XATTR_OPAQUE); + } } } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index cfdea47313a1..b5a0dc36ee96 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -303,3 +303,24 @@ void ovl_copy_up_end(struct dentry *dentry) wake_up_locked(&ofs->copyup_wq); spin_unlock(&ofs->copyup_wq.lock); } + +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr) +{ + int err; + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + + if (ofs->noxattr) + return xerr; + + err = ovl_do_setxattr(upperdentry, name, value, size, 0); + + if (err == -EOPNOTSUPP) { + pr_warn("overlayfs: cannot set %s xattr on upper\n", name); + ofs->noxattr = true; + return xerr; + } + + return err; +} From 6266d465bde044a105f6c2d4e244680f951a2d70 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 18 May 2017 16:11:24 +0200 Subject: [PATCH 024/249] ovl: don't fail copy-up if upper doesn't support xattr Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 061a8448e6c4..f92ab35d43a6 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -300,7 +300,11 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, return PTR_ERR(fh); } - err = ovl_do_setxattr(upper, OVL_XATTR_ORIGIN, fh, fh ? fh->len : 0, 0); + /* + * Do not fail when upper doesn't support xattrs. + */ + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh, + fh ? fh->len : 0, 0); kfree(fh); return err; From 21a228781104ae6fed7e720137ab024575071feb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 17 May 2017 00:12:41 +0300 Subject: [PATCH 025/249] ovl: handle rename when upper doesn't support xattr On failure to set opaque/redirect xattr on rename, skip setting xattr and return -EXDEV. On failure to set opaque xattr when creating a new directory, -EIO is returned instead of -EOPNOTSUPP. Any failure to set those xattr will be recorded in super block and then setting any xattr on upper won't be attempted again. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 29 +++++++++++++++++++---------- fs/overlayfs/overlayfs.h | 1 - fs/overlayfs/util.c | 9 +-------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 723b98b90698..80e0e202a346 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -127,17 +127,28 @@ int ovl_create_real(struct inode *dir, struct dentry *newdentry, return err; } -static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) +static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, + int xerr) { int err; - err = ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); + err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); return err; } +static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) +{ + /* + * Fail with -EIO when trying to create opaque dir and upper doesn't + * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to + * return a specific error for noxattr case. + */ + return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); +} + /* Common operations required to be done after creation of file on upper */ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) @@ -846,18 +857,16 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_do_setxattr(ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, - redirect, strlen(redirect), 0); + err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + OVL_XATTR_REDIRECT, + redirect, strlen(redirect), -EXDEV); if (!err) { spin_lock(&dentry->d_lock); ovl_dentry_set_redirect(dentry, redirect); spin_unlock(&dentry->d_lock); } else { kfree(redirect); - if (err == -EOPNOTSUPP) - ovl_clear_redirect_dir(dentry->d_sb); - else - pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); + pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); /* Fall back to userspace copy-up */ err = -EXDEV; } @@ -992,7 +1001,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(old)) err = ovl_set_redirect(old, samedir); else if (!old_opaque && ovl_type_merge(new->d_parent)) - err = ovl_set_opaque(old, olddentry); + err = ovl_set_opaque_xerr(old, olddentry, -EXDEV); if (err) goto out_dput; } @@ -1000,7 +1009,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (ovl_type_merge_or_lower(new)) err = ovl_set_redirect(new, samedir); else if (!new_opaque && ovl_type_merge(old->d_parent)) - err = ovl_set_opaque(new, newdentry); + err = ovl_set_opaque_xerr(new, newdentry, -EXDEV); if (err) goto out_dput; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ce7c3aba61e4..505b18b56330 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -206,7 +206,6 @@ bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); -void ovl_clear_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index b5a0dc36ee96..4d541a8d0834 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -191,14 +191,7 @@ bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; - return ofs->config.redirect_dir; -} - -void ovl_clear_redirect_dir(struct super_block *sb) -{ - struct ovl_fs *ofs = sb->s_fs_info; - - ofs->config.redirect_dir = false; + return ofs->config.redirect_dir && !ofs->noxattr; } const char *ovl_dentry_get_redirect(struct dentry *dentry) From 3d27573ce32b47ba54e6680c77c26a700d67cc16 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 19 May 2017 09:33:49 +0200 Subject: [PATCH 026/249] ovl: remove unused arg from ovl_lookup_temp() Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/dir.c | 8 ++++---- fs/overlayfs/overlayfs.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f92ab35d43a6..843ed2a2d7db 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -346,7 +346,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, if (tmpfile) temp = ovl_do_tmpfile(upperdir, stat->mode); else - temp = ovl_lookup_temp(workdir, dentry); + temp = ovl_lookup_temp(workdir); err = 0; if (IS_ERR(temp)) { err = PTR_ERR(temp); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 80e0e202a346..369ee7c4cdc0 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -41,7 +41,7 @@ void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) } } -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) +struct dentry *ovl_lookup_temp(struct dentry *workdir) { struct dentry *temp; char name[20]; @@ -68,7 +68,7 @@ static struct dentry *ovl_whiteout(struct dentry *workdir, struct dentry *whiteout; struct inode *wdir = workdir->d_inode; - whiteout = ovl_lookup_temp(workdir, dentry); + whiteout = ovl_lookup_temp(workdir); if (IS_ERR(whiteout)) return whiteout; @@ -261,7 +261,7 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, if (upper->d_parent->d_inode != udir) goto out_unlock; - opaquedir = ovl_lookup_temp(workdir, dentry); + opaquedir = ovl_lookup_temp(workdir); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out_unlock; @@ -393,7 +393,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, if (err) goto out; - newdentry = ovl_lookup_temp(workdir, dentry); + newdentry = ovl_lookup_temp(workdir); err = PTR_ERR(newdentry); if (IS_ERR(newdentry)) goto out_unlock; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 505b18b56330..7c56932bfc2f 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -262,7 +262,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* dir.c */ extern const struct inode_operations ovl_dir_inode_operations; -struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); +struct dentry *ovl_lookup_temp(struct dentry *workdir); struct cattr { dev_t rdev; umode_t mode; From ee1d6d37b6b884383b501089be93ce94f2153028 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 11 May 2017 16:42:26 +0300 Subject: [PATCH 027/249] ovl: mark upper dir with type origin entries "impure" When moving a merge dir or non-dir with copy up origin into a non-merge upper dir (a.k.a pure upper dir), we are marking the target parent dir "impure". ovl_iterate() iterates pure upper dirs directly, because there is no need to filter out whiteouts and merge dir content with lower dir. But for the case of an "impure" upper dir, ovl_iterate() will not be able to iterate the real upper dir directly, because it will need to lookup the origin inode and use it to fill d_ino. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 45 ++++++++++++++++++++++++++++++++++++++++ fs/overlayfs/namei.c | 18 ++++++++++++++-- fs/overlayfs/overlayfs.h | 3 +++ fs/overlayfs/ovl_entry.h | 1 + fs/overlayfs/util.c | 14 +++++++++++++ 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 369ee7c4cdc0..f2a118ba00e4 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -149,6 +149,22 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); } +static int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) +{ + int err; + + /* + * Do not fail when upper doesn't support xattrs. + * Upper inodes won't have origin nor redirect xattr anyway. + */ + err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, + "y", 1, 0); + if (!err) + ovl_dentry_set_impure(dentry); + + return err; +} + /* Common operations required to be done after creation of file on upper */ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) @@ -173,6 +189,11 @@ static bool ovl_type_merge(struct dentry *dentry) return OVL_TYPE_MERGE(ovl_path_type(dentry)); } +static bool ovl_type_origin(struct dentry *dentry) +{ + return OVL_TYPE_ORIGIN(ovl_path_type(dentry)); +} + static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct cattr *attr, struct dentry *hardlink) { @@ -952,6 +973,30 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, old_upperdir = ovl_dentry_upper(old->d_parent); new_upperdir = ovl_dentry_upper(new->d_parent); + if (!samedir) { + /* + * When moving a merge dir or non-dir with copy up origin into + * a non-merge upper dir (a.k.a pure upper dir), we are making + * the target parent dir "impure". ovl_iterate() iterates pure + * upper dirs directly, because there is no need to filter out + * whiteouts and merge dir content with lower dir. But for the + * case of an "impure" upper dir, ovl_iterate() cannot iterate + * the real directory directly, because it looks for the inode + * numbers to fill d_ino in the entries origin inode. + */ + if (ovl_type_origin(old) && !ovl_type_merge(new->d_parent)) { + err = ovl_set_impure(new->d_parent, new_upperdir); + if (err) + goto out_revert_creds; + } + if (!overwrite && ovl_type_origin(new) && + !ovl_type_merge(old->d_parent)) { + err = ovl_set_impure(old->d_parent, old_upperdir); + if (err) + goto out_revert_creds; + } + } + trap = lock_rename(new_upperdir, old_upperdir); olddentry = lookup_one_len(old->d_name.name, old_upperdir, diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index bad0f665a635..0c72a5909db2 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -167,7 +167,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, goto out; } -static bool ovl_is_opaquedir(struct dentry *dentry) +static bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) { int res; char val; @@ -175,13 +175,23 @@ static bool ovl_is_opaquedir(struct dentry *dentry) if (!d_is_dir(dentry)) return false; - res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); + res = vfs_getxattr(dentry, name, &val, 1); if (res == 1 && val == 'y') return true; return false; } +static bool ovl_is_opaquedir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); +} + +static bool ovl_is_impuredir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); +} + static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, const char *name, unsigned int namelen, size_t prelen, const char *post, @@ -351,6 +361,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int ctr = 0; struct inode *inode = NULL; bool upperopaque = false; + bool upperimpure = false; char *upperredirect = NULL; struct dentry *this; unsigned int i; @@ -395,6 +406,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, poe = roe; } upperopaque = d.opaque; + if (upperdentry && d.is_dir) + upperimpure = ovl_is_impuredir(upperdentry); } if (!d.stop && poe->numlower) { @@ -463,6 +476,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, revert_creds(old_cred); oe->opaque = upperopaque; + oe->impure = upperimpure; oe->redirect = upperredirect; oe->__upperdentry = upperdentry; memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 7c56932bfc2f..a9fb958fd5d4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -24,6 +24,7 @@ enum ovl_path_type { #define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque" #define OVL_XATTR_REDIRECT OVL_XATTR_PREFIX "redirect" #define OVL_XATTR_ORIGIN OVL_XATTR_PREFIX "origin" +#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure" /* * The tuple (fh,uuid) is a universal unique identifier for a copy up origin, @@ -203,8 +204,10 @@ struct dentry *ovl_dentry_real(struct dentry *dentry); struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); bool ovl_dentry_is_opaque(struct dentry *dentry); +bool ovl_dentry_is_impure(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); +void ovl_dentry_set_impure(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h index ad86c0a302eb..34bc4a9f5c61 100644 --- a/fs/overlayfs/ovl_entry.h +++ b/fs/overlayfs/ovl_entry.h @@ -43,6 +43,7 @@ struct ovl_entry { u64 version; const char *redirect; bool opaque; + bool impure; bool copying; }; struct rcu_head rcu; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 4d541a8d0834..e0dfb07d5457 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -175,6 +175,13 @@ bool ovl_dentry_is_opaque(struct dentry *dentry) return oe->opaque; } +bool ovl_dentry_is_impure(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->impure; +} + bool ovl_dentry_is_whiteout(struct dentry *dentry) { return !dentry->d_inode && ovl_dentry_is_opaque(dentry); @@ -187,6 +194,13 @@ void ovl_dentry_set_opaque(struct dentry *dentry) oe->opaque = true; } +void ovl_dentry_set_impure(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + oe->impure = true; +} + bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; From 46cd902f6d9a746f0da816cbdfb04b9baf6d7967 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 11 May 2017 18:07:42 +0800 Subject: [PATCH 028/249] drm/i915: set initialised only when init_context callback is NULL During execlist_context_deferred_alloc() we presumed that the context is uninitialised (we only just allocated the state object for it!) and chose to optimise away the later call to engine->init_context() if engine->init_context were NULL. This breaks with GVT's contexts that are marked as pre-initialised to avoid us annoyingly calling engine->init_context(). The fix is to not override ce->initialised if it is already true. Cc: Chris Wilson Signed-off-by: Chuanxiao Dong Link: http://patchwork.freedesktop.org/patch/msgid/1494497262-24855-1-git-send-email-chuanxiao.dong@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson (cherry picked from commit 0d402a24df8c8160727af934d83293f3d44d31a3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8f7c631fc1f..dac4e003c1f3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1989,7 +1989,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; - ce->initialised = engine->init_context == NULL; + ce->initialised |= engine->init_context == NULL; return 0; From 2dffdc0724004f38f5e39907747b53e4b0d80e59 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 16 May 2017 14:01:25 +0800 Subject: [PATCH 029/249] md-cluster: fix potential lock issue in add_new_disk The add_new_disk returns with communication locked if __sendmsg returns failure, fix it with call unlock_comm before return. Reported-by: Dan Carpenter CC: Goldwyn Rodrigues Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 7299ce2f08a8..03082e17c65c 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1311,8 +1311,10 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev) cmsg.raid_slot = cpu_to_le32(rdev->desc_nr); lock_comm(cinfo, 1); ret = __sendmsg(cinfo, &cmsg); - if (ret) + if (ret) { + unlock_comm(cinfo); return ret; + } cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX); cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE; From 44d4182e23c555cbfa8b8a0ad2d94664d23850d3 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:50 -0700 Subject: [PATCH 030/249] mtd: nand: don't leak buffers when ->scan_bbt() fails This bug seems to have been here forever, although we came close to fixing all of them in [1]! [1] 11eaf6df1cce ("mtd: nand: Remove BUG() abuse in nand_scan_tail") Signed-off-by: Brian Norris Acked-by: Ezequiel Garcia Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index d474378ed810..66782291a762 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4842,7 +4842,11 @@ int nand_scan_tail(struct mtd_info *mtd) return 0; /* Build bad block table */ - return chip->scan_bbt(mtd); + ret = chip->scan_bbt(mtd); + if (ret) + goto err_free; + return 0; + err_free: if (nbuf) { kfree(nbuf->databuf); From 0545c1720277dd246bd682b23aee425f3830a14f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:52 -0700 Subject: [PATCH 031/249] mtd: nand: drop unneeded module.h include nand_ids isn't a separate module anymore and doesn't need this header. Signed-off-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_ids.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index 9d5ca0e540b5..92e2cf8e9ff9 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -6,7 +6,6 @@ * published by the Free Software Foundation. * */ -#include #include #include From 787710492911e21148975e1d1914c7409fb32c7e Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:53 -0700 Subject: [PATCH 032/249] mtd: nand: free vendor-specific resources in init failure paths If we fail any time after calling nand_detect(), then we don't call the vendor-specific ->cleanup() callback, and we'll leak any resources the vendor-specific code might have allocated. Mark the "fix" against the first commit that started allocating anything in ->init(). Fixes: 626994e07480 ("mtd: nand: hynix: Add read-retry support for 1x nm MLC NANDs") Signed-off-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 38 +++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 66782291a762..81f77f9cd784 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -4361,7 +4361,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, /* Initialize the ->data_interface field. */ ret = nand_init_data_interface(chip); if (ret) - return ret; + goto err_nand_init; /* * Setup the data interface correctly on the chip and controller side. @@ -4373,7 +4373,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, */ ret = nand_setup_data_interface(chip); if (ret) - return ret; + goto err_nand_init; nand_maf_id = chip->id.data[0]; nand_dev_id = chip->id.data[1]; @@ -4404,6 +4404,12 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, mtd->size = i * chip->chipsize; return 0; + +err_nand_init: + /* Free manufacturer priv data. */ + nand_manufacturer_cleanup(chip); + + return ret; } EXPORT_SYMBOL(nand_scan_ident); @@ -4574,18 +4580,23 @@ int nand_scan_tail(struct mtd_info *mtd) /* New bad blocks should be marked in OOB, flash-based BBT, or both */ if (WARN_ON((chip->bbt_options & NAND_BBT_NO_OOB_BBM) && - !(chip->bbt_options & NAND_BBT_USE_FLASH))) - return -EINVAL; + !(chip->bbt_options & NAND_BBT_USE_FLASH))) { + ret = -EINVAL; + goto err_ident; + } if (invalid_ecc_page_accessors(chip)) { pr_err("Invalid ECC page accessors setup\n"); - return -EINVAL; + ret = -EINVAL; + goto err_ident; } if (!(chip->options & NAND_OWN_BUFFERS)) { nbuf = kzalloc(sizeof(*nbuf), GFP_KERNEL); - if (!nbuf) - return -ENOMEM; + if (!nbuf) { + ret = -ENOMEM; + goto err_ident; + } nbuf->ecccalc = kmalloc(mtd->oobsize, GFP_KERNEL); if (!nbuf->ecccalc) { @@ -4608,8 +4619,10 @@ int nand_scan_tail(struct mtd_info *mtd) chip->buffers = nbuf; } else { - if (!chip->buffers) - return -ENOMEM; + if (!chip->buffers) { + ret = -ENOMEM; + goto err_ident; + } } /* Set the internal oob buffer location, just after the page data */ @@ -4854,6 +4867,13 @@ int nand_scan_tail(struct mtd_info *mtd) kfree(nbuf->ecccalc); kfree(nbuf); } + +err_ident: + /* Clean up nand_scan_ident(). */ + + /* Free manufacturer priv data. */ + nand_manufacturer_cleanup(chip); + return ret; } EXPORT_SYMBOL(nand_scan_tail); From d24197907454b902908e025bce4b8bc677d3ba6b Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 1 May 2017 17:04:55 -0700 Subject: [PATCH 033/249] mtd: nand: samsung: warn about un-parseable ECC info We don't handle cases larger than 7. We probably shouldn't pretend we know the ECC step size in this case, and it's probably also good to WARN() like we do in many other similar cases. Fixes: 8fc82d456e40 ("mtd: nand: samsung: Retrieve ECC requirements from extended ID") Signed-off-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_samsung.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mtd/nand/nand_samsung.c b/drivers/mtd/nand/nand_samsung.c index 9cfc4035a420..1e0755997762 100644 --- a/drivers/mtd/nand/nand_samsung.c +++ b/drivers/mtd/nand/nand_samsung.c @@ -84,6 +84,9 @@ static void samsung_nand_decode_id(struct nand_chip *chip) case 7: chip->ecc_strength_ds = 60; break; + default: + WARN(1, "Could not decode ECC info"); + chip->ecc_step_ds = 0; } } } else { From 2761b4f12b017f6d3e5add386733a700a490df47 Mon Sep 17 00:00:00 2001 From: Andres Galacho Date: Mon, 1 May 2017 16:30:15 -0400 Subject: [PATCH 034/249] mtd: nand: tango: Export OF device ID table as module aliases The device table is required to load modules based on modaliases. After adding MODULE_DEVICE_TABLE, below entries for example will be added to module.alias: alias: of:N*T*Csigma,smp8758-nandC* alias: of:N*T*Csigma,smp8758-nand Fixes: 6956e2385a16 ("mtd: nand: add tango NAND flash controller support") Cc: stable@vger.kernel.org Signed-off-by: Andres Galacho Acked-by: Brian Norris Signed-off-by: Boris Brezillon --- drivers/mtd/nand/tango_nand.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 05b6e1065203..82fea9b4c358 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -663,6 +663,7 @@ static const struct of_device_id tango_nand_ids[] = { { .compatible = "sigma,smp8758-nand" }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, tango_nand_ids); static struct platform_driver tango_nand_driver = { .probe = tango_nand_probe, From 60cf0ce14b09b54e7ee79dc3ef498de6ef0e41e9 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Fri, 12 May 2017 17:34:01 +0200 Subject: [PATCH 035/249] mtd: nand: tango: Update ecc_stats.corrected According to Boris, some user-space tools expect MTD drivers to update ecc_stats.corrected, and it's better to provide a lower bound than to provide no information at all. Fixes: 6956e2385a16 ("mtd: nand: add tango NAND flash controller support") Cc: stable@vger.kernel.org Reported-by: Pavel Machek Signed-off-by: Marc Gonzalez Signed-off-by: Boris Brezillon --- drivers/mtd/nand/tango_nand.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mtd/nand/tango_nand.c b/drivers/mtd/nand/tango_nand.c index 82fea9b4c358..49b286c6c10f 100644 --- a/drivers/mtd/nand/tango_nand.c +++ b/drivers/mtd/nand/tango_nand.c @@ -55,10 +55,10 @@ * byte 1 for other packets in the page (PKT_N, for N > 0) * ERR_COUNT_PKT_N is the max error count over all but the first packet. */ -#define DECODE_OK_PKT_0(v) ((v) & BIT(7)) -#define DECODE_OK_PKT_N(v) ((v) & BIT(15)) #define ERR_COUNT_PKT_0(v) (((v) >> 0) & 0x3f) #define ERR_COUNT_PKT_N(v) (((v) >> 8) & 0x3f) +#define DECODE_FAIL_PKT_0(v) (((v) & BIT(7)) == 0) +#define DECODE_FAIL_PKT_N(v) (((v) & BIT(15)) == 0) /* Offsets relative to pbus_base */ #define PBUS_CS_CTRL 0x83c @@ -193,6 +193,8 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf) chip->ecc.strength); if (res < 0) mtd->ecc_stats.failed++; + else + mtd->ecc_stats.corrected += res; bitflips = max(res, bitflips); buf += pkt_size; @@ -202,9 +204,11 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf) return bitflips; } -static int decode_error_report(struct tango_nfc *nfc) +static int decode_error_report(struct nand_chip *chip) { u32 status, res; + struct mtd_info *mtd = nand_to_mtd(chip); + struct tango_nfc *nfc = to_tango_nfc(chip->controller); status = readl_relaxed(nfc->reg_base + NFC_XFER_STATUS); if (status & PAGE_IS_EMPTY) @@ -212,10 +216,14 @@ static int decode_error_report(struct tango_nfc *nfc) res = readl_relaxed(nfc->mem_base + ERROR_REPORT); - if (DECODE_OK_PKT_0(res) && DECODE_OK_PKT_N(res)) - return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res)); + if (DECODE_FAIL_PKT_0(res) || DECODE_FAIL_PKT_N(res)) + return -EBADMSG; - return -EBADMSG; + /* ERR_COUNT_PKT_N is max, not sum, but that's all we have */ + mtd->ecc_stats.corrected += + ERR_COUNT_PKT_0(res) + ERR_COUNT_PKT_N(res); + + return max(ERR_COUNT_PKT_0(res), ERR_COUNT_PKT_N(res)); } static void tango_dma_callback(void *arg) @@ -282,7 +290,7 @@ static int tango_read_page(struct mtd_info *mtd, struct nand_chip *chip, if (err) return err; - res = decode_error_report(nfc); + res = decode_error_report(chip); if (res < 0) { chip->ecc.read_oob_raw(mtd, chip, page); res = check_erased_page(chip, buf); From d4ed3b9015b5eebc90d629579d9e7944607cbae5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 4 May 2017 13:11:00 +0100 Subject: [PATCH 036/249] mtd: nand: make nand_ooblayout_lp_hamming_ops static nand_ooblayout_lp_hamming_ops can be made static as it does not need to be in global scope. Signed-off-by: Colin Ian King Acked-by: Boris Brezillon Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 81f77f9cd784..b1dd12729f19 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -202,7 +202,7 @@ static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section, return 0; } -const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = { +static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = { .ecc = nand_ooblayout_ecc_lp_hamming, .free = nand_ooblayout_free_lp_hamming, }; From a9de080bbcd5c4e213a3d7bbb1e314d60980e943 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 25 Apr 2017 06:22:05 +0000 Subject: [PATCH 037/249] pinctrl: cherryview: Add terminate entry for dmi_system_id tables Make sure dmi_system_id tables are NULL terminated. Fixes: 703650278372 ("pinctrl: cherryview: Add a quirk to make Acer Chromebook keyboard work again") Signed-off-by: Wei Yongjun Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 2debba62fac9..e35d0fe4c737 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1547,7 +1547,8 @@ static const struct dmi_system_id chv_no_valid_mask[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"), DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"), }, - } + }, + {} }; static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) From b4d2ea2af95cb77e2f320e24da526280d4aa2f6b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 8 May 2017 10:48:21 +0200 Subject: [PATCH 038/249] Revert "pinctrl: generic: Add bi-directional and output-enable" This reverts commit 8c58f1a7a4b6d1d723bf25fef9d842d5a11200d0. It turns out that applying these generic properties was premature: the properties used in the driver using this are of unclear electrical nature and the subject need to be discussed. Signed-off-by: Linus Walleij --- Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt | 2 -- drivers/pinctrl/pinconf-generic.c | 3 --- include/linux/pinctrl/pinconf-generic.h | 3 --- 3 files changed, 8 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt index 71a3c134af1b..f01d154090da 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt @@ -247,7 +247,6 @@ bias-bus-hold - latch weakly bias-pull-up - pull up the pin bias-pull-down - pull down the pin bias-pull-pin-default - use pin-default pull state -bi-directional - pin supports simultaneous input/output operations drive-push-pull - drive actively high and low drive-open-drain - drive with open drain drive-open-source - drive with open source @@ -260,7 +259,6 @@ input-debounce - debounce mode with debound time X power-source - select between different power supplies low-power-enable - enable low power mode low-power-disable - disable low power mode -output-enable - enable output on pin regardless of output value output-low - set the pin to output mode with low level output-high - set the pin to output mode with high level slew-rate - set the slew rate diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 0d6b7f4b82af..720a19fd38d2 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -35,7 +35,6 @@ static const struct pin_config_item conf_items[] = { PCONFDUMP(PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, "input bias pull to pin specific state", NULL, false), PCONFDUMP(PIN_CONFIG_BIAS_PULL_UP, "input bias pull up", NULL, false), - PCONFDUMP(PIN_CONFIG_BIDIRECTIONAL, "bi-directional pin operations", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_DRAIN, "output drive open drain", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_OPEN_SOURCE, "output drive open source", NULL, false), PCONFDUMP(PIN_CONFIG_DRIVE_PUSH_PULL, "output drive push pull", NULL, false), @@ -161,7 +160,6 @@ static const struct pinconf_generic_params dt_params[] = { { "bias-pull-up", PIN_CONFIG_BIAS_PULL_UP, 1 }, { "bias-pull-pin-default", PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, 1 }, { "bias-pull-down", PIN_CONFIG_BIAS_PULL_DOWN, 1 }, - { "bi-directional", PIN_CONFIG_BIDIRECTIONAL, 1 }, { "drive-open-drain", PIN_CONFIG_DRIVE_OPEN_DRAIN, 0 }, { "drive-open-source", PIN_CONFIG_DRIVE_OPEN_SOURCE, 0 }, { "drive-push-pull", PIN_CONFIG_DRIVE_PUSH_PULL, 0 }, @@ -174,7 +172,6 @@ static const struct pinconf_generic_params dt_params[] = { { "input-schmitt-enable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 1 }, { "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 }, { "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 }, - { "output-enable", PIN_CONFIG_OUTPUT, 1, }, { "output-high", PIN_CONFIG_OUTPUT, 1, }, { "output-low", PIN_CONFIG_OUTPUT, 0, }, { "power-source", PIN_CONFIG_POWER_SOURCE, 0 }, diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 279e3c5326e3..7620eb127cff 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -42,8 +42,6 @@ * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high * impedance to VDD). If the argument is != 0 pull-up is enabled, * if it is 0, pull-up is total, i.e. the pin is connected to VDD. - * @PIN_CONFIG_BIDIRECTIONAL: the pin will be configured to allow simultaneous - * input and output operations. * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open * collector) which means it is usually wired with other output ports * which are then pulled up with an external resistor. Setting this @@ -98,7 +96,6 @@ enum pin_config_param { PIN_CONFIG_BIAS_PULL_DOWN, PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, PIN_CONFIG_BIAS_PULL_UP, - PIN_CONFIG_BIDIRECTIONAL, PIN_CONFIG_DRIVE_OPEN_DRAIN, PIN_CONFIG_DRIVE_OPEN_SOURCE, PIN_CONFIG_DRIVE_PUSH_PULL, From 020e0b1c8f19f1fc3bce23beeccd80c574ca0e49 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Thu, 11 May 2017 20:24:31 +0200 Subject: [PATCH 039/249] gpiolib: Add stubs for gpiod lookup table interface Add stubs for gpiod_add_lookup_table() and gpiod_remove_lookup_table() for the !GPIOLIB case to prevent build errors. Signed-off-by: Anatolij Gustschin Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- include/linux/gpio/machine.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index c0d712d22b07..f738d50cc17d 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -56,7 +56,14 @@ struct gpiod_lookup_table { .flags = _flags, \ } +#ifdef CONFIG_GPIOLIB void gpiod_add_lookup_table(struct gpiod_lookup_table *table); void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); +#else +static inline +void gpiod_add_lookup_table(struct gpiod_lookup_table *table) {} +static inline +void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) {} +#endif #endif /* __LINUX_GPIO_MACHINE_H */ From 76dd1fbebbaebab294dc09230960238746b883b1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 15 May 2017 09:31:41 +0200 Subject: [PATCH 040/249] HID: asus: Add support for T100 keyboard The keyboard dock used with the Asus Transformer T100 series, uses the same vendor-defined 0xff31 usage-page as some other Asus keyboards. But with a small twist, it has a small descriptor bug which needs to be fixed up for things to work. This commit adds the USB-ID for this keyboard to the hid-asus driver and makes asus_report_fixup fix the descriptor issue, fixing various special function keys on this keyboard not working. Signed-off-by: Hans de Goede Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 10 ++++++++++ drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + 3 files changed, 12 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 16df6cc90235..101ab2e63d18 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -69,6 +69,7 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad"); #define QUIRK_IS_MULTITOUCH BIT(3) #define QUIRK_NO_CONSUMER_USAGES BIT(4) #define QUIRK_USE_KBD_BACKLIGHT BIT(5) +#define QUIRK_T100_KEYBOARD BIT(6) #define I2C_KEYBOARD_QUIRKS (QUIRK_FIX_NOTEBOOK_REPORT | \ QUIRK_NO_INIT_REPORTS | \ @@ -548,6 +549,12 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, hid_info(hdev, "Fixing up Asus notebook report descriptor\n"); rdesc[55] = 0xdd; } + if (drvdata->quirks & QUIRK_T100_KEYBOARD && + *rsize == 76 && rdesc[73] == 0x81 && rdesc[74] == 0x01) { + hid_info(hdev, "Fixing up Asus T100 keyb report descriptor\n"); + rdesc[74] &= ~HID_MAIN_ITEM_CONSTANT; + } + return rdesc; } @@ -560,6 +567,9 @@ static const struct hid_device_id asus_devices[] = { USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2), QUIRK_USE_KBD_BACKLIGHT }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD), + QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES }, { } }; MODULE_DEVICE_TABLE(hid, asus_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 38d041510e1d..04cee65531d7 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1855,6 +1855,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_I2C_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD) }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1) }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2) }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD) }, { HID_USB_DEVICE(USB_VENDOR_ID_AUREAL, USB_DEVICE_ID_AUREAL_W01RN) }, { HID_USB_DEVICE(USB_VENDOR_ID_BELKIN, USB_DEVICE_ID_FLIP_KVM) }, { HID_USB_DEVICE(USB_VENDOR_ID_BETOP_2185BFM, 0x2208) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 8e8a1baee090..8ca1e8ce0af2 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -173,6 +173,7 @@ #define USB_VENDOR_ID_ASUSTEK 0x0b05 #define USB_DEVICE_ID_ASUSTEK_LCM 0x1726 #define USB_DEVICE_ID_ASUSTEK_LCM2 0x175b +#define USB_DEVICE_ID_ASUSTEK_T100_KEYBOARD 0x17e0 #define USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD 0x8585 #define USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD 0x0101 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1 0x1854 From 664b7c4728821767e0228ee161bab87db2be58f1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 12 May 2017 08:47:57 -0700 Subject: [PATCH 041/249] pinctrl: core: Fix warning by removing bogus code Andre Przywara noticed that we can get the following warning with -EPROBE_DEFER: "WARNING: CPU: 1 PID: 89 at drivers/base/dd.c:349 driver_probe_device+0x2ac/0x2e8" Let's fix the issue by removing the indices as suggested by Tejun Heo . All we have to do here is kill the radix tree. I probably ended up with the indices after grepping for removal of all entries using radix_tree_for_each_slot() and the first match found was gmap_radix_tree_free(). Anyways, no need for indices here, and we can just do remove all the entries using radix_tree_for_each_slot() along how the item_kill_tree() test case does. Fixes: c7059c5ac70a ("pinctrl: core: Add generic pinctrl functions for managing groups") Fixes: a76edc89b100 ("pinctrl: core: Add generic pinctrl functions for managing groups") Reported-by: Andre Przywara Signed-off-by: Tony Lindgren Reviewed-by: Andre Przywara Tested-by: Andre Przywara Signed-off-by: Linus Walleij --- drivers/pinctrl/core.c | 20 +++----------------- drivers/pinctrl/pinmux.c | 21 ++++----------------- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 1653cbda6a82..bd459a93b0e7 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -680,30 +680,16 @@ EXPORT_SYMBOL_GPL(pinctrl_generic_remove_group); * pinctrl_generic_free_groups() - removes all pin groups * @pctldev: pin controller device * - * Note that the caller must take care of locking. + * Note that the caller must take care of locking. The pinctrl groups + * are allocated with devm_kzalloc() so no need to free them here. */ static void pinctrl_generic_free_groups(struct pinctrl_dev *pctldev) { struct radix_tree_iter iter; - struct group_desc *group; - unsigned long *indices; void **slot; - int i = 0; - - indices = devm_kzalloc(pctldev->dev, sizeof(*indices) * - pctldev->num_groups, GFP_KERNEL); - if (!indices) - return; radix_tree_for_each_slot(slot, &pctldev->pin_group_tree, &iter, 0) - indices[i++] = iter.index; - - for (i = 0; i < pctldev->num_groups; i++) { - group = radix_tree_lookup(&pctldev->pin_group_tree, - indices[i]); - radix_tree_delete(&pctldev->pin_group_tree, indices[i]); - devm_kfree(pctldev->dev, group); - } + radix_tree_delete(&pctldev->pin_group_tree, iter.index); pctldev->num_groups = 0; } diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 9fd6d9087dc5..16b3ae5e4f44 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -826,30 +826,17 @@ EXPORT_SYMBOL_GPL(pinmux_generic_remove_function); * pinmux_generic_free_functions() - removes all functions * @pctldev: pin controller device * - * Note that the caller must take care of locking. + * Note that the caller must take care of locking. The pinctrl + * functions are allocated with devm_kzalloc() so no need to free + * them here. */ void pinmux_generic_free_functions(struct pinctrl_dev *pctldev) { struct radix_tree_iter iter; - struct function_desc *function; - unsigned long *indices; void **slot; - int i = 0; - - indices = devm_kzalloc(pctldev->dev, sizeof(*indices) * - pctldev->num_functions, GFP_KERNEL); - if (!indices) - return; radix_tree_for_each_slot(slot, &pctldev->pin_function_tree, &iter, 0) - indices[i++] = iter.index; - - for (i = 0; i < pctldev->num_functions; i++) { - function = radix_tree_lookup(&pctldev->pin_function_tree, - indices[i]); - radix_tree_delete(&pctldev->pin_function_tree, indices[i]); - devm_kfree(pctldev->dev, function); - } + radix_tree_delete(&pctldev->pin_function_tree, iter.index); pctldev->num_functions = 0; } From f52236e0b0a0820e938e16a776309e76b7bd6c43 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Thu, 18 May 2017 13:47:25 +0300 Subject: [PATCH 042/249] dm verity: fix no salt use case DM-Verity has an (undocumented) mode where no salt is used. This was never handled directly by the DM-Verity code, instead working due to the fact that calling crypto_shash_update() with a zero length data is an implicit noop. This is no longer the case now that we have switched to crypto_ahash_update(). Fix the issue by introducing explicit handling of the no salt use case to DM-Verity. Signed-off-by: Gilad Ben-Yossef Reported-by: Marian Csontos Fixes: d1ac3ff ("dm verity: switch to using asynchronous hash crypto API") Tested-by: Milan Broz Signed-off-by: Mike Snitzer --- drivers/md/dm-verity-target.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 97de961a3bfc..1ec9b2c51c07 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -166,7 +166,7 @@ static int verity_hash_init(struct dm_verity *v, struct ahash_request *req, return r; } - if (likely(v->version >= 1)) + if (likely(v->salt_size && (v->version >= 1))) r = verity_hash_update(v, req, v->salt, v->salt_size, res); return r; @@ -177,7 +177,7 @@ static int verity_hash_final(struct dm_verity *v, struct ahash_request *req, { int r; - if (unlikely(!v->version)) { + if (unlikely(v->salt_size && (!v->version))) { r = verity_hash_update(v, req, v->salt, v->salt_size, res); if (r < 0) { From 702a6204f804bad946c455e7cd8d50d79c9d1629 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 20 May 2017 14:56:21 -0400 Subject: [PATCH 043/249] dm integrity: use kvmalloc() instead of dm_integrity_kvmalloc() Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c7f7c8d76576..1feeb2ccf5a1 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2374,21 +2374,6 @@ static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) blk_queue_max_integrity_segments(disk->queue, UINT_MAX); } -/* FIXME: use new kvmalloc */ -static void *dm_integrity_kvmalloc(size_t size, gfp_t gfp) -{ - void *ptr = NULL; - - if (size <= PAGE_SIZE) - ptr = kmalloc(size, GFP_KERNEL | gfp); - if (!ptr && size <= KMALLOC_MAX_SIZE) - ptr = kmalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | gfp); - if (!ptr) - ptr = __vmalloc(size, GFP_KERNEL | gfp, PAGE_KERNEL); - - return ptr; -} - static void dm_integrity_free_page_list(struct dm_integrity_c *ic, struct page_list *pl) { unsigned i; @@ -2407,7 +2392,7 @@ static struct page_list *dm_integrity_alloc_page_list(struct dm_integrity_c *ic) struct page_list *pl; unsigned i; - pl = dm_integrity_kvmalloc(page_list_desc_size, __GFP_ZERO); + pl = kvmalloc(page_list_desc_size, GFP_KERNEL | __GFP_ZERO); if (!pl) return NULL; @@ -2437,7 +2422,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int struct scatterlist **sl; unsigned i; - sl = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), __GFP_ZERO); + sl = kvmalloc(ic->journal_sections * sizeof(struct scatterlist *), GFP_KERNEL | __GFP_ZERO); if (!sl) return NULL; @@ -2453,7 +2438,7 @@ static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_int n_pages = (end_index - start_index + 1); - s = dm_integrity_kvmalloc(n_pages * sizeof(struct scatterlist), 0); + s = kvmalloc(n_pages * sizeof(struct scatterlist), GFP_KERNEL); if (!s) { dm_integrity_free_journal_scatterlist(ic, sl); return NULL; @@ -2617,7 +2602,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) goto bad; } - sg = dm_integrity_kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), 0); + sg = kvmalloc((ic->journal_pages + 1) * sizeof(struct scatterlist), GFP_KERNEL); if (!sg) { *error = "Unable to allocate sg list"; r = -ENOMEM; @@ -2673,7 +2658,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) r = -ENOMEM; goto bad; } - ic->sk_requests = dm_integrity_kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), __GFP_ZERO); + ic->sk_requests = kvmalloc(ic->journal_sections * sizeof(struct skcipher_request *), GFP_KERNEL | __GFP_ZERO); if (!ic->sk_requests) { *error = "Unable to allocate sk requests"; r = -ENOMEM; @@ -2740,7 +2725,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error) r = -ENOMEM; goto bad; } - ic->journal_tree = dm_integrity_kvmalloc(journal_tree_size, 0); + ic->journal_tree = kvmalloc(journal_tree_size, GFP_KERNEL); if (!ic->journal_tree) { *error = "Could not allocate memory for journal tree"; r = -ENOMEM; From 8c1e2162f27b319da913683143c0c6c09b083ebb Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Thu, 18 May 2017 12:00:51 -0700 Subject: [PATCH 044/249] dm ioctl: restore __GFP_HIGH in copy_params() Commit d224e9381897 ("drivers/md/dm-ioctl.c: use kvmalloc rather than opencoded variant") left out the __GFP_HIGH flag when converting from __vmalloc to kvmalloc. This can cause the DM ioctl to fail in some low memory situations where it wouldn't have failed earlier. Add __GFP_HIGH back to avoid any potential regression. Fixes: d224e9381897 ("drivers/md/dm-ioctl.c: use kvmalloc rather than opencoded variant") Signed-off-by: Junaid Shahid Signed-off-by: Mikulas Patocka Acked-by: David Rientjes Signed-off-by: Mike Snitzer --- drivers/md/dm-ioctl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 0555b4410e05..41852ae287a5 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1710,12 +1710,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern } /* - * Try to avoid low memory issues when a device is suspended. + * Use __GFP_HIGH to avoid low memory issues when a device is + * suspended and the ioctl is needed to resume it. * Use kmalloc() rather than vmalloc() when we can. */ dmi = NULL; noio_flag = memalloc_noio_save(); - dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL); + dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL | __GFP_HIGH); memalloc_noio_restore(noio_flag); if (!dmi) { From 1999f108c983a7287a847b09e29dac25b9301dee Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Wed, 17 May 2017 15:49:01 +0800 Subject: [PATCH 045/249] drm/i915/gvt: Disable compression workaround for Gen9 With enabling this workaround, can observe GPU hang issue on Gen9. As currently host side doesn't have this workaround, disable it from GVT side. v2: - Fix indent error.(Zhenyu) Cc: Zhenyu Wang Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 30 ++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index c995e540ff96..0ffd69654592 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1366,18 +1366,28 @@ static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t reg = {.reg = offset}; + u32 v = *(u32 *)p_data; + + if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) + return intel_vgpu_default_mmio_write(vgpu, + offset, p_data, bytes); switch (offset) { case 0x4ddc: - vgpu_vreg(vgpu, offset) = 0x8000003c; - /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl */ - I915_WRITE(reg, vgpu_vreg(vgpu, offset)); + /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 31); break; case 0x42080: - vgpu_vreg(vgpu, offset) = 0x8000; - /* WaCompressedResourceDisplayNewHashMode:skl */ - I915_WRITE(reg, vgpu_vreg(vgpu, offset)); + /* bypass WaCompressedResourceDisplayNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 15); + break; + case 0xe194: + /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 8); + break; + case 0x7014: + /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ + vgpu_vreg(vgpu, offset) = v & ~(1 << 13); break; default: return -EINVAL; @@ -1634,7 +1644,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + skl_misc_ctl_write); MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2568,7 +2579,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x6e570, D_BDW_PLUS); MMIO_D(0x65f10, D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, + skl_misc_ctl_write); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); From c61872c9833d17d3807fb999096917c1f9eaada0 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 17 May 2017 13:25:12 +0300 Subject: [PATCH 046/249] firmware: dmi: Add DMI_PRODUCT_FAMILY identification string Sometimes it is more convenient to be able to match a whole family of products, like in case of bunch of Chromebooks based on Intel_Strago to apply a driver quirk instead of quirking each machine one-by-one. This adds support for DMI_PRODUCT_FAMILY identification string and also exports it to the userspace through sysfs attribute just like the existing ones. Suggested-by: Dmitry Torokhov Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/firmware/dmi-id.c | 2 ++ drivers/firmware/dmi_scan.c | 1 + include/linux/mod_devicetable.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c index 44c01390d035..dc269cb288c2 100644 --- a/drivers/firmware/dmi-id.c +++ b/drivers/firmware/dmi-id.c @@ -47,6 +47,7 @@ DEFINE_DMI_ATTR_WITH_SHOW(product_name, 0444, DMI_PRODUCT_NAME); DEFINE_DMI_ATTR_WITH_SHOW(product_version, 0444, DMI_PRODUCT_VERSION); DEFINE_DMI_ATTR_WITH_SHOW(product_serial, 0400, DMI_PRODUCT_SERIAL); DEFINE_DMI_ATTR_WITH_SHOW(product_uuid, 0400, DMI_PRODUCT_UUID); +DEFINE_DMI_ATTR_WITH_SHOW(product_family, 0400, DMI_PRODUCT_FAMILY); DEFINE_DMI_ATTR_WITH_SHOW(board_vendor, 0444, DMI_BOARD_VENDOR); DEFINE_DMI_ATTR_WITH_SHOW(board_name, 0444, DMI_BOARD_NAME); DEFINE_DMI_ATTR_WITH_SHOW(board_version, 0444, DMI_BOARD_VERSION); @@ -191,6 +192,7 @@ static void __init dmi_id_init_attr_table(void) ADD_DMI_ATTR(product_version, DMI_PRODUCT_VERSION); ADD_DMI_ATTR(product_serial, DMI_PRODUCT_SERIAL); ADD_DMI_ATTR(product_uuid, DMI_PRODUCT_UUID); + ADD_DMI_ATTR(product_family, DMI_PRODUCT_FAMILY); ADD_DMI_ATTR(board_vendor, DMI_BOARD_VENDOR); ADD_DMI_ATTR(board_name, DMI_BOARD_NAME); ADD_DMI_ATTR(board_version, DMI_BOARD_VERSION); diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 54be60ead08f..93f7acdaac7a 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -430,6 +430,7 @@ static void __init dmi_decode(const struct dmi_header *dm, void *dummy) dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); dmi_save_uuid(dm, DMI_PRODUCT_UUID, 8); + dmi_save_ident(dm, DMI_PRODUCT_FAMILY, 26); break; case 2: /* Base Board Information */ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 566fda587fcf..3f74ef2281e8 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -467,6 +467,7 @@ enum dmi_field { DMI_PRODUCT_VERSION, DMI_PRODUCT_SERIAL, DMI_PRODUCT_UUID, + DMI_PRODUCT_FAMILY, DMI_BOARD_VENDOR, DMI_BOARD_NAME, DMI_BOARD_VERSION, From 2a8209fa68236ad65363dba03db5dbced520268a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 17 May 2017 13:25:14 +0300 Subject: [PATCH 047/249] pinctrl: cherryview: Extend the Chromebook DMI quirk to Intel_Strago systems It turns out there are quite many Chromebooks out there that have the same keyboard issue than Acer Chromebook. All of them are based on Intel_Strago reference and report their DMI_PRODUCT_FAMILY as "Intel_Strago" (Samsung Chromebook 3 and Cyan Chromebooks are exceptions for which we add separate entries). Instead of adding each machine to the quirk table, we use DMI_PRODUCT_FAMILY of "Intel_Strago" that hopefully covers most of the machines out there currently. Link: https://bugzilla.kernel.org/show_bug.cgi?id=194945 Suggested: Dmitry Torokhov Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index e35d0fe4c737..20f1b4493994 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1539,13 +1539,26 @@ static void chv_gpio_irq_handler(struct irq_desc *desc) * is not listed below. */ static const struct dmi_system_id chv_no_valid_mask[] = { + /* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */ { - /* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */ - .ident = "Acer Chromebook (CYAN)", + .ident = "Intel_Strago based Chromebooks (All models)", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), - DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"), - DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"), + }, + }, + { + .ident = "Acer Chromebook R11 (Cyan)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"), + }, + }, + { + .ident = "Samsung Chromebook 3 (Celes)", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Celes"), }, }, {} From da6c2addf66d7ff7d0b090d6267d4292f951e4e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 18 May 2017 11:23:55 +0200 Subject: [PATCH 048/249] pinctrl: mxs: atomically switch mux and drive strength config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To set the mux mode of a pin two bits must be set. Up to now this is implemented using the following idiom: writel(mask, reg + CLR); writel(value, reg + SET); . This however results in the mux mode being 0 between the two writes. On my machine there is an IC's reset pin connected to LCD_D20. The bootloader configures this pin as GPIO output-high (i.e. not holding the IC in reset). When Linux reconfigures the pin to GPIO the short time LCD_D20 is muxed as LCD_D20 instead of GPIO_1_20 is enough to confuse the connected IC. The same problem is present for the pin's drive strength setting which is reset to low drive strength before using the right value. So instead of relying on the hardware to modify the register setting using two writes implement the bit toggling using read-modify-write. Fixes: 17723111e64f ("pinctrl: add pinctrl-mxs support") Signed-off-by: Uwe Kleine-König Acked-by: Shawn Guo Signed-off-by: Linus Walleij --- drivers/pinctrl/freescale/pinctrl-mxs.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/freescale/pinctrl-mxs.c b/drivers/pinctrl/freescale/pinctrl-mxs.c index 41b5b07d5a2b..6852010a6d70 100644 --- a/drivers/pinctrl/freescale/pinctrl-mxs.c +++ b/drivers/pinctrl/freescale/pinctrl-mxs.c @@ -194,6 +194,16 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } +static void mxs_pinctrl_rmwl(u32 value, u32 mask, u8 shift, void __iomem *reg) +{ + u32 tmp; + + tmp = readl(reg); + tmp &= ~(mask << shift); + tmp |= value << shift; + writel(tmp, reg); +} + static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, unsigned group) { @@ -211,8 +221,7 @@ static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, reg += bank * 0x20 + pin / 16 * 0x10; shift = pin % 16 * 2; - writel(0x3 << shift, reg + CLR); - writel(g->muxsel[i] << shift, reg + SET); + mxs_pinctrl_rmwl(g->muxsel[i], 0x3, shift, reg); } return 0; @@ -279,8 +288,7 @@ static int mxs_pinconf_group_set(struct pinctrl_dev *pctldev, /* mA */ if (config & MA_PRESENT) { shift = pin % 8 * 4; - writel(0x3 << shift, reg + CLR); - writel(ma << shift, reg + SET); + mxs_pinctrl_rmwl(ma, 0x3, shift, reg); } /* vol */ From 7903d4f5e1dec53963cba9b1bc472a76a3532e07 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Mon, 22 May 2017 14:25:49 +0800 Subject: [PATCH 049/249] pinctrl: sunxi: Fix SPDIF function name for A83T We use well known standard names for functions that have name, such as I2C, SPI, SPDIF, etc.. Fix the function name of SPDIF, which was named OWA (One Wire Audio) based on Allwinner datasheets. Fixes: 4730f33f0d82 ("pinctrl: sunxi: add allwinner A83T PIO controller support") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c index 9aec1d2232dd..6624499eae72 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a83t.c @@ -394,7 +394,7 @@ static const struct sunxi_desc_pin sun8i_a83t_pins[] = { SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 18), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x3, "owa")), /* DOUT */ + SUNXI_FUNCTION(0x3, "spdif")), /* DOUT */ SUNXI_PIN(SUNXI_PINCTRL_PIN(E, 19), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out")), From 9a307403d374b993061f5992a6e260c944920d0b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 23 May 2017 12:24:40 -0400 Subject: [PATCH 050/249] nfsd4: fix null dereference on replay if we receive a compound such that: - the sessionid, slot, and sequence number in the SEQUENCE op match a cached succesful reply with N ops, and - the Nth operation of the compound is a PUTFH, PUTPUBFH, PUTROOTFH, or RESTOREFH, then nfsd4_sequence will return 0 and set cstate->status to nfserr_replay_cache. The current filehandle will not be set. This will cause us to call check_nfsd_access with first argument NULL. To nfsd4_compound it looks like we just succesfully executed an operation that set a filehandle, but the current filehandle is not set. Fix this by moving the nfserr_replay_cache earlier. There was never any reason to have it after the encode_op label, since the only case where he hit that is when opdesc->op_func sets it. Note that there are two ways we could hit this case: - a client is resending a previously sent compound that ended with one of the four PUTFH-like operations, or - a client is sending a *new* compound that (incorrectly) shares sessionid, slot, and sequence number with a previously sent compound, and the length of the previously sent compound happens to match the position of a PUTFH-like operation in the new compound. The second is obviously incorrect client behavior. The first is also very strange--the only purpose of a PUTFH-like operation is to set the current filehandle to be used by the following operation, so there's no point in having it as the last in a compound. So it's likely this requires a buggy or malicious client to reproduce. Reported-by: Scott Mayhew Cc: stable@kernel.vger.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c453a1998e00..dadb3bf305b2 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1769,6 +1769,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, opdesc->op_get_currentstateid(cstate, &op->u); op->status = opdesc->op_func(rqstp, cstate, &op->u); + /* Only from SEQUENCE */ + if (cstate->status == nfserr_replay_cache) { + dprintk("%s NFS4.1 replay from cache\n", __func__); + status = op->status; + goto out; + } if (!op->status) { if (opdesc->op_set_currentstateid) opdesc->op_set_currentstateid(cstate, &op->u); @@ -1779,14 +1785,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (need_wrongsec_check(rqstp)) op->status = check_nfsd_access(current_fh->fh_export, rqstp); } - encode_op: - /* Only from SEQUENCE */ - if (cstate->status == nfserr_replay_cache) { - dprintk("%s NFS4.1 replay from cache\n", __func__); - status = op->status; - goto out; - } if (op->status == nfserr_replay_me) { op->replay = &cstate->replay_owner->so_replay; nfsd4_encode_replay(&resp->xdr, op); From 0648a07c9b22acc33ead0645cf8f607b0c9c7e32 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Sat, 20 May 2017 09:58:10 +0200 Subject: [PATCH 051/249] scsi: scsi_dh_rdac: Use ctlr directly in rdac_failover_get() rdac_failover_get references struct rdac_controller as ctlr->ms_sdev->handler_data->ctlr for no apparent reason. Besides being inefficient this also introduces a null-pointer dereference as send_mode_select() sets ctlr->ms_sdev to NULL before calling rdac_failover_get(): [ 18.432550] device-mapper: multipath service-time: version 0.3.0 loaded [ 18.436124] BUG: unable to handle kernel NULL pointer dereference at 0000000000000790 [ 18.436129] IP: send_mode_select+0xca/0x560 [ 18.436129] PGD 0 [ 18.436130] P4D 0 [ 18.436130] [ 18.436132] Oops: 0000 [#1] SMP [ 18.436133] Modules linked in: dm_service_time sd_mod dm_multipath amdkfd amd_iommu_v2 radeon(+) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm qla2xxx drm serio_raw scsi_transport_fc bnx2 i2c_core dm_mirror dm_region_hash dm_log dm_mod [ 18.436143] CPU: 4 PID: 443 Comm: kworker/u16:2 Not tainted 4.12.0-rc1.1.el7.test.x86_64 #1 [ 18.436144] Hardware name: IBM BladeCenter LS22 -[79013SG]-/Server Blade, BIOS -[L8E164AUS-1.07]- 05/25/2011 [ 18.436145] Workqueue: kmpath_rdacd send_mode_select [ 18.436146] task: ffff880225116a40 task.stack: ffffc90002bd8000 [ 18.436148] RIP: 0010:send_mode_select+0xca/0x560 [ 18.436148] RSP: 0018:ffffc90002bdbda8 EFLAGS: 00010246 [ 18.436149] RAX: 0000000000000000 RBX: ffffc90002bdbe08 RCX: ffff88017ef04a80 [ 18.436150] RDX: ffffc90002bdbe08 RSI: ffff88017ef04a80 RDI: ffff8802248e4388 [ 18.436151] RBP: ffffc90002bdbe48 R08: 0000000000000000 R09: ffffffff81c104c0 [ 18.436151] R10: 00000000000001ff R11: 000000000000035a R12: ffffc90002bdbdd8 [ 18.436152] R13: ffff8802248e4390 R14: ffff880225152800 R15: ffff8802248e4400 [ 18.436153] FS: 0000000000000000(0000) GS:ffff880227d00000(0000) knlGS:0000000000000000 [ 18.436154] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.436154] CR2: 0000000000000790 CR3: 000000042535b000 CR4: 00000000000006e0 [ 18.436155] Call Trace: [ 18.436159] ? rdac_activate+0x14e/0x150 [ 18.436161] ? refcount_dec_and_test+0x11/0x20 [ 18.436162] ? kobject_put+0x1c/0x50 [ 18.436165] ? scsi_dh_activate+0x6f/0xd0 [ 18.436168] process_one_work+0x149/0x360 [ 18.436170] worker_thread+0x4d/0x3c0 [ 18.436172] kthread+0x109/0x140 [ 18.436173] ? rescuer_thread+0x380/0x380 [ 18.436174] ? kthread_park+0x60/0x60 [ 18.436176] ret_from_fork+0x2c/0x40 [ 18.436177] Code: 49 c7 46 20 00 00 00 00 4c 89 ef c6 07 00 0f 1f 40 00 45 31 ed c7 45 b0 05 00 00 00 44 89 6d b4 4d 89 f5 4c 8b 75 a8 49 8b 45 20 <48> 8b b0 90 07 00 00 48 8b 56 10 8b 42 10 48 8d 7a 28 85 c0 0f [ 18.436192] RIP: send_mode_select+0xca/0x560 RSP: ffffc90002bdbda8 [ 18.436192] CR2: 0000000000000790 [ 18.436198] ---[ end trace 40f3e4dca1ffabdd ]--- [ 18.436199] Kernel panic - not syncing: Fatal exception [ 18.436222] Kernel Offset: disabled [-- MARK -- Thu May 18 11:45:00 2017] Fixes: 327825574132 scsi_dh_rdac: switch to scsi_execute_req_flags() Cc: stable@vger.kernel.org Signed-off-by: Artem Savkov Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/device_handler/scsi_dh_rdac.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 3cbab8710e58..2ceff585f189 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -265,18 +265,16 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, struct list_head *list, unsigned char *cdb) { - struct scsi_device *sdev = ctlr->ms_sdev; - struct rdac_dh_data *h = sdev->handler_data; struct rdac_mode_common *common; unsigned data_size; struct rdac_queue_data *qdata; u8 *lun_table; - if (h->ctlr->use_ms10) { + if (ctlr->use_ms10) { struct rdac_pg_expanded *rdac_pg; data_size = sizeof(struct rdac_pg_expanded); - rdac_pg = &h->ctlr->mode_select.expanded; + rdac_pg = &ctlr->mode_select.expanded; memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40; @@ -288,7 +286,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, struct rdac_pg_legacy *rdac_pg; data_size = sizeof(struct rdac_pg_legacy); - rdac_pg = &h->ctlr->mode_select.legacy; + rdac_pg = &ctlr->mode_select.legacy; memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER; @@ -304,7 +302,7 @@ static unsigned int rdac_failover_get(struct rdac_controller *ctlr, } /* Prepare the command. */ - if (h->ctlr->use_ms10) { + if (ctlr->use_ms10) { cdb[0] = MODE_SELECT_10; cdb[7] = data_size >> 8; cdb[8] = data_size & 0xff; From 5e901d0b15c0cba8c5ba55e4be46fc5a0e2f3cb9 Mon Sep 17 00:00:00 2001 From: Arun Easi Date: Fri, 19 May 2017 01:33:15 -0700 Subject: [PATCH 052/249] scsi: qedi: Fix bad pte call trace when iscsiuio is stopped. munmap done by iscsiuio during a stop of the service triggers a "bad pte" warning sometimes. munmap kernel path goes through the mmapped pages and has a validation check for mapcount (in struct page) to be zero or above. kzalloc, which we had used to allocate udev->ctrl, uses slab allocations, which re-uses mapcount (union) for other purposes that can make the mapcount look negative. Avoid all these trouble by invoking one of the __get_free_pages wrappers to be used instead of kzalloc for udev->ctrl. BUG: Bad page map in process iscsiuio pte:80000000aa624067 pmd:3e6777067 page:ffffea0002a98900 count:2 mapcount:-2143289280 mapping: (null) index:0xffff8800aa624e00 page flags: 0x10075d00000090(dirty|slab) page dumped because: bad pte addr:00007fcba70a3000 vm_flags:0c0400fb anon_vma: (null) mapping:ffff8803edf66e90 index:0 Call Trace: dump_stack+0x19/0x1b print_bad_pte+0x1af/0x250 unmap_page_range+0x7a7/0x8a0 unmap_single_vma+0x81/0xf0 unmap_vmas+0x49/0x90 unmap_region+0xbe/0x140 ? vma_rb_erase+0x121/0x220 do_munmap+0x245/0x420 vm_munmap+0x41/0x60 SyS_munmap+0x22/0x30 tracesys+0xdd/0xe2 Signed-off-by: Arun Easi Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 92775a8b74b1..997e3052a706 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -151,6 +151,11 @@ static int qedi_uio_close(struct uio_info *uinfo, struct inode *inode) static void __qedi_free_uio_rings(struct qedi_uio_dev *udev) { + if (udev->uctrl) { + free_page((unsigned long)udev->uctrl); + udev->uctrl = NULL; + } + if (udev->ll2_ring) { free_page((unsigned long)udev->ll2_ring); udev->ll2_ring = NULL; @@ -169,7 +174,6 @@ static void __qedi_free_uio(struct qedi_uio_dev *udev) __qedi_free_uio_rings(udev); pci_dev_put(udev->pdev); - kfree(udev->uctrl); kfree(udev); } @@ -208,6 +212,11 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev) if (udev->ll2_ring || udev->ll2_buf) return rc; + /* Memory for control area. */ + udev->uctrl = (void *)get_zeroed_page(GFP_KERNEL); + if (!udev->uctrl) + return -ENOMEM; + /* Allocating memory for LL2 ring */ udev->ll2_ring_size = QEDI_PAGE_SIZE; udev->ll2_ring = (void *)get_zeroed_page(GFP_KERNEL | __GFP_COMP); @@ -237,7 +246,6 @@ static int __qedi_alloc_uio_rings(struct qedi_uio_dev *udev) static int qedi_alloc_uio_rings(struct qedi_ctx *qedi) { struct qedi_uio_dev *udev = NULL; - struct qedi_uio_ctrl *uctrl = NULL; int rc = 0; list_for_each_entry(udev, &qedi_udev_list, list) { @@ -258,21 +266,14 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi) goto err_udev; } - uctrl = kzalloc(sizeof(*uctrl), GFP_KERNEL); - if (!uctrl) { - rc = -ENOMEM; - goto err_uctrl; - } - udev->uio_dev = -1; udev->qedi = qedi; udev->pdev = qedi->pdev; - udev->uctrl = uctrl; rc = __qedi_alloc_uio_rings(udev); if (rc) - goto err_uio_rings; + goto err_uctrl; list_add(&udev->list, &qedi_udev_list); @@ -283,8 +284,6 @@ static int qedi_alloc_uio_rings(struct qedi_ctx *qedi) udev->rx_pkt = udev->ll2_buf + LL2_SINGLE_BUF_SIZE; return 0; - err_uio_rings: - kfree(uctrl); err_uctrl: kfree(udev); err_udev: From fc2fbf0d422b54b487c5e7413acd54cbac6d4151 Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:16 -0700 Subject: [PATCH 053/249] scsi: qedi: Correctly set firmware max supported BDs. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index 5ca3e8c28a3f..269dac620cf8 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -38,7 +38,7 @@ struct qedi_endpoint; #define QEDI_MAX_ISCSI_TASK 4096 #define QEDI_MAX_TASK_NUM 0x0FFF #define QEDI_MAX_ISCSI_CONNS_PER_HBA 1024 -#define QEDI_ISCSI_MAX_BDS_PER_CMD 256 /* Firmware max BDs is 256 */ +#define QEDI_ISCSI_MAX_BDS_PER_CMD 255 /* Firmware max BDs is 255 */ #define MAX_OUSTANDING_TASKS_PER_CON 1024 #define QEDI_MAX_BD_LEN 0xffff From d0788a528d9e09d7c17f05dd61c4cc492181f817 Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:17 -0700 Subject: [PATCH 054/249] scsi: qedi: Set dma_boundary to 0xfff. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi.h | 1 + drivers/scsi/qedi/qedi_iscsi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index 269dac620cf8..32632c9b2276 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -63,6 +63,7 @@ struct qedi_endpoint; #define QEDI_PAGE_MASK (~((QEDI_PAGE_SIZE) - 1)) #define QEDI_PAGE_SIZE 4096 +#define QEDI_HW_DMA_BOUNDARY 0xfff #define QEDI_PATH_HANDLE 0xFE0000000UL struct qedi_uio_ctrl { diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 3548d46f9b27..19177931b84c 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -59,6 +59,7 @@ struct scsi_host_template qedi_host_template = { .this_id = -1, .sg_tablesize = QEDI_ISCSI_MAX_BDS_PER_CMD, .max_sectors = 0xffff, + .dma_boundary = QEDI_HW_DMA_BOUNDARY, .cmd_per_lun = 128, .use_clustering = ENABLE_CLUSTERING, .shost_attrs = qedi_shost_attrs, From 0ea9314f4e6c69f8d732e0a9310114c2de35ada8 Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:18 -0700 Subject: [PATCH 055/249] scsi: qedi: Fix endpoint NULL panic in qedi_set_path. RIP: 0010:qedi_set_path+0x114/0x570 [qedi] Call Trace: [] iscsi_if_recv_msg+0x623/0x14a0 [] ? rhashtable_lookup_compare+0x36/0x70 [] iscsi_if_rx+0x8e/0x1f0 [] netlink_unicast+0xed/0x1b0 [] netlink_sendmsg+0x330/0x770 [] sock_sendmsg+0xb0/0xf0 [] ? __switch_to+0x17b/0x4b0 [] ? __schedule+0x2d8/0x900 [] ___sys_sendmsg+0x3a9/0x3c0 [] ? get_futex_key+0x1c8/0x2b0 [] ? futex_wake+0x80/0x160 Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_iscsi.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 19177931b84c..87f0af358b33 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -1224,8 +1224,12 @@ static int qedi_set_path(struct Scsi_Host *shost, struct iscsi_path *path_data) iscsi_cid = (u32)path_data->handle; qedi_ep = qedi->ep_tbl[iscsi_cid]; - QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_CONN, + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "iscsi_cid=0x%x, qedi_ep=%p\n", iscsi_cid, qedi_ep); + if (!qedi_ep) { + ret = -EINVAL; + goto set_path_exit; + } if (!is_valid_ether_addr(&path_data->mac_addr[0])) { QEDI_NOTICE(&qedi->dbg_ctx, "dst mac NOT VALID\n"); From 962ea1c0df6c5a36f4477aa4a10f4acc0f5f56bd Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:19 -0700 Subject: [PATCH 056/249] scsi: qedi: Set firmware tcp msl timer value. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 997e3052a706..62ba0550b68c 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -827,6 +827,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi) qedi->pf_params.iscsi_pf_params.num_uhq_pages_in_ring = num_sq_pages; qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues; qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug; + qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000; for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) { if ((1 << log_page_size) == PAGE_SIZE) From 3d61a3132212d6b1c8c6914700d5f6456712ac08 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Fri, 19 May 2017 01:33:20 -0700 Subject: [PATCH 057/249] scsi: qedi: set max_fin_rt default value max_fin_rt is the maximum re-transmission of FIN packets as part of the termination flow. After reaching this value the FW will send a single RESET. Signed-off-by: Nilesh Javali Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 62ba0550b68c..09a294634bc7 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -828,6 +828,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi) qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues; qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug; qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000; + qedi->pf_params.iscsi_pf_params.max_fin_rt = 2; for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) { if ((1 << log_page_size) == PAGE_SIZE) From b19775e4785996503b106e59d9d3a8839e677afd Mon Sep 17 00:00:00 2001 From: "manish.rangankar@cavium.com" Date: Fri, 19 May 2017 01:33:21 -0700 Subject: [PATCH 058/249] scsi: qedi: Fix endpoint NULL panic during recovery. Signed-off-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_fw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index d6978cbc56f0..8bc7ee1a8ca8 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -1494,6 +1494,8 @@ static int qedi_send_iscsi_tmf(struct qedi_conn *qedi_conn, tmf_hdr = (struct iscsi_tm *)mtask->hdr; qedi_cmd = (struct qedi_cmd *)mtask->dd_data; ep = qedi_conn->ep; + if (!ep) + return -ENODEV; tid = qedi_get_task_idx(qedi); if (tid == -1) From e274086e473c0cbea18051ae0a78a05f8d658f47 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 22 May 2017 17:46:58 +0800 Subject: [PATCH 059/249] drm/i915/gvt: clean up unsubmited workloads before destroying kmem cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to fix a memory leak issue caused by unfreed gvtg workload objects. Walk through the workload list and free all of the remained workloads before destroying kmem cache. [179.885211] INFO: Object 0xffff9cef10003b80 @offset=7040 [179.885657] kmem_cache_destroy gvt-g_vgpu_workload: Slab cache still has objects [179.886146] CPU: 2 PID: 2318 Comm: win_lucas Tainted: G    B   W       4.11.0+ #1 [179.887223] Call Trace: [179.887394] dump_stack+0x63/0x90 [179.887617] kmem_cache_destroy+0x1cf/0x1e0 [179.887960] intel_vgpu_clean_execlist+0x15/0x20 [i915] [179.888365] intel_gvt_destroy_vgpu+0x4c/0xd0 [i915] [179.888688] intel_vgpu_remove+0x2a/0x30 [kvmgt] [179.888988] mdev_device_remove_ops+0x23/0x50 [mdev] [179.889309] mdev_device_remove+0xe4/0x190 [mdev] [179.889615] remove_store+0x7d/0xb0 [mdev] [179.889885] dev_attr_store+0x18/0x30 [179.890129] sysfs_kf_write+0x37/0x40 [179.890371] kernfs_fop_write+0x107/0x180 [179.890632] __vfs_write+0x37/0x160 [179.890865] ? kmem_cache_alloc+0xd7/0x1b0 [179.891116] ? apparmor_file_permission+0x1a/0x20 [179.891372] ? security_file_permission+0x3b/0xc0 [179.891628] vfs_write+0xb8/0x1b0 [179.891812] SyS_write+0x55/0xc0 [179.891992] entry_SYSCALL_64_fastpath+0x1e/0xad Signed-off-by: Changbin Du Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/execlist.c | 30 +++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index dca989eb2d42..24fe04d6307b 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -779,8 +779,26 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } +static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_engine_cs *engine; + struct intel_vgpu_workload *pos, *n; + unsigned int tmp; + + /* free the unsubmited workloads in the queues. */ + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + list_for_each_entry_safe(pos, n, + &vgpu->workload_q_head[engine->id], list) { + list_del_init(&pos->list); + free_workload(pos); + } + } +} + void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) { + clean_workloads(vgpu, ALL_ENGINES); kmem_cache_destroy(vgpu->workloads); } @@ -811,17 +829,9 @@ void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu, { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; - struct intel_vgpu_workload *pos, *n; unsigned int tmp; - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { - /* free the unsubmited workload in the queue */ - list_for_each_entry_safe(pos, n, - &vgpu->workload_q_head[engine->id], list) { - list_del_init(&pos->list); - free_workload(pos); - } - + clean_workloads(vgpu, engine_mask); + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) init_vgpu_execlist(vgpu, engine->id); - } } From 75b61250bf687c686ba6850c34eccc1303b0b827 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Tue, 16 May 2017 19:23:44 +0530 Subject: [PATCH 060/249] scsi: libcxgbi: fix skb use after free skb->data is assigned to task->hdr in cxgbi_conn_alloc_pdu(), skb gets freed after tx but task->hdr is still dereferenced in iscsi_tcp_task_xmit() to avoid this call skb_get() after allocating skb and free the skb in cxgbi_cleanup_task() or before allocating new skb in cxgbi_conn_alloc_pdu(). Signed-off-by: Varun Prakash Signed-off-by: Martin K. Petersen --- drivers/scsi/cxgbi/libcxgbi.c | 25 ++++++++++++++++++------- drivers/scsi/cxgbi/libcxgbi.h | 16 ++++++++-------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index bd7d39ecbd24..fb06974c88c1 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -1873,6 +1873,11 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode) tcp_task->dd_data = tdata; task->hdr = NULL; + if (tdata->skb) { + kfree_skb(tdata->skb); + tdata->skb = NULL; + } + if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS) && (opcode == ISCSI_OP_SCSI_DATA_OUT || (opcode == ISCSI_OP_SCSI_CMD && @@ -1890,6 +1895,7 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode) return -ENOMEM; } + skb_get(tdata->skb); skb_reserve(tdata->skb, cdev->skb_tx_rsvd); task->hdr = (struct iscsi_hdr *)tdata->skb->data; task->hdr_max = SKB_TX_ISCSI_PDU_HEADER_MAX; /* BHS + AHS */ @@ -2035,9 +2041,9 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) unsigned int datalen; int err; - if (!skb) { + if (!skb || cxgbi_skcb_test_flag(skb, SKCBF_TX_DONE)) { log_debug(1 << CXGBI_DBG_ISCSI | 1 << CXGBI_DBG_PDU_TX, - "task 0x%p, skb NULL.\n", task); + "task 0x%p, skb 0x%p\n", task, skb); return 0; } @@ -2050,7 +2056,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) } datalen = skb->data_len; - tdata->skb = NULL; /* write ppod first if using ofldq to write ppod */ if (ttinfo->flags & CXGBI_PPOD_INFO_FLAG_VALID) { @@ -2078,6 +2083,7 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) pdulen += ISCSI_DIGEST_SIZE; task->conn->txdata_octets += pdulen; + cxgbi_skcb_set_flag(skb, SKCBF_TX_DONE); return 0; } @@ -2086,7 +2092,6 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) "task 0x%p, skb 0x%p, len %u/%u, %d EAGAIN.\n", task, skb, skb->len, skb->data_len, err); /* reset skb to send when we are called again */ - tdata->skb = skb; return err; } @@ -2094,7 +2099,8 @@ int cxgbi_conn_xmit_pdu(struct iscsi_task *task) "itt 0x%x, skb 0x%p, len %u/%u, xmit err %d.\n", task->itt, skb, skb->len, skb->data_len, err); - kfree_skb(skb); + __kfree_skb(tdata->skb); + tdata->skb = NULL; iscsi_conn_printk(KERN_ERR, task->conn, "xmit err %d.\n", err); iscsi_conn_failure(task->conn, ISCSI_ERR_XMIT_FAILED); @@ -2113,8 +2119,10 @@ void cxgbi_cleanup_task(struct iscsi_task *task) tcp_task->dd_data = NULL; /* never reached the xmit task callout */ - if (tdata->skb) - __kfree_skb(tdata->skb); + if (tdata->skb) { + kfree_skb(tdata->skb); + tdata->skb = NULL; + } task_release_itt(task, task->hdr_itt); memset(tdata, 0, sizeof(*tdata)); @@ -2714,6 +2722,9 @@ EXPORT_SYMBOL_GPL(cxgbi_attr_is_visible); static int __init libcxgbi_init_module(void) { pr_info("%s", version); + + BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) < + sizeof(struct cxgbi_skb_cb)); return 0; } diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h index 18e0ea83d361..239462a75760 100644 --- a/drivers/scsi/cxgbi/libcxgbi.h +++ b/drivers/scsi/cxgbi/libcxgbi.h @@ -195,7 +195,8 @@ struct cxgbi_skb_rx_cb { }; struct cxgbi_skb_tx_cb { - void *l2t; + void *handle; + void *arp_err_handler; struct sk_buff *wr_next; }; @@ -203,6 +204,7 @@ enum cxgbi_skcb_flags { SKCBF_TX_NEED_HDR, /* packet needs a header */ SKCBF_TX_MEM_WRITE, /* memory write */ SKCBF_TX_FLAG_COMPL, /* wr completion flag */ + SKCBF_TX_DONE, /* skb tx done */ SKCBF_RX_COALESCED, /* received whole pdu */ SKCBF_RX_HDR, /* received pdu header */ SKCBF_RX_DATA, /* received pdu payload */ @@ -215,13 +217,13 @@ enum cxgbi_skcb_flags { }; struct cxgbi_skb_cb { - unsigned char ulp_mode; - unsigned long flags; - unsigned int seq; union { struct cxgbi_skb_rx_cb rx; struct cxgbi_skb_tx_cb tx; }; + unsigned char ulp_mode; + unsigned long flags; + unsigned int seq; }; #define CXGBI_SKB_CB(skb) ((struct cxgbi_skb_cb *)&((skb)->cb[0])) @@ -374,11 +376,9 @@ static inline void cxgbi_sock_enqueue_wr(struct cxgbi_sock *csk, cxgbi_skcb_tx_wr_next(skb) = NULL; /* * We want to take an extra reference since both us and the driver - * need to free the packet before it's really freed. We know there's - * just one user currently so we use atomic_set rather than skb_get - * to avoid the atomic op. + * need to free the packet before it's really freed. */ - atomic_set(&skb->users, 2); + skb_get(skb); if (!csk->wr_pending_head) csk->wr_pending_head = skb; From f3cdbe39b2ab0636dec0d5d43b54f1061ce7566c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 17 May 2017 04:34:37 -0500 Subject: [PATCH 061/249] tcmu: fix crash during device removal We currently do tcmu_free_device ->tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE) -> uio_unregister_device -> kfree(tcmu_dev). The problem is that the kernel does not wait for userspace to do the close() on the uio device before freeing the tcmu_dev. We can then hit a race where the kernel frees the tcmu_dev before userspace does close() and so when close() -> release -> tcmu_release is done, we try to access a freed tcmu_dev. This patch made over the target-pending master branch moves the freeing of the tcmu_dev to when the last reference has been dropped. This also fixes a leak where if tcmu_configure_device was not called on a device we did not free udev->name which was allocated at tcmu_alloc_device time. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 46 ++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9045837f748b..beb5f098f32d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -97,7 +97,7 @@ struct tcmu_hba { struct tcmu_dev { struct list_head node; - + struct kref kref; struct se_device se_dev; char *name; @@ -969,6 +969,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL); if (!udev) return NULL; + kref_init(&udev->kref); udev->name = kstrdup(name, GFP_KERNEL); if (!udev->name) { @@ -1145,6 +1146,24 @@ static int tcmu_open(struct uio_info *info, struct inode *inode) return 0; } +static void tcmu_dev_call_rcu(struct rcu_head *p) +{ + struct se_device *dev = container_of(p, struct se_device, rcu_head); + struct tcmu_dev *udev = TCMU_DEV(dev); + + kfree(udev->uio_info.name); + kfree(udev->name); + kfree(udev); +} + +static void tcmu_dev_kref_release(struct kref *kref) +{ + struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref); + struct se_device *dev = &udev->se_dev; + + call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); +} + static int tcmu_release(struct uio_info *info, struct inode *inode) { struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); @@ -1152,7 +1171,8 @@ static int tcmu_release(struct uio_info *info, struct inode *inode) clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags); pr_debug("close\n"); - + /* release ref from configure */ + kref_put(&udev->kref, tcmu_dev_kref_release); return 0; } @@ -1272,6 +1292,12 @@ static int tcmu_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = 128; dev->dev_attrib.hw_queue_depth = 128; + /* + * Get a ref incase userspace does a close on the uio device before + * LIO has initiated tcmu_free_device. + */ + kref_get(&udev->kref); + ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name, udev->uio_info.uio_dev->minor); if (ret) @@ -1284,11 +1310,13 @@ static int tcmu_configure_device(struct se_device *dev) return 0; err_netlink: + kref_put(&udev->kref, tcmu_dev_kref_release); uio_unregister_device(&udev->uio_info); err_register: vfree(udev->mb_addr); err_vzalloc: kfree(info->name); + info->name = NULL; return ret; } @@ -1302,14 +1330,6 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) return -EINVAL; } -static void tcmu_dev_call_rcu(struct rcu_head *p) -{ - struct se_device *dev = container_of(p, struct se_device, rcu_head); - struct tcmu_dev *udev = TCMU_DEV(dev); - - kfree(udev); -} - static bool tcmu_dev_configured(struct tcmu_dev *udev) { return udev->uio_info.uio_dev ? true : false; @@ -1364,10 +1384,10 @@ static void tcmu_free_device(struct se_device *dev) udev->uio_info.uio_dev->minor); uio_unregister_device(&udev->uio_info); - kfree(udev->uio_info.name); - kfree(udev->name); } - call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); + + /* release ref from init */ + kref_put(&udev->kref, tcmu_dev_kref_release); } enum { From 0037ae47812b1f431cc602100d1d51f37d77b61e Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 22 May 2017 16:05:22 +0200 Subject: [PATCH 062/249] dmaengine: ep93xx: Always start from BASE0 The current buffer is being reset to zero on device_free_chan_resources() but not on device_terminate_all(). It could happen that HW is restarted and expects BASE0 to be used, but the driver is not synchronized and will start from BASE1. One solution is to reset the buffer explicitly in m2p_hw_setup(). Signed-off-by: Alexander Sverdlin Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ep93xx_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index d37e8dda8079..deb009c3121f 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -323,6 +323,8 @@ static int m2p_hw_setup(struct ep93xx_dma_chan *edmac) | M2P_CONTROL_ENABLE; m2p_set_control(edmac, control); + edmac->buffer = 0; + return 0; } From 98f9de366fccee7572c646af226b2d4b4841e3b5 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Mon, 22 May 2017 16:05:23 +0200 Subject: [PATCH 063/249] dmaengine: ep93xx: Don't drain the transfers in terminate_all() Draining the transfers in terminate_all callback happens with IRQs disabled, therefore induces huge latency: irqsoff latency trace v1.1.5 on 4.11.0 -------------------------------------------------------------------- latency: 39770 us, #57/57, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0) ----------------- | task: process-129 (uid:0 nice:0 policy:2 rt_prio:50) ----------------- => started at: _snd_pcm_stream_lock_irqsave => ended at: snd_pcm_stream_unlock_irqrestore _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / delay cmd pid ||||| time | caller \ / ||||| \ | / process-129 0d.s. 3us : _snd_pcm_stream_lock_irqsave process-129 0d.s1 9us : snd_pcm_stream_lock <-_snd_pcm_stream_lock_irqsave process-129 0d.s1 15us : preempt_count_add <-snd_pcm_stream_lock process-129 0d.s2 22us : preempt_count_add <-snd_pcm_stream_lock process-129 0d.s3 32us : snd_pcm_update_hw_ptr0 <-snd_pcm_period_elapsed process-129 0d.s3 41us : soc_pcm_pointer <-snd_pcm_update_hw_ptr0 process-129 0d.s3 50us : dmaengine_pcm_pointer <-soc_pcm_pointer process-129 0d.s3 58us+: snd_dmaengine_pcm_pointer_no_residue <-dmaengine_pcm_pointer process-129 0d.s3 96us : update_audio_tstamp <-snd_pcm_update_hw_ptr0 process-129 0d.s3 103us : snd_pcm_update_state <-snd_pcm_update_hw_ptr0 process-129 0d.s3 112us : xrun <-snd_pcm_update_state process-129 0d.s3 119us : snd_pcm_stop <-xrun process-129 0d.s3 126us : snd_pcm_action <-snd_pcm_stop process-129 0d.s3 134us : snd_pcm_action_single <-snd_pcm_action process-129 0d.s3 141us : snd_pcm_pre_stop <-snd_pcm_action_single process-129 0d.s3 150us : snd_pcm_do_stop <-snd_pcm_action_single process-129 0d.s3 157us : soc_pcm_trigger <-snd_pcm_do_stop process-129 0d.s3 166us : snd_dmaengine_pcm_trigger <-soc_pcm_trigger process-129 0d.s3 175us : ep93xx_dma_terminate_all <-snd_dmaengine_pcm_trigger process-129 0d.s3 182us : preempt_count_add <-ep93xx_dma_terminate_all process-129 0d.s4 189us*: m2p_hw_shutdown <-ep93xx_dma_terminate_all process-129 0d.s4 39472us : m2p_hw_setup <-ep93xx_dma_terminate_all ... rest skipped... process-129 0d.s. 40080us : => ep93xx_dma_tasklet => tasklet_action => __do_softirq => irq_exit => __handle_domain_irq => vic_handle_irq => __irq_usr => 0xb66c6668 Just abort the transfers and warn if the HW state is not what we expect. Move draining into device_synchronize callback. Signed-off-by: Alexander Sverdlin Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/ep93xx_dma.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index deb009c3121f..ec240592f5c8 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -201,6 +201,7 @@ struct ep93xx_dma_engine { struct dma_device dma_dev; bool m2m; int (*hw_setup)(struct ep93xx_dma_chan *); + void (*hw_synchronize)(struct ep93xx_dma_chan *); void (*hw_shutdown)(struct ep93xx_dma_chan *); void (*hw_submit)(struct ep93xx_dma_chan *); int (*hw_interrupt)(struct ep93xx_dma_chan *); @@ -333,21 +334,27 @@ static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac) return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3; } -static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac) { + unsigned long flags; u32 control; + spin_lock_irqsave(&edmac->lock, flags); control = readl(edmac->regs + M2P_CONTROL); control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT); m2p_set_control(edmac, control); + spin_unlock_irqrestore(&edmac->lock, flags); while (m2p_channel_state(edmac) >= M2P_STATE_ON) - cpu_relax(); + schedule(); +} +static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac) +{ m2p_set_control(edmac, 0); - while (m2p_channel_state(edmac) == M2P_STATE_STALL) - cpu_relax(); + while (m2p_channel_state(edmac) != M2P_STATE_IDLE) + dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n"); } static void m2p_fill_desc(struct ep93xx_dma_chan *edmac) @@ -1162,6 +1169,26 @@ ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, return NULL; } +/** + * ep93xx_dma_synchronize - Synchronizes the termination of transfers to the + * current context. + * @chan: channel + * + * Synchronizes the DMA channel termination to the current context. When this + * function returns it is guaranteed that all transfers for previously issued + * descriptors have stopped and and it is safe to free the memory associated + * with them. Furthermore it is guaranteed that all complete callback functions + * for a previously submitted descriptor have finished running and it is safe to + * free resources accessed from within the complete callbacks. + */ +static void ep93xx_dma_synchronize(struct dma_chan *chan) +{ + struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); + + if (edmac->edma->hw_synchronize) + edmac->edma->hw_synchronize(edmac); +} + /** * ep93xx_dma_terminate_all - terminate all transactions * @chan: channel @@ -1325,6 +1352,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg; dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic; dma_dev->device_config = ep93xx_dma_slave_config; + dma_dev->device_synchronize = ep93xx_dma_synchronize; dma_dev->device_terminate_all = ep93xx_dma_terminate_all; dma_dev->device_issue_pending = ep93xx_dma_issue_pending; dma_dev->device_tx_status = ep93xx_dma_tx_status; @@ -1342,6 +1370,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) } else { dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + edma->hw_synchronize = m2p_hw_synchronize; edma->hw_setup = m2p_hw_setup; edma->hw_shutdown = m2p_hw_shutdown; edma->hw_submit = m2p_hw_submit; From 0f0b9b63e14fc3f66e4d342df016c9b071c5abed Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 May 2017 13:14:13 +0200 Subject: [PATCH 064/249] gfs2: Make flush bios explicitely sync Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 CC: Steven Whitehouse CC: cluster-devel@redhat.com CC: stable@vger.kernel.org Acked-by: Bob Peterson Signed-off-by: Jan Kara --- fs/gfs2/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f865b96374df..d2955daf17a4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -659,7 +659,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) struct gfs2_log_header *lh; unsigned int tail; u32 hash; - int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META; + int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); lh = page_address(page); From d8747d642ec4ce96adf17ae35652a5e4015cfe02 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 May 2017 13:16:18 +0200 Subject: [PATCH 065/249] reiserfs: Make flush bios explicitely sync Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 CC: reiserfs-devel@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/reiserfs/journal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index da01f497180a..39bb1e838d8d 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1112,7 +1112,7 @@ static int flush_commit_list(struct super_block *s, depth = reiserfs_write_unlock_nested(s); if (reiserfs_barrier_flush(s)) __sync_dirty_buffer(jl->j_commit_bh, - REQ_PREFLUSH | REQ_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(jl->j_commit_bh); reiserfs_write_lock_nested(s, depth); @@ -1271,7 +1271,7 @@ static int _update_journal_header_block(struct super_block *sb, if (reiserfs_barrier_flush(sb)) __sync_dirty_buffer(journal->j_header_bh, - REQ_PREFLUSH | REQ_FUA); + REQ_SYNC | REQ_PREFLUSH | REQ_FUA); else sync_dirty_buffer(journal->j_header_bh); From 6d3b5d8d8dd1c14f991ccab84b40f8425f1ae91b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 16 May 2017 12:00:15 -0400 Subject: [PATCH 066/249] NFS fix COMMIT after COPY Fix a typo in the commit e092693443b995c8e3a565a73b5fdb05f1260f9b "NFS append COMMIT after synchronous COPY" Reported-by: Eryu Guan Fixes: e092693443b ("NFS append COMMIT after synchronous COPY") Signed-off-by: Olga Kornievskaia Tested-by: Eryu Guan Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 929d09a5310a..319a47db218d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -177,7 +177,7 @@ static ssize_t _nfs42_proc_copy(struct file *src, if (status) goto out; - if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier, + if (nfs_write_verifier_cmp(&res->write_res.verifier.verifier, &res->commit_res.verf->verifier)) { status = -EAGAIN; goto out; From 662f9a105b4322b8559d448f86110e6ec24b8738 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 May 2017 00:31:12 +0300 Subject: [PATCH 067/249] pNFS/flexfiles: missing error code in ff_layout_alloc_lseg() If xdr_inline_decode() fails then we end up returning ERR_PTR(0). The caller treats NULL returns as -ENOMEM so it doesn't really hurt runtime, but obviously we intended to set an error code here. Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver") Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f5714ee01000..23542dc44a25 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -454,6 +454,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; /* fh */ + rc = -EIO; p = xdr_inline_decode(&stream, 4); if (!p) goto out_err_free; From d2c23c0075d7091bf749411bd2ee757cf4ec356c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 22 May 2017 22:18:28 +0200 Subject: [PATCH 068/249] xprtrdma: Delete an error message for a failed memory allocation in xprt_rdma_bc_setup() Omit an extra message for a memory allocation failure in this function. This issue was detected by using the Coccinelle software. Link: http://events.linuxfoundation.org/sites/events/files/slides/LCJ16-Refactor_Strings-WSang_0.pdf Signed-off-by: Markus Elfring Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/backchannel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 24fedd4b117e..03f6b5840764 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -119,11 +119,9 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) for (i = 0; i < (reqs << 1); i++) { rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); - if (!rqst) { - pr_err("RPC: %s: Failed to create bc rpc_rqst\n", - __func__); + if (!rqst) goto out_free; - } + dprintk("RPC: %s: new rqst %p\n", __func__, rqst); rqst->rq_xprt = &r_xprt->rx_xprt; From 08cb5b0f058a325fcb5305e33f572ff6d6dfa289 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Mon, 22 May 2017 20:20:23 -0400 Subject: [PATCH 069/249] pnfs: Fix the check for requests in range of layout segment It's possible and acceptable for NFS to attempt to add requests beyond the range of the current pgio->pg_lseg, a case which should be caught and limited by the pg_test operation. However, the current handling of this case replaces pgio->pg_lseg with a new layout segment (after a WARN) within that pg_test operation. That will cause all the previously added requests to be submitted with this new layout segment, which may not be valid for those requests. Fix this problem by only returning zero for the number of bytes to coalesce from pg_test for this case which allows any previously added requests to complete on the current layout segment. The check for requests starting out of range of the layout segment moves to pg_init, so that the replacement of pgio->pg_lseg will be done when the next request is added. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 25 +++++++++++++++++-------- fs/nfs/pnfs.h | 10 ++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index adc6ec28d4b5..c383d0913b54 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2094,12 +2094,26 @@ pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout); +/* + * Check for any intersection between the request and the pgio->pg_lseg, + * and if none, put this pgio->pg_lseg away. + */ +static void +pnfs_generic_pg_check_range(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + if (pgio->pg_lseg && !pnfs_lseg_request_intersecting(pgio->pg_lseg, req)) { + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + } +} + void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { u64 rd_size = req->wb_bytes; pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (pgio->pg_lseg == NULL) { if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); @@ -2131,6 +2145,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { pnfs_generic_pg_check_layout(pgio); + pnfs_generic_pg_check_range(pgio, req); if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, @@ -2191,16 +2206,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); - WARN_ON_ONCE(req_start >= seg_end); + /* start of request is past the last byte of this segment */ - if (req_start >= seg_end) { - /* reference the new lseg */ - if (pgio->pg_ops->pg_cleanup) - pgio->pg_ops->pg_cleanup(pgio); - if (pgio->pg_ops->pg_init) - pgio->pg_ops->pg_init(pgio, req); + if (req_start >= seg_end) return 0; - } /* adjust 'size' iff there are fewer bytes left in the * segment than what nfs_generic_pg_test returned */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2d05b756a8d6..99731e3e332f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -593,6 +593,16 @@ pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2); } +static inline bool +pnfs_lseg_request_intersecting(struct pnfs_layout_segment *lseg, struct nfs_page *req) +{ + u64 seg_last = pnfs_end_offset(lseg->pls_range.offset, lseg->pls_range.length); + u64 req_last = req_offset(req) + req->wb_bytes; + + return pnfs_is_range_intersecting(lseg->pls_range.offset, seg_last, + req_offset(req), req_last); +} + extern unsigned int layoutstats_timer; #ifdef NFS_DEBUG From b49c15f97c936ef5a536821f97e4dd8568369802 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 May 2017 07:55:44 -0400 Subject: [PATCH 070/249] NFSv4.0: Fix a lock leak in nfs40_walk_client_list Xiaolong Ye's kernel test robot detected the following Oops: [ 299.158991] BUG: scheduling while atomic: mount.nfs/9387/0x00000002 [ 299.169587] 2 locks held by mount.nfs/9387: [ 299.176165] #0: (nfs_clid_init_mutex){......}, at: [] nfs4_discover_server_trunking+0x47/0x1fc [ 299.201802] #1: (&(&nn->nfs_client_lock)->rlock){......}, at: [] nfs40_walk_client_list+0x2e9/0x338 [ 299.221979] CPU: 0 PID: 9387 Comm: mount.nfs Not tainted 4.11.0-rc7-00021-g14d1bbb #45 [ 299.235584] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-20161025_171302-gandalf 04/01/2014 [ 299.251176] Call Trace: [ 299.255192] dump_stack+0x61/0x7e [ 299.260416] __schedule_bug+0x65/0x74 [ 299.266208] __schedule+0x5d/0x87c [ 299.271883] schedule+0x89/0x9a [ 299.276937] schedule_timeout+0x232/0x289 [ 299.283223] ? detach_if_pending+0x10b/0x10b [ 299.289935] schedule_timeout_uninterruptible+0x2a/0x2c [ 299.298266] ? put_rpccred+0x3e/0x115 [ 299.304327] ? schedule_timeout_uninterruptible+0x2a/0x2c [ 299.312851] msleep+0x1e/0x22 [ 299.317612] nfs4_discover_server_trunking+0x102/0x1fc [ 299.325644] nfs4_init_client+0x13f/0x194 It looks as if we recently added a spin_lock() leak to nfs40_walk_client_list() when cleaning up the code. Reported-by: kernel test robot Fixes: 14d1bbb0ca42 ("NFS: Create a common nfs4_match_client() function") Cc: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 692a7a8bfc7a..66776f022111 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -582,7 +582,6 @@ int nfs40_walk_client_list(struct nfs_client *new, */ nfs4_schedule_path_down_recovery(pos); default: - spin_lock(&nn->nfs_client_lock); goto out; } From d38162e4b5c643733792f32be4ea107c831827b4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 17 May 2017 15:15:57 +0200 Subject: [PATCH 071/249] Revert "drm/i915: Restore lost "Initialized i915" welcome message" This reverts commit bc5ca47c0af4f949ba889e666b7da65569e36093. Gabriel put this back into generic code with commit 75f6dfe3e652e1adef8cc1b073c89f3e22103a8f Author: Gabriel Krisman Bertazi Date: Wed Dec 28 12:32:11 2016 -0200 drm: Deduplicate driver initialization message but somehow he missed Chris' patch to add the message meanwhile. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101025 Fixes: 75f6dfe3e652 ("drm: Deduplicate driver initialization message") Cc: Gabriel Krisman Bertazi Cc: Daniel Vetter Cc: Jani Nikula Cc: Chris Wilson Cc: # v4.11+ Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170517131557.7836-1-daniel.vetter@ffwll.ch (cherry picked from commit 6bdba81979b2c3c8fed0be62ca31c32c3129d85f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3036d4835b0f..c994fe6e65b2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1272,10 +1272,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) dev_priv->ipc_enabled = false; - /* Everything is in place, we can now relax! */ - DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", - driver.name, driver.major, driver.minor, driver.patchlevel, - driver.date, pci_name(pdev), dev_priv->drm.primary->index); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) DRM_INFO("DRM_I915_DEBUG enabled\n"); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) From 2e0bb5b38f32eb21fe25e845721a205370adeffe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 May 2017 20:44:12 +0100 Subject: [PATCH 072/249] drm/i915/selftests: Silence compiler warning in igt_ctx_exec The compiler doesn't always spot the guard that object is allocated on the first pass, leading to: drivers/gpu/drm/i915/selftests/i915_gem_context.c: warning: 'obj' may be used uninitialized in this function [-Wuninitialized]: => 370:8 v2: Make it more obvious by setting obj to NULL on the first pass and any later pass where we need to reallocate. Reported-by: Geert Uytterhoeven Fixes: 791ff39ae32a ("drm/i915: Live testing for context execution") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Matthew Auld c: # v4.12-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170523194412.1195-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit ca83d5840cb641b2efb04db0b70fa56955dd1453) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 1afb8b06e3e1..12b85b3278cd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -320,7 +320,7 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj) static int igt_ctx_exec(void *arg) { struct drm_i915_private *i915 = arg; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj = NULL; struct drm_file *file; IGT_TIMEOUT(end_time); LIST_HEAD(objects); @@ -359,7 +359,7 @@ static int igt_ctx_exec(void *arg) } for_each_engine(engine, i915, id) { - if (dw == 0) { + if (!obj) { obj = create_test_object(ctx, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -376,8 +376,10 @@ static int igt_ctx_exec(void *arg) goto out_unlock; } - if (++dw == max_dwords(obj)) + if (++dw == max_dwords(obj)) { + obj = NULL; dw = 0; + } ndwords++; } ncontexts++; From 9dfa7bba35ac08a63565d58c454dccb7e1bb0a08 Mon Sep 17 00:00:00 2001 From: Michael Schmitz Date: Sun, 30 Apr 2017 19:49:21 +1200 Subject: [PATCH 073/249] fix race in drivers/char/random.c:get_reg() get_reg() can be reentered on architectures with prioritized interrupts (m68k in this case), causing f->reg_index to be incremented after the range check. Out of bounds memory access past the pt_regs struct results. This will go mostly undetected unless access is beyond end of memory. Prevent the race by disabling interrupts in get_reg(). Tested on m68k (Atari Falcon, and ARAnyM emulator). Kudos to Geert Uytterhoeven for helping to trace this race. Signed-off-by: Michael Schmitz Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 0ab024918907..a561f0c2f428 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1097,12 +1097,16 @@ static void add_interrupt_bench(cycles_t start) static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) { __u32 *ptr = (__u32 *) regs; + unsigned long flags; if (regs == NULL) return 0; + local_irq_save(flags); if (f->reg_idx >= sizeof(struct pt_regs) / sizeof(__u32)) f->reg_idx = 0; - return *(ptr + f->reg_idx++); + ptr += f->reg_idx++; + local_irq_restore(flags); + return *ptr; } void add_interrupt_randomness(int irq, int irq_flags) From 4179bc30b2fe85f827d93e0ae7ae8f49ad3afc02 Mon Sep 17 00:00:00 2001 From: Kyungchan Koh Date: Wed, 24 May 2017 10:16:27 -0700 Subject: [PATCH 074/249] md: uuid debug statement now in processor byte order. Previously, the uuid debug statements were printed in little-endian format, which wasn't consistent in machines that might not be in little-endian byte order. With this change, the output will be consistent for all machines with different byte-ordering. Signed-off-by: Kyungchan Koh Signed-off-by: Shaohua Li --- drivers/md/bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index bf7419a56454..f4eace5ea184 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -485,10 +485,10 @@ void bitmap_print_sb(struct bitmap *bitmap) pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); pr_debug(" version: %d\n", le32_to_cpu(sb->version)); pr_debug(" uuid: %08x.%08x.%08x.%08x\n", - *(__u32 *)(sb->uuid+0), - *(__u32 *)(sb->uuid+4), - *(__u32 *)(sb->uuid+8), - *(__u32 *)(sb->uuid+12)); + le32_to_cpu(*(__u32 *)(sb->uuid+0)), + le32_to_cpu(*(__u32 *)(sb->uuid+4)), + le32_to_cpu(*(__u32 *)(sb->uuid+8)), + le32_to_cpu(*(__u32 *)(sb->uuid+12))); pr_debug(" events: %llu\n", (unsigned long long) le64_to_cpu(sb->events)); pr_debug("events cleared: %llu\n", From e153903686deac9ea8c499b49516126f1a844fdb Mon Sep 17 00:00:00 2001 From: Nix Date: Tue, 16 May 2017 10:13:31 +0100 Subject: [PATCH 075/249] md: report sector of stripes with check mismatches This makes it possible, with appropriate filesystem support, for a sysadmin to tell what is affected by the mismatch, and whether it should be ignored (if it's inside a swap partition, for instance). We ratelimit to prevent log flooding: if there are so many mismatches that ratelimiting is necessary, the individual messages are relatively unlikely to be important (either the machine is swapping like crazy or something is very wrong with the disk). Signed-off-by: Nick Alcock Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 9c4f7659f8b1..722064689e82 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4085,10 +4085,15 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh, set_bit(STRIPE_INSYNC, &sh->state); else { atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { /* don't try to repair!! */ set_bit(STRIPE_INSYNC, &sh->state); - else { + pr_warn_ratelimited("%s: mismatch sector in range " + "%llu-%llu\n", mdname(conf->mddev), + (unsigned long long) sh->sector, + (unsigned long long) sh->sector + + STRIPE_SECTORS); + } else { sh->check_state = check_state_compute_run; set_bit(STRIPE_COMPUTE_RUN, &sh->state); set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); @@ -4237,10 +4242,15 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh, } } else { atomic64_add(STRIPE_SECTORS, &conf->mddev->resync_mismatches); - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) { /* don't try to repair!! */ set_bit(STRIPE_INSYNC, &sh->state); - else { + pr_warn_ratelimited("%s: mismatch sector in range " + "%llu-%llu\n", mdname(conf->mddev), + (unsigned long long) sh->sector, + (unsigned long long) sh->sector + + STRIPE_SECTORS); + } else { int *target = &sh->ops.target; sh->ops.target = -1; From 717902cc93118119a6fce7765da6cf2786987418 Mon Sep 17 00:00:00 2001 From: Timmy Li Date: Mon, 22 May 2017 16:48:28 +0100 Subject: [PATCH 076/249] ARM64: PCI: Fix struct acpi_pci_root_ops allocation failure path Commit 093d24a20442 ("arm64: PCI: Manage controller-specific data on per-controller basis") added code to allocate ACPI PCI root_ops dynamically on a per host bridge basis but failed to update the corresponding memory allocation failure path in pci_acpi_scan_root() leading to a potential memory leakage. Fix it by adding the required kfree call. Fixes: 093d24a20442 ("arm64: PCI: Manage controller-specific data on per-controller basis") Reviewed-by: Tomasz Nowicki Signed-off-by: Timmy Li [lorenzo.pieralisi@arm.com: refactored code, rewrote commit log] Signed-off-by: Lorenzo Pieralisi CC: Will Deacon CC: Bjorn Helgaas Signed-off-by: Catalin Marinas --- arch/arm64/kernel/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..c7e3e6387a49 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node); - if (!root_ops) + if (!root_ops) { + kfree(ri); return NULL; + } ri->cfg = pci_acpi_setup_ecam_mapping(root); if (!ri->cfg) { From 9bd9590997b92fbd79fd028f704f6c584b4439d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 27 Apr 2017 19:02:22 +0300 Subject: [PATCH 077/249] drm/i915: Stop pretending to mask/unmask LPE audio interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vlv_display_irq_postinstall() enables the LPE audio interrupts regardless of whether the LPE audio irq chip has masked/unmasked them. Also the irqchip masking/unmasking doesn't consider the state of the display power well or the device, and hence just leads to dmesg spew when it tries to access the hardware while it's powered down. If the current way works, then we don't need to do anything in the mask/unmask hooks. If it doesn't work, well, then we'd need to properly track whether the irqchip has masked/unmasked the interrupts when we enable display interrupts. And the mask/unmask hooks would need to check whether display interrupts are even enabled before frobbing with he registers. So let's just assume the current way works and neuter the mask/unmask hooks. Also clean up vlv_display_irq_postinstall() a bit and stop it from trying to unmask/enable the LPE C interrupt on VLV since it doesn't exist. Cc: Takashi Iwai Cc: Pierre-Louis Bossart Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170427160231.13337-4-ville.syrjala@linux.intel.com Reviewed-by: Takashi Iwai (cherry picked from commit ebf5f921478b9b55ed4e634b994571dd23a8fca3) Reference: http://mid.mail-archive.com/874cf6d3-4e45-d4cf-e662-eb972490d2ce@redhat.com Tested-by: Hans de Goede Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 15 +++++------ drivers/gpu/drm/i915/intel_lpe_audio.c | 36 -------------------------- 2 files changed, 6 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index fd97fe00cd0d..190f6aa5d15e 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2953,7 +2953,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) u32 pipestat_mask; u32 enable_mask; enum pipe pipe; - u32 val; pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV | PIPE_CRC_DONE_INTERRUPT_STATUS; @@ -2964,18 +2963,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv) enable_mask = I915_DISPLAY_PORT_INTERRUPT | I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | + I915_LPE_PIPE_A_INTERRUPT | + I915_LPE_PIPE_B_INTERRUPT; + if (IS_CHERRYVIEW(dev_priv)) - enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT; + enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT | + I915_LPE_PIPE_C_INTERRUPT; WARN_ON(dev_priv->irq_mask != ~0); - val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT | - I915_LPE_PIPE_C_INTERRUPT); - - enable_mask |= val; - dev_priv->irq_mask = ~enable_mask; GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask); diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 668f00480d97..292fedf30b00 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -149,44 +149,10 @@ static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv) static void lpe_audio_irq_unmask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask &= ~val; - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - POSTING_READ(VLV_IMR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static void lpe_audio_irq_mask(struct irq_data *d) { - struct drm_i915_private *dev_priv = d->chip_data; - unsigned long irqflags; - u32 val = (I915_LPE_PIPE_A_INTERRUPT | - I915_LPE_PIPE_B_INTERRUPT); - - if (IS_CHERRYVIEW(dev_priv)) - val |= I915_LPE_PIPE_C_INTERRUPT; - - spin_lock_irqsave(&dev_priv->irq_lock, irqflags); - - dev_priv->irq_mask |= val; - I915_WRITE(VLV_IMR, dev_priv->irq_mask); - I915_WRITE(VLV_IIR, val); - I915_WRITE(VLV_IIR, val); - POSTING_READ(VLV_IIR); - - spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } static struct irq_chip lpe_audio_irqchip = { @@ -330,8 +296,6 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv) desc = irq_to_desc(dev_priv->lpe_audio.irq); - lpe_audio_irq_mask(&desc->irq_data); - lpe_audio_platdev_destroy(dev_priv); irq_free_desc(dev_priv->lpe_audio.irq); From 5acc1ca4fb15f00bfa3d4046e35ca381bc25d580 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sat, 20 May 2017 20:32:32 -0700 Subject: [PATCH 078/249] KVM: X86: Fix preempt the preemption timer cancel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preemption can occur during cancel preemption timer, and there will be inconsistent status in lapic, vmx and vmcs field. CPU0 CPU1 preemption timer vmexit handle_preemption_timer(vCPU0) kvm_lapic_expired_hv_timer vmx_cancel_hv_timer vmx->hv_deadline_tsc = -1 vmcs_clear_bits /* hv_timer_in_use still true */ sched_out sched_in kvm_arch_vcpu_load vmx_set_hv_timer write vmx->hv_deadline_tsc vmcs_set_bits /* back in kvm_lapic_expired_hv_timer */ hv_timer_in_use = false ... vmx_vcpu_run vmx_arm_hv_run write preemption timer deadline spurious preemption timer vmexit handle_preemption_timer(vCPU0) kvm_lapic_expired_hv_timer WARN_ON(!apic->lapic_timer.hv_timer_in_use); This can be reproduced sporadically during boot of L2 on a preemptible L1, causing a splat on L1. WARNING: CPU: 3 PID: 1952 at arch/x86/kvm/lapic.c:1529 kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] CPU: 3 PID: 1952 Comm: qemu-system-x86 Not tainted 4.12.0-rc1+ #24 RIP: 0010:kvm_lapic_expired_hv_timer+0xb5/0xd0 [kvm] Call Trace: handle_preemption_timer+0xe/0x20 [kvm_intel] vmx_handle_exit+0xc9/0x15f0 [kvm_intel] ? lock_acquire+0xdb/0x250 ? lock_acquire+0xdb/0x250 ? kvm_arch_vcpu_ioctl_run+0xdf3/0x1ce0 [kvm] kvm_arch_vcpu_ioctl_run+0xe55/0x1ce0 [kvm] kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? __fget+0xf3/0x210 do_vfs_ioctl+0xa4/0x700 ? __fget+0x114/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x8f/0x750 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL64_slow_path+0x25/0x25 This patch fixes it by disabling preemption while cancelling preemption timer. This way cancel_hv_timer is atomic with respect to kvm_arch_vcpu_load. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c329d2894905..6e6f345adfe6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1495,8 +1495,10 @@ EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { + preempt_disable(); kvm_x86_ops->cancel_hv_timer(apic->vcpu); apic->lapic_timer.hv_timer_in_use = false; + preempt_enable(); } static bool start_hv_timer(struct kvm_lapic *apic) From e1d39b17e044e8ae819827810d87d809ba5f58c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Sat, 20 May 2017 13:22:56 +0200 Subject: [PATCH 079/249] KVM: nVMX: Fix handling of lmsw instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The decision whether or not to exit from L2 to L1 on an lmsw instruction is based on bogus values: instead of using the information encoded within the exit qualification, it uses the data also used for the mov-to-cr instruction, which boils down to using whatever is in %eax at that point. Use the correct values instead. Without this fix, an L1 may not get notified when a 32-bit Linux L2 switches its secondary CPUs to protected mode; the L1 is only notified on the next modification of CR0. This short time window poses a problem, when there is some other reason to exit to L1 in between. Then, L2 will be resumed in real mode and chaos ensues. Signed-off-by: Jan H. Schönherr Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 72f78396bc09..880f371705bc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7913,11 +7913,13 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; - int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_readl(vcpu, reg); + int reg; + unsigned long val; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ + reg = (exit_qualification >> 8) & 15; + val = kvm_register_readl(vcpu, reg); switch (cr) { case 0: if (vmcs12->cr0_guest_host_mask & @@ -7972,6 +7974,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, * lmsw can change bits 1..3 of cr0, and only set bit 0 of * cr0. Other attempted changes are ignored, with no exit. */ + val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) return true; From 52b5419016997f2960e9c8b6584c4acb3875d126 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Sat, 20 May 2017 13:24:32 +0200 Subject: [PATCH 080/249] KVM: x86: Fix virtual wire mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel SDM says, that at most one LAPIC should be configured with ExtINT delivery. KVM configures all LAPICs this way. This causes pic_unlock() to kick the first available vCPU from the internal KVM data structures. If this vCPU is not the BSP, but some not-yet-booted AP, the BSP may never realize that there is an interrupt. Fix that by enabling ExtINT delivery only for the BSP. This allows booting a Linux guest without a TSC in the above situation. Otherwise the BSP gets stuck in calibrate_delay_converge(). Signed-off-by: Jan H. Schönherr Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6e6f345adfe6..d24c8742d9b0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1936,7 +1936,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) for (i = 0; i < KVM_APIC_LVT_NUM; i++) kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); apic_update_lvtt(apic); - if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) + if (kvm_vcpu_is_reset_bsp(vcpu) && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) kvm_lapic_set_reg(apic, APIC_LVT0, SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); From 5720acf4bfc142ba568d5b6782fceaf62ed15e0b Mon Sep 17 00:00:00 2001 From: Miroslav Benes Date: Fri, 26 May 2017 14:45:21 +0200 Subject: [PATCH 081/249] livepatch: Make livepatch dependent on !TRIM_UNUSED_KSYMS If TRIM_UNUSED_KSYMS is enabled, all unneeded exported symbols are made unexported. Two-pass build of the kernel is done to find out which symbols are needed based on a configuration. This effectively complicates things for out-of-tree modules. Livepatch exports functions to (un)register and enable/disable a live patch. The only in-tree module which uses these functions is a sample in samples/livepatch/. If the sample is disabled, the functions are trimmed and out-of-tree live patches cannot be built. Note that live patches are intended to be built out-of-tree. Suggested-by: Michal Marek Acked-by: Josh Poimboeuf Acked-by: Jessica Yu Signed-off-by: Miroslav Benes Signed-off-by: Jiri Kosina --- kernel/livepatch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 045022557936..ec4565122e65 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -10,6 +10,7 @@ config LIVEPATCH depends on SYSFS depends on KALLSYMS_ALL depends on HAVE_LIVEPATCH + depends on !TRIM_UNUSED_KSYMS help Say Y here if you want to support kernel live patching. This option has no runtime impact until a kernel "patch" From ac20fa0a96c32ff40c8a127dfd58140f1df44a9b Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 2 May 2017 12:39:53 -0400 Subject: [PATCH 082/249] drm/msm: select PM_OPP Otherwise, if nothing else enabled selects it, dev_pm_opp_of_add_table() will return -ENOTSUPP. Fixes: e2af8b6 ("drm/msm: gpu: Use OPP tables if we can") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index 5b8e23d051f2..0a31cd6d01ce 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -13,6 +13,7 @@ config DRM_MSM select QCOM_SCM select SND_SOC_HDMI_CODEC if SND_SOC select SYNC_FILE + select PM_OPP default y help DRM/KMS driver for MSM/snapdragon. From 786813c343cb619d23cb0990e152e350b826d810 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 3 May 2017 10:04:48 -0400 Subject: [PATCH 083/249] drm/msm/mdp5: use __drm_atomic_helper_plane_duplicate_state() Somehow the helper was never retrofitted for mdp5. Which meant when plane_state->fence was added, it could get copied into new state in mdp5_plane_duplicate_state(). If an update to disable the plane (for example on rmfb) managed to sneak in after an nonblock update had swapped state, but before it was committed, we'd get a splat: WARNING: CPU: 1 PID: 69 at ../drivers/gpu/drm/drm_atomic_helper.c:1061 drm_atomic_helper_wait_for_fences+0xe0/0xf8 Modules linked in: CPU: 1 PID: 69 Comm: kworker/1:1 Tainted: G W 4.11.0-rc8+ #1187 Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) Workqueue: events drm_mode_rmfb_work_fn task: ffffffc036560d00 task.stack: ffffffc036550000 PC is at drm_atomic_helper_wait_for_fences+0xe0/0xf8 LR is at complete_commit.isra.1+0x44/0x1c0 pc : [] lr : [] pstate: 20000145 sp : ffffffc036553b60 x29: ffffffc036553b60 x28: ffffffc0264e6a00 x27: ffffffc035659000 x26: 0000000000000000 x25: ffffffc0240e8000 x24: 0000000000000038 x23: 0000000000000000 x22: ffffff800858f200 x21: ffffffc0240e8000 x20: ffffffc02f56a800 x19: 0000000000000000 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: ffffffc00a192700 x13: 0000000000000004 x12: 0000000000000000 x11: ffffff80089a1690 x10: 00000000000008f0 x9 : ffffffc036553b20 x8 : ffffffc036561650 x7 : ffffffc03fe6cb40 x6 : 0000000000000000 x5 : 0000000000000001 x4 : 0000000000000002 x3 : ffffffc035659000 x2 : ffffffc0240e8c80 x1 : 0000000000000000 x0 : ffffffc02adbe588 ---[ end trace 13aeec77c3fb55e2 ]--- Call trace: Exception stack(0xffffffc036553990 to 0xffffffc036553ac0) 3980: 0000000000000000 0000008000000000 39a0: ffffffc036553b60 ffffff80084f6040 0000000000004ff0 0000000000000038 39c0: ffffffc0365539d0 ffffff800857e098 ffffffc036553a00 ffffff800857e1b0 39e0: ffffffc036553a10 ffffff800857c554 ffffffc0365e8400 ffffffc0365e8400 3a00: ffffffc036553a20 ffffff8008103358 000000000001aad7 ffffff800851b72c 3a20: ffffffc036553a50 ffffff80080e9228 ffffffc02adbe588 0000000000000000 3a40: ffffffc0240e8c80 ffffffc035659000 0000000000000002 0000000000000001 3a60: 0000000000000000 ffffffc03fe6cb40 ffffffc036561650 ffffffc036553b20 3a80: 00000000000008f0 ffffff80089a1690 0000000000000000 0000000000000004 3aa0: ffffffc00a192700 0000000000000000 0000000000000000 0000000000000000 [] drm_atomic_helper_wait_for_fences+0xe0/0xf8 [] complete_commit.isra.1+0x44/0x1c0 [] msm_atomic_commit+0x32c/0x350 [] drm_atomic_commit+0x50/0x60 [] drm_atomic_remove_fb+0x158/0x250 [] drm_framebuffer_remove+0x50/0x158 [] drm_mode_rmfb_work_fn+0x40/0x58 [] process_one_work+0x1d0/0x378 [] worker_thread+0x244/0x488 [] kthread+0xfc/0x128 [] ret_from_fork+0x10/0x50 Fixes: 9626014 ("drm/fence: add in-fences support") Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Reported-by: Stanimir Varbanov Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index a38c5fe6cc19..30b4691f7b0d 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -225,9 +225,10 @@ mdp5_plane_duplicate_state(struct drm_plane *plane) mdp5_state = kmemdup(to_mdp5_plane_state(plane->state), sizeof(*mdp5_state), GFP_KERNEL); + if (!mdp5_state) + return NULL; - if (mdp5_state && mdp5_state->base.fb) - drm_framebuffer_reference(mdp5_state->base.fb); + __drm_atomic_helper_plane_duplicate_state(plane, &mdp5_state->base); return &mdp5_state->base; } From 134ccada7ac59156761ce05afd1c0b1d02ebd928 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 3 May 2017 10:43:14 -0400 Subject: [PATCH 084/249] drm/msm/gpu: check legacy clk names in get_clocks() Otherwise if someone was using old bindings with "core_clk" instead of "core" as the clock name, we'd never find it and gpu would be stuck at 27MHz (or whatever it's slowest rate is). Fixes: 98db803 ("msm/drm: gpu: Dynamically locate the clocks from the device tree") Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 97b9c38c6b3f..0fdc88d79ca8 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -549,9 +549,9 @@ static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) gpu->grp_clks[i] = get_clock(dev, name); /* Remember the key clocks that we need to control later */ - if (!strcmp(name, "core")) + if (!strcmp(name, "core") || !strcmp(name, "core_clk")) gpu->core_clk = gpu->grp_clks[i]; - else if (!strcmp(name, "rbbmtimer")) + else if (!strcmp(name, "rbbmtimer") || !strcmp(name, "rbbmtimer_clk")) gpu->rbbmtimer_clk = gpu->grp_clks[i]; ++i; From 43523eba79bda8f5b4c27f8ffe20ea078d20113a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Apr 2017 12:11:58 -0700 Subject: [PATCH 085/249] drm/msm: Expose our reservation object when exporting a dmabuf. Without this, polling on the dma-buf (and presumably other devices synchronizing against our rendering) would return immediately, even while the BO was busy. Signed-off-by: Eric Anholt Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Cc: Rob Clark Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Reviewed-by: Rob Clark Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 1 + drivers/gpu/drm/msm/msm_drv.h | 1 + drivers/gpu/drm/msm/msm_gem_prime.c | 7 +++++++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 87b5695d4034..9d498eb81906 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -830,6 +830,7 @@ static struct drm_driver msm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, + .gem_prime_res_obj = msm_gem_prime_res_obj, .gem_prime_pin = msm_gem_prime_pin, .gem_prime_unpin = msm_gem_prime_unpin, .gem_prime_get_sg_table = msm_gem_prime_get_sg_table, diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 28b6f9ba5066..1b26ca626528 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -224,6 +224,7 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); void *msm_gem_prime_vmap(struct drm_gem_object *obj); void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj); struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c index 60bb290700ce..13403c6da6c7 100644 --- a/drivers/gpu/drm/msm/msm_gem_prime.c +++ b/drivers/gpu/drm/msm/msm_gem_prime.c @@ -70,3 +70,10 @@ void msm_gem_prime_unpin(struct drm_gem_object *obj) if (!obj->import_attach) msm_gem_put_pages(obj); } + +struct reservation_object *msm_gem_prime_res_obj(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + return msm_obj->resv; +} From 3c30cc41a87880db86c343b8c9cc8bc2d3d36055 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Apr 2017 12:12:00 -0700 Subject: [PATCH 086/249] drm/msm: Reuse dma_fence_release. If we follow the typical pattern of the base class being the first member, we can use the default dma_fence_free function. Signed-off-by: Eric Anholt Cc: Rob Clark Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Reviewed-by: Rob Clark Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_fence.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index 3f299c537b77..a2f89bac9c16 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -99,8 +99,8 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) } struct msm_fence { - struct msm_fence_context *fctx; struct dma_fence base; + struct msm_fence_context *fctx; }; static inline struct msm_fence *to_msm_fence(struct dma_fence *fence) @@ -130,19 +130,13 @@ static bool msm_fence_signaled(struct dma_fence *fence) return fence_completed(f->fctx, f->base.seqno); } -static void msm_fence_release(struct dma_fence *fence) -{ - struct msm_fence *f = to_msm_fence(fence); - kfree_rcu(f, base.rcu); -} - static const struct dma_fence_ops msm_fence_ops = { .get_driver_name = msm_fence_get_driver_name, .get_timeline_name = msm_fence_get_timeline_name, .enable_signaling = msm_fence_enable_signaling, .signaled = msm_fence_signaled, .wait = dma_fence_default_wait, - .release = msm_fence_release, + .release = dma_fence_free, }; struct dma_fence * From adcbae310f9ed84ef139e3fa6d7a7743d96c44c8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 21 May 2017 12:05:07 -0400 Subject: [PATCH 087/249] drm/msm/mdp5: release hwpipe(s) for unused planes Otherwise, if userspace doesn't re-use a given plane, it's hwpipe(s) could stay permanently assigned. Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c index 30b4691f7b0d..7d3741215387 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c @@ -445,6 +445,10 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state, mdp5_pipe_release(state->state, old_hwpipe); mdp5_pipe_release(state->state, old_right_hwpipe); } + } else { + mdp5_pipe_release(state->state, mdp5_state->hwpipe); + mdp5_pipe_release(state->state, mdp5_state->r_hwpipe); + mdp5_state->hwpipe = mdp5_state->r_hwpipe = NULL; } return 0; From c43dd227f411359802427da7aebe5da5b0e48e0e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 24 May 2017 18:12:19 +0200 Subject: [PATCH 088/249] drm/msm: constify irq_domain_ops struct irq_domain_ops is not modified, so it can be made const. Signed-off-by: Tobias Klauser Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c index f8f48d014978..9c34d7824988 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c @@ -116,7 +116,7 @@ static int mdss_hw_irqdomain_map(struct irq_domain *d, unsigned int irq, return 0; } -static struct irq_domain_ops mdss_hw_irqdomain_ops = { +static const struct irq_domain_ops mdss_hw_irqdomain_ops = { .map = mdss_hw_irqdomain_map, .xlate = irq_domain_xlate_onecell, }; From 3cfac69cbd56e1a57a70bfbdb07560b788030404 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 17 Mar 2017 19:38:40 +0100 Subject: [PATCH 089/249] drm/msm: for array in-fences, check if all backing fences are from our own context before waiting Use the dma_fence_match_context helper to check if all backing fences are from our own context, in which case we don't have to wait. Signed-off-by: Philipp Zabel Cc: Rob Clark Cc: Gustavo Padovan [rebased on code-motion] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 1c545ebe6a5a..8ac4ca443914 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -410,12 +410,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (!in_fence) return -EINVAL; - /* TODO if we get an array-fence due to userspace merging multiple - * fences, we need a way to determine if all the backing fences - * are from our own context.. + /* + * Wait if the fence is from a foreign context, or if the fence + * array contains any fence from a foreign context. */ - - if (in_fence->context != gpu->fctx->context) { + if (!dma_fence_match_context(in_fence, gpu->fctx->context)) { ret = dma_fence_wait(in_fence, true); if (ret) return ret; From 90dd57de4a043f642179b1323a31ca3ced826611 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:34:57 -0600 Subject: [PATCH 090/249] drm/msm: Take the mutex before calling msm_gem_new_impl Amongst its other duties, msm_gem_new_impl adds the newly created GEM object to the shared inactive list which may also be actively modifiying the list during submission. All the paths to modify the list are protected by the mutex except for the one through msm_gem_import which can end up causing list corruption. Signed-off-by: Jordan Crouse [add extra WARN_ON(!mutex_is_locked(&dev->struct_mutex))] Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 68e509b3b9e4..50289a23baf8 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -758,6 +758,8 @@ static int msm_gem_new_impl(struct drm_device *dev, struct msm_gem_object *msm_obj; bool use_vram = false; + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + switch (flags & MSM_BO_CACHE_MASK) { case MSM_BO_UNCACHED: case MSM_BO_CACHED: @@ -853,7 +855,11 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, size = PAGE_ALIGN(dmabuf->size); + /* Take mutex so we can modify the inactive list in msm_gem_new_impl */ + mutex_lock(&dev->struct_mutex); ret = msm_gem_new_impl(dev, size, MSM_BO_WC, dmabuf->resv, &obj); + mutex_unlock(&dev->struct_mutex); + if (ret) goto fail; From d72fea538fe6d783c1e63a2fc304019abf4be93a Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Mon, 8 May 2017 14:34:58 -0600 Subject: [PATCH 091/249] drm/msm: Fix the check for the command size The overrun check for the size of submitted commands is off by one. It should allow the offset plus the size to be equal to the size of the memory object when the command stream is very tightly constructed. Signed-off-by: Jordan Crouse Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 8ac4ca443914..7832e6421d25 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -495,8 +495,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out; } - if ((submit_cmd.size + submit_cmd.submit_offset) >= - msm_obj->base.size) { + if (!submit_cmd.size || + ((submit_cmd.size + submit_cmd.submit_offset) > + msm_obj->base.size)) { DRM_ERROR("invalid cmdstream size: %u\n", submit_cmd.size); ret = -EINVAL; goto out; From 1ea34adb87c969b89dfd83f1905a79161e9ada26 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 26 May 2017 12:36:47 +0100 Subject: [PATCH 092/249] efi: Don't issue error message when booted under Xen When booted as Xen dom0 there won't be an EFI memmap allocated. Avoid issuing an error message in this case: [ 0.144079] efi: Failed to allocate new EFI memmap Signed-off-by: Juergen Gross Signed-off-by: Matt Fleming Cc: # v4.9+ Cc: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170526113652.21339-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 26615991d69c..e0cf95a83f3f 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -360,6 +360,9 @@ void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + if (!num_entries) + return; + new_size = efi.memmap.desc_size * num_entries; new_phys = efi_memmap_alloc(num_entries); if (!new_phys) { From 684e3f965d0be8c26fedefe94f637374242aba08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 May 2017 12:36:48 +0100 Subject: [PATCH 093/249] efi: Remove duplicate 'const' specifiers gcc-7 shows these harmless warnings: drivers/firmware/efi/libstub/secureboot.c:19:27: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] static const efi_char16_t const efi_SecureBoot_name[] = { drivers/firmware/efi/libstub/secureboot.c:22:27: error: duplicate 'const' declaration specifier [-Werror=duplicate-decl-specifier] Removing one of the specifiers gives us the expected behavior. Signed-off-by: Arnd Bergmann Signed-off-by: Matt Fleming Reviewed-by: David Howells Acked-by: Ard Biesheuvel Cc: Josh Boyer Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: de8cb458625c ("efi: Get and store the secure boot status") Link: http://lkml.kernel.org/r/20170526113652.21339-3-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/secureboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 8c34d50a4d80..959777ec8a77 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -16,10 +16,10 @@ /* BIOS variables */ static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; -static const efi_char16_t const efi_SecureBoot_name[] = { +static const efi_char16_t efi_SecureBoot_name[] = { 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 }; -static const efi_char16_t const efi_SetupMode_name[] = { +static const efi_char16_t efi_SetupMode_name[] = { 'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 }; From 4e52797d2efefac3271abdc54439a3435abd77b9 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Fri, 26 May 2017 12:36:49 +0100 Subject: [PATCH 094/249] x86/efi: Disable runtime services on kexec kernel if booted with efi=old_map Booting kexec kernel with "efi=old_map" in kernel command line hits kernel panic as shown below. BUG: unable to handle kernel paging request at ffff88007fe78070 IP: virt_efi_set_variable.part.7+0x63/0x1b0 PGD 7ea28067 PUD 7ea2b067 PMD 7ea2d067 PTE 0 [...] Call Trace: virt_efi_set_variable() efi_delete_dummy_variable() efi_enter_virtual_mode() start_kernel() x86_64_start_reservations() x86_64_start_kernel() start_cpu() [ efi=old_map was never intended to work with kexec. The problem with using efi=old_map is that the virtual addresses are assigned from the memory region used by other kernel mappings; vmalloc() space. Potentially there could be collisions when booting kexec if something else is mapped at the virtual address we allocated for runtime service regions in the initial boot - Matt Fleming ] Since kexec was never intended to work with efi=old_map, disable runtime services in kexec if booted with efi=old_map, so that we don't panic. Tested-by: Lee Chun-Yi Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Acked-by: Dave Young Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170526113652.21339-4-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7e76a4d8304b..43b96f5f78ba 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -828,9 +828,11 @@ static void __init kexec_enter_virtual_mode(void) /* * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI + * non-native EFI. With efi=old_map, we don't do runtime services in + * kexec kernel because in the initial boot something else might + * have been mapped at these virtual addresses. */ - if (!efi_is_native()) { + if (!efi_is_native() || efi_enabled(EFI_OLD_MEMMAP)) { efi_memmap_unmap(); clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); return; From 94133e46a0f5ca3f138479806104ab4a8cb0455e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 26 May 2017 12:36:50 +0100 Subject: [PATCH 095/249] x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled For EFI with the 'efi=old_map' kernel option specified, the kernel will panic when KASLR is enabled: BUG: unable to handle kernel paging request at 000000007febd57e IP: 0x7febd57e PGD 1025a067 PUD 0 Oops: 0010 [#1] SMP Call Trace: efi_enter_virtual_mode() start_kernel() x86_64_start_reservations() x86_64_start_kernel() start_cpu() The root cause is that the identity mapping is not built correctly in the 'efi=old_map' case. On 'nokaslr' kernels, PAGE_OFFSET is 0xffff880000000000 which is PGDIR_SIZE aligned. We can borrow the PUD table from the direct mappings safely. Given a physical address X, we have pud_index(X) == pud_index(__va(X)). However, on KASLR kernels, PAGE_OFFSET is PUD_SIZE aligned. For a given physical address X, pud_index(X) != pud_index(__va(X)). We can't just copy the PGD entry from direct mapping to build identity mapping, instead we need to copy the PUD entries one by one from the direct mapping. Fix it. Signed-off-by: Baoquan He Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Bhupesh Sharma Cc: Borislav Petkov Cc: Dave Young Cc: Frank Ramsay Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Garnier Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170526113652.21339-5-matt@codeblueprint.co.uk [ Fixed and reworded the changelog and code comments to be more readable. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 79 ++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 8 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c488625c9712..eb8dff15a7f6 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -71,11 +71,13 @@ static void __init early_code_mapping_set_exec(int executable) pgd_t * __init efi_call_phys_prolog(void) { - unsigned long vaddress; - pgd_t *save_pgd; + unsigned long vaddr, addr_pgd, addr_p4d, addr_pud; + pgd_t *save_pgd, *pgd_k, *pgd_efi; + p4d_t *p4d, *p4d_k, *p4d_efi; + pud_t *pud; int pgd; - int n_pgds; + int n_pgds, i, j; if (!efi_enabled(EFI_OLD_MEMMAP)) { save_pgd = (pgd_t *)read_cr3(); @@ -88,10 +90,49 @@ pgd_t * __init efi_call_phys_prolog(void) n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE); save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL); + /* + * Build 1:1 identity mapping for efi=old_map usage. Note that + * PAGE_OFFSET is PGDIR_SIZE aligned when KASLR is disabled, while + * it is PUD_SIZE ALIGNED with KASLR enabled. So for a given physical + * address X, the pud_index(X) != pud_index(__va(X)), we can only copy + * PUD entry of __va(X) to fill in pud entry of X to build 1:1 mapping. + * This means here we can only reuse the PMD tables of the direct mapping. + */ for (pgd = 0; pgd < n_pgds; pgd++) { - save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE); - vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); - set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + addr_pgd = (unsigned long)(pgd * PGDIR_SIZE); + vaddr = (unsigned long)__va(pgd * PGDIR_SIZE); + pgd_efi = pgd_offset_k(addr_pgd); + save_pgd[pgd] = *pgd_efi; + + p4d = p4d_alloc(&init_mm, pgd_efi, addr_pgd); + if (!p4d) { + pr_err("Failed to allocate p4d table!\n"); + goto out; + } + + for (i = 0; i < PTRS_PER_P4D; i++) { + addr_p4d = addr_pgd + i * P4D_SIZE; + p4d_efi = p4d + p4d_index(addr_p4d); + + pud = pud_alloc(&init_mm, p4d_efi, addr_p4d); + if (!pud) { + pr_err("Failed to allocate pud table!\n"); + goto out; + } + + for (j = 0; j < PTRS_PER_PUD; j++) { + addr_pud = addr_p4d + j * PUD_SIZE; + + if (addr_pud > (max_pfn << PAGE_SHIFT)) + break; + + vaddr = (unsigned long)__va(addr_pud); + + pgd_k = pgd_offset_k(vaddr); + p4d_k = p4d_offset(pgd_k, vaddr); + pud[j] = *pud_offset(p4d_k, vaddr); + } + } } out: __flush_tlb_all(); @@ -104,8 +145,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) /* * After the lock is released, the original page table is restored. */ - int pgd_idx; + int pgd_idx, i; int nr_pgds; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; if (!efi_enabled(EFI_OLD_MEMMAP)) { write_cr3((unsigned long)save_pgd); @@ -115,9 +159,28 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); - for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) + for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++) { + pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); + if (!(pgd_val(*pgd) & _PAGE_PRESENT)) + continue; + + for (i = 0; i < PTRS_PER_P4D; i++) { + p4d = p4d_offset(pgd, + pgd_idx * PGDIR_SIZE + i * P4D_SIZE); + + if (!(p4d_val(*p4d) & _PAGE_PRESENT)) + continue; + + pud = (pud_t *)p4d_page_vaddr(*p4d); + pud_free(&init_mm, pud); + } + + p4d = (p4d_t *)pgd_page_vaddr(*pgd); + p4d_free(&init_mm, p4d); + } + kfree(save_pgd); __flush_tlb_all(); From 7425826f4f7ac60f2538b06a7f0a5d1006405159 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Fri, 26 May 2017 12:36:51 +0100 Subject: [PATCH 096/249] efi/bgrt: Skip efi_bgrt_init() in case of non-EFI boot Sabrina Dubroca reported an early panic: BUG: unable to handle kernel paging request at ffffffffff240001 IP: efi_bgrt_init+0xdc/0x134 [...] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ... which was introduced by: 7b0a911478c7 ("efi/x86: Move the EFI BGRT init code to early init code") The cause is that on this machine the firmware provides the EFI ACPI BGRT table even on legacy non-EFI bootups - which table should be EFI only. The garbage BGRT data causes the efi_bgrt_init() panic. Add a check to skip efi_bgrt_init() in case non-EFI bootup to work around this firmware bug. Tested-by: Sabrina Dubroca Signed-off-by: Dave Young Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: # v4.11+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 7b0a911478c7 ("efi/x86: Move the EFI BGRT init code to early init code") Link: http://lkml.kernel.org/r/20170526113652.21339-6-matt@codeblueprint.co.uk [ Rewrote the changelog to be more readable. ] Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi-bgrt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/efi-bgrt.c b/drivers/firmware/efi/efi-bgrt.c index 04ca8764f0c0..8bf27323f7a3 100644 --- a/drivers/firmware/efi/efi-bgrt.c +++ b/drivers/firmware/efi/efi-bgrt.c @@ -36,6 +36,9 @@ void __init efi_bgrt_init(struct acpi_table_header *table) if (acpi_disabled) return; + if (!efi_enabled(EFI_BOOT)) + return; + if (table->length < sizeof(bgrt_tab)) { pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n", table->length, sizeof(bgrt_tab)); From fe3b81b446d4ecb954f1b9dd191164a78fd278ad Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 4 Apr 2017 17:08:42 -0700 Subject: [PATCH 097/249] NFS: Use ERR_CAST() to avoid cross-structure cast When the call to nfs_devname() fails, the error path attempts to retain the error via the mnt variable, but this requires a cast across very different types (char * to struct vfsmount *), which the upcoming structure layout randomization plugin flags as being potentially dangerous in the face of randomization. This is a false positive, but what this code actually wants to do is retain the error value, so this patch explicitly sets it, instead of using what seems to be an unexpected cast. Signed-off-by: Kees Cook Acked-by: Trond Myklebust Reviewed-by: Christoph Hellwig --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1a224a33a6c2..e5686be67be8 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -246,7 +246,7 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, devname = nfs_devname(dentry, page, PAGE_SIZE); if (IS_ERR(devname)) - mnt = (struct vfsmount *)devname; + mnt = ERR_CAST(devname); else mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); From fee2aa753823860f2b8dfe58d98cafe8e4840855 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 May 2017 14:45:26 -0700 Subject: [PATCH 098/249] ntfs: Use ERR_CAST() to avoid cross-structure cast When trying to propagate an error result, the error return path attempts to retain the error, but does this with an open cast across very different types, which the upcoming structure layout randomization plugin flags as being potentially dangerous in the face of randomization. This is a false positive, but what this code actually wants to do is use ERR_CAST() to retain the error value. Cc: Anton Altaparmakov Cc: Andrew Morton Signed-off-by: Kees Cook --- fs/ntfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 358258364616..4690cd75d8d7 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -159,7 +159,7 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, PTR_ERR(dent_inode)); kfree(name); /* Return the error code. */ - return (struct dentry *)dent_inode; + return ERR_CAST(dent_inode); } /* It is guaranteed that @name is no longer allocated at this point. */ if (MREF_ERR(mref) == -ENOENT) { From 7585d12f6555fdf4faaefec34ac58b28555b27d0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 May 2017 14:49:27 -0700 Subject: [PATCH 099/249] ocfs2: Use ERR_CAST() to avoid cross-structure cast When trying to propagate an error result, the error return path attempts to retain the error, but does this with an open cast across very different types, which the upcoming structure layout randomization plugin flags as being potentially dangerous in the face of randomization. This is a false positive, but what this code actually wants to do is use ERR_CAST() to retain the error value. Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Kees Cook --- fs/ocfs2/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 827fc9809bc2..9f88188060db 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -119,7 +119,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, if (IS_ERR(inode)) { mlog_errno(PTR_ERR(inode)); - result = (void *)inode; + result = ERR_CAST(inode); goto bail; } From 234041dfe5ca83d5c8122ec1999eaf3f00335d7b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 4 Apr 2017 22:07:10 -0700 Subject: [PATCH 100/249] sgi-xp: Use designated initializers Prepare to mark sensitive kernel structures for randomization by making sure they're using designated initializers. In this case, no initializers are needed (they can be NULL initialized and callers adjusted to check for NULL, which is more efficient than an indirect call). Cc: Robin Holt Signed-off-by: Kees Cook Reviewed-by: Christoph Hellwig --- drivers/misc/sgi-xp/xp.h | 12 +++++++++++- drivers/misc/sgi-xp/xp_main.c | 36 +++++++---------------------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index c862cd4583cc..b8069eec18cb 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -309,6 +309,9 @@ static inline enum xp_retval xpc_send(short partid, int ch_number, u32 flags, void *payload, u16 payload_size) { + if (!xpc_interface.send) + return xpNotLoaded; + return xpc_interface.send(partid, ch_number, flags, payload, payload_size); } @@ -317,6 +320,9 @@ static inline enum xp_retval xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, u16 payload_size, xpc_notify_func func, void *key) { + if (!xpc_interface.send_notify) + return xpNotLoaded; + return xpc_interface.send_notify(partid, ch_number, flags, payload, payload_size, func, key); } @@ -324,12 +330,16 @@ xpc_send_notify(short partid, int ch_number, u32 flags, void *payload, static inline void xpc_received(short partid, int ch_number, void *payload) { - return xpc_interface.received(partid, ch_number, payload); + if (xpc_interface.received) + xpc_interface.received(partid, ch_number, payload); } static inline enum xp_retval xpc_partid_to_nasids(short partid, void *nasids) { + if (!xpc_interface.partid_to_nasids) + return xpNotLoaded; + return xpc_interface.partid_to_nasids(partid, nasids); } diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c index 01be66d02ca8..6d7f557fd1c1 100644 --- a/drivers/misc/sgi-xp/xp_main.c +++ b/drivers/misc/sgi-xp/xp_main.c @@ -69,23 +69,9 @@ struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS]; EXPORT_SYMBOL_GPL(xpc_registrations); /* - * Initialize the XPC interface to indicate that XPC isn't loaded. + * Initialize the XPC interface to NULL to indicate that XPC isn't loaded. */ -static enum xp_retval -xpc_notloaded(void) -{ - return xpNotLoaded; -} - -struct xpc_interface xpc_interface = { - (void (*)(int))xpc_notloaded, - (void (*)(int))xpc_notloaded, - (enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded, - (enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func, - void *))xpc_notloaded, - (void (*)(short, int, void *))xpc_notloaded, - (enum xp_retval(*)(short, void *))xpc_notloaded -}; +struct xpc_interface xpc_interface = { }; EXPORT_SYMBOL_GPL(xpc_interface); /* @@ -115,17 +101,7 @@ EXPORT_SYMBOL_GPL(xpc_set_interface); void xpc_clear_interface(void) { - xpc_interface.connect = (void (*)(int))xpc_notloaded; - xpc_interface.disconnect = (void (*)(int))xpc_notloaded; - xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16)) - xpc_notloaded; - xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *, - u16, xpc_notify_func, - void *))xpc_notloaded; - xpc_interface.received = (void (*)(short, int, void *)) - xpc_notloaded; - xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *)) - xpc_notloaded; + memset(&xpc_interface, 0, sizeof(xpc_interface)); } EXPORT_SYMBOL_GPL(xpc_clear_interface); @@ -188,7 +164,8 @@ xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size, mutex_unlock(®istration->mutex); - xpc_interface.connect(ch_number); + if (xpc_interface.connect) + xpc_interface.connect(ch_number); return xpSuccess; } @@ -237,7 +214,8 @@ xpc_disconnect(int ch_number) registration->assigned_limit = 0; registration->idle_limit = 0; - xpc_interface.disconnect(ch_number); + if (xpc_interface.disconnect) + xpc_interface.disconnect(ch_number); mutex_unlock(®istration->mutex); From 2a9d6d26e2b76eee1064d221b49f3ec527546c53 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 6 May 2017 00:54:07 -0700 Subject: [PATCH 101/249] drm/amdgpu: Use designated initializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The randstruct plugin requires structures that are entirely function pointers be initialized using designated initializers. Cc: Alex Deucher Cc: Christian König Signed-off-by: Kees Cook --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index a4831fe0223b..a2c59a08b2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -220,9 +220,9 @@ static void amdgpu_vram_mgr_debug(struct ttm_mem_type_manager *man, } const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func = { - amdgpu_vram_mgr_init, - amdgpu_vram_mgr_fini, - amdgpu_vram_mgr_new, - amdgpu_vram_mgr_del, - amdgpu_vram_mgr_debug + .init = amdgpu_vram_mgr_init, + .takedown = amdgpu_vram_mgr_fini, + .get_node = amdgpu_vram_mgr_new, + .put_node = amdgpu_vram_mgr_del, + .debug = amdgpu_vram_mgr_debug }; From 3ddd396f6b57cbd5cb034498b5c4cd3dd920cf15 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 6 May 2017 01:09:00 -0700 Subject: [PATCH 102/249] drm/amd/powerplay: Use designated initializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The randstruct plugin requires designated initializers for structures that are entirely function pointers. Cc: Christian König Cc: Eric Huang Cc: Alex Deucher Signed-off-by: Kees Cook --- .../drm/amd/powerplay/hwmgr/vega10_thermal.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c index d5f53d04fa08..83e40fe51b62 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_thermal.c @@ -709,17 +709,17 @@ static int tf_vega10_thermal_disable_alert(struct pp_hwmgr *hwmgr, static struct phm_master_table_item vega10_thermal_start_thermal_controller_master_list[] = { - {NULL, tf_vega10_thermal_initialize}, - {NULL, tf_vega10_thermal_set_temperature_range}, - {NULL, tf_vega10_thermal_enable_alert}, + { .tableFunction = tf_vega10_thermal_initialize }, + { .tableFunction = tf_vega10_thermal_set_temperature_range }, + { .tableFunction = tf_vega10_thermal_enable_alert }, /* We should restrict performance levels to low before we halt the SMC. * On the other hand we are still in boot state when we do this * so it would be pointless. * If this assumption changes we have to revisit this table. */ - {NULL, tf_vega10_thermal_setup_fan_table}, - {NULL, tf_vega10_thermal_start_smc_fan_control}, - {NULL, NULL} + { .tableFunction = tf_vega10_thermal_setup_fan_table }, + { .tableFunction = tf_vega10_thermal_start_smc_fan_control }, + { } }; static struct phm_master_table_header @@ -731,10 +731,10 @@ vega10_thermal_start_thermal_controller_master = { static struct phm_master_table_item vega10_thermal_set_temperature_range_master_list[] = { - {NULL, tf_vega10_thermal_disable_alert}, - {NULL, tf_vega10_thermal_set_temperature_range}, - {NULL, tf_vega10_thermal_enable_alert}, - {NULL, NULL} + { .tableFunction = tf_vega10_thermal_disable_alert }, + { .tableFunction = tf_vega10_thermal_set_temperature_range }, + { .tableFunction = tf_vega10_thermal_enable_alert }, + { } }; struct phm_master_table_header From 243dd05d39aa14fac2ffde75cc66dee3270896f8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 6 May 2017 01:10:06 -0700 Subject: [PATCH 103/249] mtk-vcodec: Use designated initializers The randstruct plugin requires designated initializers for structures that are entirely function pointers. Cc: Wu-Cheng Li Cc: Tiffany Lin Cc: Hans Verkuil Cc: Mauro Carvalho Chehab Signed-off-by: Kees Cook --- drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c | 8 ++++---- drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c | 8 ++++---- drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c index 57a842ff3097..b7731b18ecae 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_h264_if.c @@ -493,10 +493,10 @@ static int vdec_h264_get_param(unsigned long h_vdec, } static struct vdec_common_if vdec_h264_if = { - vdec_h264_init, - vdec_h264_decode, - vdec_h264_get_param, - vdec_h264_deinit, + .init = vdec_h264_init, + .decode = vdec_h264_decode, + .get_param = vdec_h264_get_param, + .deinit = vdec_h264_deinit, }; struct vdec_common_if *get_h264_dec_comm_if(void); diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c index 6e7a62ae0842..b9fad6a48879 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp8_if.c @@ -620,10 +620,10 @@ static void vdec_vp8_deinit(unsigned long h_vdec) } static struct vdec_common_if vdec_vp8_if = { - vdec_vp8_init, - vdec_vp8_decode, - vdec_vp8_get_param, - vdec_vp8_deinit, + .init = vdec_vp8_init, + .decode = vdec_vp8_decode, + .get_param = vdec_vp8_get_param, + .deinit = vdec_vp8_deinit, }; struct vdec_common_if *get_vp8_dec_comm_if(void); diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c index 5539b1853f16..1daee1207469 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c @@ -979,10 +979,10 @@ static int vdec_vp9_get_param(unsigned long h_vdec, } static struct vdec_common_if vdec_vp9_if = { - vdec_vp9_init, - vdec_vp9_decode, - vdec_vp9_get_param, - vdec_vp9_deinit, + .init = vdec_vp9_init, + .decode = vdec_vp9_decode, + .get_param = vdec_vp9_get_param, + .deinit = vdec_vp9_deinit, }; struct vdec_common_if *get_vp9_dec_comm_if(void); From 4013ef48798ae954530bdb8971d3794333e46526 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 8 Mar 2017 15:12:53 +0100 Subject: [PATCH 104/249] drm/exynos: Merge pre/postclose hooks Again no apparent explanation for the split except hysterical raisins. Cc: Inki Dae Cc: Joonyoung Shim Cc: Seung-Woo Kim Cc: Kyungmin Park Signed-off-by: Daniel Vetter Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 09d3c4c3c858..50294a7bd29d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -82,14 +82,9 @@ static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) return ret; } -static void exynos_drm_preclose(struct drm_device *dev, - struct drm_file *file) -{ - exynos_drm_subdrv_close(dev, file); -} - static void exynos_drm_postclose(struct drm_device *dev, struct drm_file *file) { + exynos_drm_subdrv_close(dev, file); kfree(file->driver_priv); file->driver_priv = NULL; } @@ -145,7 +140,6 @@ static struct drm_driver exynos_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME | DRIVER_ATOMIC | DRIVER_RENDER, .open = exynos_drm_open, - .preclose = exynos_drm_preclose, .lastclose = exynos_drm_lastclose, .postclose = exynos_drm_postclose, .gem_free_object_unlocked = exynos_drm_gem_free_object, From f2921d8c4899a3d9d7c6326315c24efea2c9efd9 Mon Sep 17 00:00:00 2001 From: Hoegeun Kwon Date: Thu, 13 Apr 2017 15:05:26 +0900 Subject: [PATCH 105/249] drm/exynos: dsi: Fix the parse_dt function The dsi + panel is a parental relationship, so OF grpah is not needed. Therefore, the current dsi_parse_dt function will throw an error, because there is no linked OF graph for the case fimd + dsi + panel. Parse the Pll burst and esc clock frequency properties in dsi_parse_dt() and create a bridge_node only if there is an OF graph associated with dsi. Signed-off-by: Hoegeun Kwon Reviewed-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index fc4fda738906..24ab77c674ec 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1633,7 +1633,6 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi) { struct device *dev = dsi->dev; struct device_node *node = dev->of_node; - struct device_node *ep; int ret; ret = exynos_dsi_of_read_u32(node, "samsung,pll-clock-frequency", @@ -1641,32 +1640,21 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi) if (ret < 0) return ret; - ep = of_graph_get_endpoint_by_regs(node, DSI_PORT_OUT, 0); - if (!ep) { - dev_err(dev, "no output port with endpoint specified\n"); - return -EINVAL; - } - - ret = exynos_dsi_of_read_u32(ep, "samsung,burst-clock-frequency", + ret = exynos_dsi_of_read_u32(node, "samsung,burst-clock-frequency", &dsi->burst_clk_rate); if (ret < 0) - goto end; + return ret; - ret = exynos_dsi_of_read_u32(ep, "samsung,esc-clock-frequency", + ret = exynos_dsi_of_read_u32(node, "samsung,esc-clock-frequency", &dsi->esc_clk_rate); if (ret < 0) - goto end; - - of_node_put(ep); + return ret; dsi->bridge_node = of_graph_get_remote_node(node, DSI_PORT_OUT, 0); if (!dsi->bridge_node) return -EINVAL; -end: - of_node_put(ep); - - return ret; + return 0; } static int exynos_dsi_bind(struct device *dev, struct device *master, From 70505c2ef94b7584dc24d6eaaeb0fc335b99813a Mon Sep 17 00:00:00 2001 From: Hoegeun Kwon Date: Fri, 26 May 2017 10:02:01 +0900 Subject: [PATCH 106/249] drm/exynos: dsi: Remove bridge node reference in removal Since bridge node is referenced during in the probe, it should be released on removal. Suggested-by: Andrzej Hajda Signed-off-by: Hoegeun Kwon Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 24ab77c674ec..d404de86d5f9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -1805,6 +1805,10 @@ static int exynos_dsi_probe(struct platform_device *pdev) static int exynos_dsi_remove(struct platform_device *pdev) { + struct exynos_dsi *dsi = platform_get_drvdata(pdev); + + of_node_put(dsi->bridge_node); + pm_runtime_disable(&pdev->dev); component_del(&pdev->dev, &exynos_dsi_component_ops); From e379cbee79a8abb8e9b4da63187884939852ddb4 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Mon, 29 May 2017 09:59:05 +0900 Subject: [PATCH 107/249] drm/exynos: clean up description of exynos_drm_crtc This patch removes unnecessary descriptions on exynos_drm_crtc structure and adds one description which specifies what pipe_clk member does. pipe_clk support had been added by below patch without any description, drm/exynos: add support for pipeline clock to the framework Commit-id : f26b9343f582f44ec920474d71b4b2220b1ed9a8 Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index cb3176930596..39c740572034 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -160,12 +160,9 @@ struct exynos_drm_clk { * drm framework doesn't support multiple irq yet. * we can refer to the crtc to current hardware interrupt occurred through * this pipe value. - * @enabled: if the crtc is enabled or not - * @event: vblank event that is currently queued for flip - * @wait_update: wait all pending planes updates to finish - * @pending_update: number of pending plane updates in this crtc * @ops: pointer to callbacks for exynos drm specific functionality * @ctx: A pointer to the crtc's implementation specific context + * @pipe_clk: A pointer to the crtc's pipeline clock. */ struct exynos_drm_crtc { struct drm_crtc base; From dac6ca243c4c49a9ca7507d3d66140ebfac8b04b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 28 May 2017 22:04:14 +0200 Subject: [PATCH 108/249] x86/microcode/AMD: Change load_microcode_amd()'s param to bool to fix preemptibility bug With CONFIG_DEBUG_PREEMPT enabled, I get: BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 caller is debug_smp_processor_id CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.12.0-rc2+ #2 Call Trace: dump_stack check_preemption_disabled debug_smp_processor_id save_microcode_in_initrd_amd ? microcode_init save_microcode_in_initrd ... because, well, it says it above, we're using smp_processor_id() in preemptible code. But passing the CPU number is not really needed. It is only used to determine whether we're on the BSP, and, if so, to save the microcode patch for early loading. [ We don't absolutely need to do it on the BSP but we do that customarily there. ] Instead, convert that function parameter to a boolean which denotes whether the patch should be saved or not, thereby avoiding the use of smp_processor_id() in preemptible code. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170528200414.31305-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/amd.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 45db4d2ebd01..e9f4d762aa5b 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size); int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { @@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) if (!desc.mc) return -EINVAL; - ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), - desc.data, desc.size); + ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); if (ret != UCODE_OK) return -EINVAL; @@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, } static enum ucode_state -load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) +load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { enum ucode_state ret; @@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size) #ifdef CONFIG_X86_32 /* save BSP's matching patch for early load */ - if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { - struct ucode_patch *p = find_patch(cpu); + if (save) { + struct ucode_patch *p = find_patch(0); if (p) { memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), @@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, { char fw_name[36] = "amd-ucode/microcode_amd.bin"; struct cpuinfo_x86 *c = &cpu_data(cpu); + bool bsp = c->cpu_index == boot_cpu_data.cpu_index; enum ucode_state ret = UCODE_NFOUND; const struct firmware *fw; /* reload ucode container only on the boot cpu */ - if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + if (!refresh_fw || !bsp) return UCODE_OK; if (c->x86 >= 0x15) @@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size); + ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size); fw_release: release_firmware(fw); From 5d9070b1f0fc9a159a9a3240c43004828408444b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 28 May 2017 11:03:42 +0200 Subject: [PATCH 109/249] x86/debug/32: Convert a smp_processor_id() call to raw to avoid DEBUG_PREEMPT warning ... to raw_smp_processor_id() to not trip the BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 check. The reasoning behind it is that __warn() already uses the raw_ variants but the show_regs() path on 32-bit doesn't. Signed-off-by: Borislav Petkov Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170528092212.fiod7kygpjm23m3o@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index ff40e74c9181..ffeae818aa7a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -78,7 +78,7 @@ void __show_regs(struct pt_regs *regs, int all) printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip); printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags, - smp_processor_id()); + raw_smp_processor_id()); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); From 6ac56951dc10232e24419f6972fc8131dd0166e0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 22 May 2017 19:59:24 +0200 Subject: [PATCH 110/249] rbd: implement REQ_OP_WRITE_ZEROES Commit 93c1defedcae ("rbd: remove the discard_zeroes_data flag") explicitly didn't implement REQ_OP_WRITE_ZEROES for rbd, while the following commit 48920ff2a5a9 ("block: remove the discard_zeroes_data flag") dropped ->discard_zeroes_data in favor of REQ_OP_WRITE_ZEROES. rbd does support efficient zeroing via CEPH_OSD_OP_ZERO opcode and will release either some or all blocks depending on whether the zeroing request is rbd_obj_bytes() aligned. This is how we currently implement discards, so REQ_OP_WRITE_ZEROES can be identical to REQ_OP_DISCARD for now. Caveats: - REQ_NOUNMAP is ignored, but AFAICT that's true of at least two other current implementations - nvme and loop - there is no ->write_zeroes_alignment and blk_bio_write_zeroes_split() is hence less helpful than blk_bio_discard_split(), but this can (and should) be fixed on the rbd side In the future we will split these into two code paths to respect REQ_NOUNMAP on zeroout and save on zeroing blocks that couldn't be released on discard. Fixes: 93c1defedcae ("rbd: remove the discard_zeroes_data flag") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman Reviewed-by: Christoph Hellwig --- drivers/block/rbd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 454bf9c34882..c16f74547804 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4023,6 +4023,7 @@ static void rbd_queue_workfn(struct work_struct *work) switch (req_op(rq)) { case REQ_OP_DISCARD: + case REQ_OP_WRITE_ZEROES: op_type = OBJ_OP_DISCARD; break; case REQ_OP_WRITE: @@ -4420,6 +4421,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.discard_granularity = segment_size; q->limits.discard_alignment = segment_size; blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); + blk_queue_max_write_zeroes_sectors(q, segment_size / SECTOR_SIZE); if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) q->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; From f3a1568582cc207663a4d5e37da790334372855b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Wed, 24 May 2017 15:29:33 +0300 Subject: [PATCH 111/249] ovl: mark upper merge dir with type origin entries "impure" An upper dir is marked "impure" to let ovl_iterate() know that this directory may contain non pure upper entries whose d_ino may need to be read from the origin inode. We already mark a non-merge dir "impure" when moving a non-pure child entry inside it, to let ovl_iterate() know not to iterate the non-merge dir directly. Mark also a merge dir "impure" when moving a non-pure child entry inside it and when copying up a child entry inside it. This can be used to optimize ovl_iterate() to perform a "pure merge" of upper and lower directories, merging the content of the directories, without having to read d_ino from origin inodes. Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 5 +++++ fs/overlayfs/dir.c | 31 +++++------------------------ fs/overlayfs/namei.c | 20 ------------------- fs/overlayfs/overlayfs.h | 15 ++++++++++---- fs/overlayfs/super.c | 5 ++++- fs/overlayfs/util.c | 42 +++++++++++++++++++++++++++++++++------- 6 files changed, 60 insertions(+), 58 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 843ed2a2d7db..7a44533f4bbf 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -459,6 +459,11 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; + /* Mark parent "impure" because it may now contain non-pure upper */ + err = ovl_set_impure(parent, upperdir); + if (err) + return err; + err = vfs_getattr(&parentpath, &pstat, STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f2a118ba00e4..a63a71656e9b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -149,22 +149,6 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return ovl_set_opaque_xerr(dentry, upperdentry, -EIO); } -static int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) -{ - int err; - - /* - * Do not fail when upper doesn't support xattrs. - * Upper inodes won't have origin nor redirect xattr anyway. - */ - err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, - "y", 1, 0); - if (!err) - ovl_dentry_set_impure(dentry); - - return err; -} - /* Common operations required to be done after creation of file on upper */ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, struct dentry *newdentry, bool hardlink) @@ -976,21 +960,16 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (!samedir) { /* * When moving a merge dir or non-dir with copy up origin into - * a non-merge upper dir (a.k.a pure upper dir), we are making - * the target parent dir "impure". ovl_iterate() iterates pure - * upper dirs directly, because there is no need to filter out - * whiteouts and merge dir content with lower dir. But for the - * case of an "impure" upper dir, ovl_iterate() cannot iterate - * the real directory directly, because it looks for the inode - * numbers to fill d_ino in the entries origin inode. + * a new parent, we are marking the new parent dir "impure". + * When ovl_iterate() iterates an "impure" upper dir, it will + * lookup the origin inodes of the entries to fill d_ino. */ - if (ovl_type_origin(old) && !ovl_type_merge(new->d_parent)) { + if (ovl_type_origin(old)) { err = ovl_set_impure(new->d_parent, new_upperdir); if (err) goto out_revert_creds; } - if (!overwrite && ovl_type_origin(new) && - !ovl_type_merge(old->d_parent)) { + if (!overwrite && ovl_type_origin(new)) { err = ovl_set_impure(old->d_parent, old_upperdir); if (err) goto out_revert_creds; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 0c72a5909db2..f3136c31e72a 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -167,31 +167,11 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, goto out; } -static bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) -{ - int res; - char val; - - if (!d_is_dir(dentry)) - return false; - - res = vfs_getxattr(dentry, name, &val, 1); - if (res == 1 && val == 'y') - return true; - - return false; -} - static bool ovl_is_opaquedir(struct dentry *dentry) { return ovl_check_dir_xattr(dentry, OVL_XATTR_OPAQUE); } -static bool ovl_is_impuredir(struct dentry *dentry) -{ - return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); -} - static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d, const char *name, unsigned int namelen, size_t prelen, const char *post, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index a9fb958fd5d4..0623cebeefff 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -207,7 +207,6 @@ bool ovl_dentry_is_opaque(struct dentry *dentry); bool ovl_dentry_is_impure(struct dentry *dentry); bool ovl_dentry_is_whiteout(struct dentry *dentry); void ovl_dentry_set_opaque(struct dentry *dentry); -void ovl_dentry_set_impure(struct dentry *dentry); bool ovl_redirect_dir(struct super_block *sb); const char *ovl_dentry_get_redirect(struct dentry *dentry); void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect); @@ -221,6 +220,17 @@ bool ovl_is_whiteout(struct dentry *dentry); struct file *ovl_path_open(struct path *path, int flags); int ovl_copy_up_start(struct dentry *dentry); void ovl_copy_up_end(struct dentry *dentry); +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name); +int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, + const char *name, const void *value, size_t size, + int xerr); +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); + +static inline bool ovl_is_impuredir(struct dentry *dentry) +{ + return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE); +} + /* namei.c */ int ovl_path_next(int idx, struct dentry *dentry, struct path *path); @@ -281,6 +291,3 @@ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_flags(struct dentry *dentry, int flags); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, - const char *name, const void *value, size_t size, - int xerr); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index f1647626a882..4882ffb37bae 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -974,7 +974,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) path_put(&workpath); kfree(lowertmp); - oe->__upperdentry = upperpath.dentry; + if (upperpath.dentry) { + oe->__upperdentry = upperpath.dentry; + oe->impure = ovl_is_impuredir(upperpath.dentry); + } for (i = 0; i < numlower; i++) { oe->lowerstack[i].dentry = stack[i].dentry; oe->lowerstack[i].mnt = ufs->lower_mnt[i]; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index e0dfb07d5457..809048913889 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -194,13 +194,6 @@ void ovl_dentry_set_opaque(struct dentry *dentry) oe->opaque = true; } -void ovl_dentry_set_impure(struct dentry *dentry) -{ - struct ovl_entry *oe = dentry->d_fsdata; - - oe->impure = true; -} - bool ovl_redirect_dir(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; @@ -311,6 +304,21 @@ void ovl_copy_up_end(struct dentry *dentry) spin_unlock(&ofs->copyup_wq.lock); } +bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) +{ + int res; + char val; + + if (!d_is_dir(dentry)) + return false; + + res = vfs_getxattr(dentry, name, &val, 1); + if (res == 1 && val == 'y') + return true; + + return false; +} + int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, const char *name, const void *value, size_t size, int xerr) @@ -331,3 +339,23 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, return err; } + +int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) +{ + int err; + struct ovl_entry *oe = dentry->d_fsdata; + + if (oe->impure) + return 0; + + /* + * Do not fail when upper doesn't support xattrs. + * Upper inodes won't have origin nor redirect xattr anyway. + */ + err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, + "y", 1, 0); + if (!err) + oe->impure = true; + + return err; +} From 118b90f3f18e733c99f0e8b98ea31a815ffc4d14 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:22 +0300 Subject: [PATCH 112/249] drm/dp: add helper for reading DP sink/branch device desc from DPCD Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/acba54da7d80eafea9e59a893e27e3c31028c0ba.1495105635.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_dp_helper.c | 35 +++++++++++++++++++++++++++++++++ include/drm/drm_dp_helper.h | 19 ++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 3e5f52110ea1..52e0ca9a5bb1 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1208,3 +1208,38 @@ int drm_dp_stop_crc(struct drm_dp_aux *aux) return 0; } EXPORT_SYMBOL(drm_dp_stop_crc); + +/** + * drm_dp_read_desc - read sink/branch descriptor from DPCD + * @aux: DisplayPort AUX channel + * @desc: Device decriptor to fill from DPCD + * @is_branch: true for branch devices, false for sink devices + * + * Read DPCD 0x400 (sink) or 0x500 (branch) into @desc. Also debug log the + * identification. + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, + bool is_branch) +{ + struct drm_dp_dpcd_ident *ident = &desc->ident; + unsigned int offset = is_branch ? DP_BRANCH_OUI : DP_SINK_OUI; + int ret, dev_id_len; + + ret = drm_dp_dpcd_read(aux, offset, ident, sizeof(*ident)); + if (ret < 0) + return ret; + + dev_id_len = strnlen(ident->device_id, sizeof(ident->device_id)); + + DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n", + is_branch ? "branch" : "sink", + (int)sizeof(ident->oui), ident->oui, + dev_id_len, ident->device_id, + ident->hw_rev >> 4, ident->hw_rev & 0xf, + ident->sw_major_rev, ident->sw_minor_rev); + + return 0; +} +EXPORT_SYMBOL(drm_dp_read_desc); diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index c0bd0d7651a9..84502da177a1 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -913,4 +913,23 @@ void drm_dp_aux_unregister(struct drm_dp_aux *aux); int drm_dp_start_crc(struct drm_dp_aux *aux, struct drm_crtc *crtc); int drm_dp_stop_crc(struct drm_dp_aux *aux); +struct drm_dp_dpcd_ident { + u8 oui[3]; + u8 device_id[6]; + u8 hw_rev; + u8 sw_major_rev; + u8 sw_minor_rev; +} __packed; + +/** + * struct drm_dp_desc - DP branch/sink device descriptor + * @ident: DP device identification from DPCD 0x400 (sink) or 0x500 (branch). + */ +struct drm_dp_desc { + struct drm_dp_dpcd_ident ident; +}; + +int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, + bool is_branch); + #endif /* _DRM_DP_HELPER_H_ */ From 84c367533bd24108e5392b355280647a4a2893ac Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:23 +0300 Subject: [PATCH 113/249] drm/i915: use drm DP helper to read DPCD desc Switch to using the common DP helpers instead of using our own. v2: also remove leftover struct intel_dp_desc (Daniel) Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 37 ++++------------------------- drivers/gpu/drm/i915/intel_drv.h | 13 +--------- drivers/gpu/drm/i915/intel_lspcon.c | 2 +- 3 files changed, 6 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ee77b519835c..5ce45d98da78 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1507,37 +1507,6 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) DRM_DEBUG_KMS("common rates: %s\n", str); } -bool -__intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc) -{ - u32 base = drm_dp_is_branch(intel_dp->dpcd) ? DP_BRANCH_OUI : - DP_SINK_OUI; - - return drm_dp_dpcd_read(&intel_dp->aux, base, desc, sizeof(*desc)) == - sizeof(*desc); -} - -bool intel_dp_read_desc(struct intel_dp *intel_dp) -{ - struct intel_dp_desc *desc = &intel_dp->desc; - bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & - DP_OUI_SUPPORT; - int dev_id_len; - - if (!__intel_dp_read_desc(intel_dp, desc)) - return false; - - dev_id_len = strnlen(desc->device_id, sizeof(desc->device_id)); - DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n", - drm_dp_is_branch(intel_dp->dpcd) ? "branch" : "sink", - (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)", - dev_id_len, desc->device_id, - desc->hw_rev >> 4, desc->hw_rev & 0xf, - desc->sw_major_rev, desc->sw_minor_rev); - - return true; -} - static int rate_to_index(int find, const int *rates) { int i = 0; @@ -3622,7 +3591,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) if (!intel_dp_read_dpcd(intel_dp)) return false; - intel_dp_read_desc(intel_dp); + drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, + drm_dp_is_branch(intel_dp->dpcd)); if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11) dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] & @@ -4624,7 +4594,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp_print_rates(intel_dp); - intel_dp_read_desc(intel_dp); + drm_dp_read_desc(&intel_dp->aux, &intel_dp->desc, + drm_dp_is_branch(intel_dp->dpcd)); intel_dp_configure_mst(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index aaee3949a422..f630c7af5020 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -906,14 +906,6 @@ enum link_m_n_set { M2_N2 }; -struct intel_dp_desc { - u8 oui[3]; - u8 device_id[6]; - u8 hw_rev; - u8 sw_major_rev; - u8 sw_minor_rev; -} __packed; - struct intel_dp_compliance_data { unsigned long edid; uint8_t video_pattern; @@ -957,7 +949,7 @@ struct intel_dp { /* Max link BW for the sink as per DPCD registers */ int max_sink_link_bw; /* sink or branch descriptor */ - struct intel_dp_desc desc; + struct drm_dp_desc desc; struct drm_dp_aux aux; enum intel_display_power_domain aux_power_domain; uint8_t train_set[4]; @@ -1532,9 +1524,6 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) } bool intel_dp_read_dpcd(struct intel_dp *intel_dp); -bool __intel_dp_read_desc(struct intel_dp *intel_dp, - struct intel_dp_desc *desc); -bool intel_dp_read_desc(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); bool intel_digital_port_connected(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 71cbe9c08932..5abef482eacf 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -240,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - intel_dp_read_desc(dp); + drm_dp_read_desc(&dp->aux, &dp->desc, drm_dp_is_branch(dp->dpcd)); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; From 76fa998acd86b6b40d0217e12af39c2406bdcd2b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:24 +0300 Subject: [PATCH 114/249] drm/dp: start a DPCD based DP sink/branch device quirk database MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Face the fact, there are Display Port sink and branch devices out there in the wild that don't follow the Display Port specifications, or they have bugs, or just otherwise require special treatment. Start a common quirk database the drivers can query based on the DP device identification. At least for now, we leave the workarounds for the drivers to implement as they see fit. For starters, add a branch device that can't handle full 24-bit main link Mdiv and Ndiv main link attributes properly. Naturally, the workaround of reducing main link attributes for all devices ended up in regressions for other devices. So here we are. v2: Rebase on DRM DP desc read helpers v3: Fix the OUI memcmp blunder (Clint) Cc: Ville Syrjälä Cc: Dhinakaran Pandiyan Cc: Clint Taylor Cc: Adam Jackson Cc: Harry Wentland Tested-by: Clinton Taylor Reviewed-by: Clinton Taylor Reviewed-by: Daniel Vetter # v2 Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/91ec198dd95258dbf3bee2f6be739e0da73b4fdd.1495105635.git.jani.nikula@intel.com --- drivers/gpu/drm/drm_dp_helper.c | 52 +++++++++++++++++++++++++++++++-- include/drm/drm_dp_helper.h | 32 ++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 52e0ca9a5bb1..213fb837e1c4 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1209,6 +1209,51 @@ int drm_dp_stop_crc(struct drm_dp_aux *aux) } EXPORT_SYMBOL(drm_dp_stop_crc); +struct dpcd_quirk { + u8 oui[3]; + bool is_branch; + u32 quirks; +}; + +#define OUI(first, second, third) { (first), (second), (third) } + +static const struct dpcd_quirk dpcd_quirk_list[] = { + /* Analogix 7737 needs reduced M and N at HBR2 link rates */ + { OUI(0x00, 0x22, 0xb9), true, BIT(DP_DPCD_QUIRK_LIMITED_M_N) }, +}; + +#undef OUI + +/* + * Get a bit mask of DPCD quirks for the sink/branch device identified by + * ident. The quirk data is shared but it's up to the drivers to act on the + * data. + * + * For now, only the OUI (first three bytes) is used, but this may be extended + * to device identification string and hardware/firmware revisions later. + */ +static u32 +drm_dp_get_quirks(const struct drm_dp_dpcd_ident *ident, bool is_branch) +{ + const struct dpcd_quirk *quirk; + u32 quirks = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(dpcd_quirk_list); i++) { + quirk = &dpcd_quirk_list[i]; + + if (quirk->is_branch != is_branch) + continue; + + if (memcmp(quirk->oui, ident->oui, sizeof(ident->oui)) != 0) + continue; + + quirks |= quirk->quirks; + } + + return quirks; +} + /** * drm_dp_read_desc - read sink/branch descriptor from DPCD * @aux: DisplayPort AUX channel @@ -1231,14 +1276,17 @@ int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, if (ret < 0) return ret; + desc->quirks = drm_dp_get_quirks(ident, is_branch); + dev_id_len = strnlen(ident->device_id, sizeof(ident->device_id)); - DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n", + DRM_DEBUG_KMS("DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d quirks 0x%04x\n", is_branch ? "branch" : "sink", (int)sizeof(ident->oui), ident->oui, dev_id_len, ident->device_id, ident->hw_rev >> 4, ident->hw_rev & 0xf, - ident->sw_major_rev, ident->sw_minor_rev); + ident->sw_major_rev, ident->sw_minor_rev, + desc->quirks); return 0; } diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 84502da177a1..bb837310c07e 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -924,12 +924,44 @@ struct drm_dp_dpcd_ident { /** * struct drm_dp_desc - DP branch/sink device descriptor * @ident: DP device identification from DPCD 0x400 (sink) or 0x500 (branch). + * @quirks: Quirks; use drm_dp_has_quirk() to query for the quirks. */ struct drm_dp_desc { struct drm_dp_dpcd_ident ident; + u32 quirks; }; int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, bool is_branch); +/** + * enum drm_dp_quirk - Display Port sink/branch device specific quirks + * + * Display Port sink and branch devices in the wild have a variety of bugs, try + * to collect them here. The quirks are shared, but it's up to the drivers to + * implement workarounds for them. + */ +enum drm_dp_quirk { + /** + * @DP_DPCD_QUIRK_LIMITED_M_N: + * + * The device requires main link attributes Mvid and Nvid to be limited + * to 16 bits. + */ + DP_DPCD_QUIRK_LIMITED_M_N, +}; + +/** + * drm_dp_has_quirk() - does the DP device have a specific quirk + * @desc: Device decriptor filled by drm_dp_read_desc() + * @quirk: Quirk to query for + * + * Return true if DP device identified by @desc has @quirk. + */ +static inline bool +drm_dp_has_quirk(const struct drm_dp_desc *desc, enum drm_dp_quirk quirk) +{ + return desc->quirks & BIT(quirk); +} + #endif /* _DRM_DP_HELPER_H_ */ From b31e85eda38c58cae986162ae2c462b53b0a2065 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 18 May 2017 14:10:25 +0300 Subject: [PATCH 115/249] drm/i915: Detect USB-C specific dongles before reducing M and N MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Analogix 7737 DP to HDMI converter requires reduced M and N values when to operate correctly at HBR2. We tried to reduce the M/N values for all devices in commit 9a86cda07af2 ("drm/i915/dp: reduce link M/N parameters"), but that regressed some other sinks. Detect this IC by its OUI value of 0x0022B9 via the DPCD quirk list, and only reduce the M/N values for that. v2 by Jani: Rebased on the DP quirk database v3 by Jani: Rebased on the reworked DP quirk database v4 by Jani: Improve commit message (Daniel) Fixes: 9a86cda07af2 ("drm/i915/dp: reduce link M/N parameters") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93578 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100755 Cc: Jani Nikula Cc: Dhinakaran Pandiyan Cc: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Clint Taylor Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/2d2e30f8f47d3f28c9b74ca2612336a54585c3ec.1495105635.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/intel_display.c | 22 ++++++++++++++-------- drivers/gpu/drm/i915/intel_dp.c | 8 ++++++-- drivers/gpu/drm/i915/intel_dp_mst.c | 5 ++++- 4 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c9b0949f6c1a..963f6d4481f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -562,7 +562,8 @@ struct intel_link_m_n { void intel_link_compute_m_n(int bpp, int nlanes, int pixel_clock, int link_clock, - struct intel_link_m_n *m_n); + struct intel_link_m_n *m_n, + bool reduce_m_n); /* Interface history: * diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3617927af269..3cabe52a4e3b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6101,7 +6101,7 @@ static int ironlake_fdi_compute_config(struct intel_crtc *intel_crtc, pipe_config->fdi_lanes = lane; intel_link_compute_m_n(pipe_config->pipe_bpp, lane, fdi_dotclock, - link_bw, &pipe_config->fdi_m_n); + link_bw, &pipe_config->fdi_m_n, false); ret = ironlake_check_fdi_lanes(dev, intel_crtc->pipe, pipe_config); if (ret == -EINVAL && pipe_config->pipe_bpp > 6*3) { @@ -6277,7 +6277,8 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) } static void compute_m_n(unsigned int m, unsigned int n, - uint32_t *ret_m, uint32_t *ret_n) + uint32_t *ret_m, uint32_t *ret_n, + bool reduce_m_n) { /* * Reduce M/N as much as possible without loss in precision. Several DP @@ -6285,9 +6286,11 @@ static void compute_m_n(unsigned int m, unsigned int n, * values. The passed in values are more likely to have the least * significant bits zero than M after rounding below, so do this first. */ - while ((m & 1) == 0 && (n & 1) == 0) { - m >>= 1; - n >>= 1; + if (reduce_m_n) { + while ((m & 1) == 0 && (n & 1) == 0) { + m >>= 1; + n >>= 1; + } } *ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX); @@ -6298,16 +6301,19 @@ static void compute_m_n(unsigned int m, unsigned int n, void intel_link_compute_m_n(int bits_per_pixel, int nlanes, int pixel_clock, int link_clock, - struct intel_link_m_n *m_n) + struct intel_link_m_n *m_n, + bool reduce_m_n) { m_n->tu = 64; compute_m_n(bits_per_pixel * pixel_clock, link_clock * nlanes * 8, - &m_n->gmch_m, &m_n->gmch_n); + &m_n->gmch_m, &m_n->gmch_n, + reduce_m_n); compute_m_n(pixel_clock, link_clock, - &m_n->link_m, &m_n->link_n); + &m_n->link_m, &m_n->link_n, + reduce_m_n); } static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 5ce45d98da78..fc691b8b317c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1593,6 +1593,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, int common_rates[DP_MAX_SUPPORTED_RATES] = {}; int common_len; uint8_t link_bw, rate_select; + bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, + DP_DPCD_QUIRK_LIMITED_M_N); common_len = intel_dp_common_rates(intel_dp, common_rates); @@ -1722,7 +1724,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_link_compute_m_n(bpp, lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, - &pipe_config->dp_m_n); + &pipe_config->dp_m_n, + reduce_m_n); if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { @@ -1730,7 +1733,8 @@ intel_dp_compute_config(struct intel_encoder *encoder, intel_link_compute_m_n(bpp, lane_count, intel_connector->panel.downclock_mode->clock, pipe_config->port_clock, - &pipe_config->dp_m2_n2); + &pipe_config->dp_m2_n2, + reduce_m_n); } /* diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c1f62eb07c07..989e25577ac0 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -44,6 +44,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, int lane_count, slots; const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; int mst_pbn; + bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, + DP_DPCD_QUIRK_LIMITED_M_N); pipe_config->has_pch_encoder = false; bpp = 24; @@ -75,7 +77,8 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, intel_link_compute_m_n(bpp, lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, - &pipe_config->dp_m_n); + &pipe_config->dp_m_n, + reduce_m_n); pipe_config->dp_m_n.tu = slots; From f3d3eab667de62572376abb1aa26316191c39929 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 9 May 2017 10:04:36 +0200 Subject: [PATCH 116/249] HID: i2c: Call acpi_device_fix_up_power for ACPI-enumerated devices For ACPI devices which do not have a _PSC method, the ACPI subsys cannot query their initial state at boot, so these devices are assumed to have been put in D0 by the BIOS, but for touchscreens that is not always true. This commit adds a call to acpi_device_fix_up_power to explicitly put devices without a _PSC method into D0 state (for devices with a _PSC method it is a nop). Note we only need to do this on probe, after a resume the ACPI subsys knows the device is in D3 and will properly put it in D0. This fixes the SIS0817 i2c-hid touchscreen on a Peaq C1010 2-in-1 device failing to probe with a "hid_descr_cmd failed" error. Acked-by: Benjamin Tissoires Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 8daa8ce64ebb..fb55fb4c39fc 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -897,6 +897,15 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client, return 0; } +static void i2c_hid_acpi_fix_up_power(struct device *dev) +{ + acpi_handle handle = ACPI_HANDLE(dev); + struct acpi_device *adev; + + if (handle && acpi_bus_get_device(handle, &adev) == 0) + acpi_device_fix_up_power(adev); +} + static const struct acpi_device_id i2c_hid_acpi_match[] = { {"ACPI0C50", 0 }, {"PNP0C50", 0 }, @@ -909,6 +918,8 @@ static inline int i2c_hid_acpi_pdata(struct i2c_client *client, { return -ENODEV; } + +static inline void i2c_hid_acpi_fix_up_power(struct device *dev) {} #endif #ifdef CONFIG_OF @@ -1030,6 +1041,8 @@ static int i2c_hid_probe(struct i2c_client *client, if (ret < 0) goto err_regulator; + i2c_hid_acpi_fix_up_power(&client->dev); + pm_runtime_get_noresume(&client->dev); pm_runtime_set_active(&client->dev); pm_runtime_enable(&client->dev); From a082c6f680da298cf075886ff032f32ccb7c5e1a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 29 May 2017 15:15:27 +0200 Subject: [PATCH 117/249] ovl: filter trusted xattr for non-admin Filesystems filter out extended attributes in the "trusted." domain for unprivlieged callers. Overlay calls underlying filesystem's method with elevated privs, so need to do the filtering in overlayfs too. Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ad9547f82da5..d613e2c41242 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -240,6 +240,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name, return res; } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xatts */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); @@ -263,7 +273,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { From b0f5a8f32e8bbdaae1abb8abe2d3cbafaba57e08 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 29 May 2017 09:22:07 +0200 Subject: [PATCH 118/249] kthread: fix boot hang (regression) on MIPS/OpenRISC This fixes a regression in commit 4d6501dce079 where I didn't notice that MIPS and OpenRISC were reinitialising p->{set,clear}_child_tid to NULL after our initialisation in copy_process(). We can simply get rid of the arch-specific initialisation here since it is now always done in copy_process() before hitting copy_thread{,_tls}(). Review notes: - As far as I can tell, copy_process() is the only user of copy_thread_tls(), which is the only caller of copy_thread() for architectures that don't implement copy_thread_tls(). - After this patch, there is no arch-specific code touching p->set_child_tid or p->clear_child_tid whatsoever. - It may look like MIPS/OpenRISC wanted to always have these fields be NULL, but that's not true, as copy_process() would unconditionally set them again _after_ calling copy_thread_tls() before commit 4d6501dce079. Fixes: 4d6501dce079c1eb6bf0b1d8f528a5e81770109e ("kthread: Fix use-after-free if kthread fork fails") Reported-by: Guenter Roeck Tested-by: Guenter Roeck # MIPS only Acked-by: Stafford Horne Acked-by: Oleg Nesterov Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Jonas Bonn Cc: Stefan Kristiansson Cc: openrisc@lists.librecores.org Cc: Jamie Iles Cc: Thomas Gleixner Signed-off-by: Vegard Nossum Signed-off-by: Linus Torvalds --- arch/mips/kernel/process.c | 1 - arch/openrisc/kernel/process.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 918d4c73e951..5351e1f3950d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -120,7 +120,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, struct thread_info *ti = task_thread_info(p); struct pt_regs *childregs, *regs = current_pt_regs(); unsigned long childksp; - p->set_child_tid = p->clear_child_tid = NULL; childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index f8da545854f9..106859ae27ff 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -167,8 +167,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, top_of_kernel_stack = sp; - p->set_child_tid = p->clear_child_tid = NULL; - /* Locate userspace context on stack... */ sp -= STACK_FRAME_OVERHEAD; /* redzone */ sp -= sizeof(struct pt_regs); From 878d8db039daac0938238e9a40a5bd6e50ee3c9b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 10 May 2017 18:12:40 +0200 Subject: [PATCH 119/249] Revert "ACPI / button: Change default behavior to lid_init_state=open" Revert commit 77e9a4aa9de1 (ACPI / button: Change default behavior to lid_init_state=open) which changed the kernel's behavior on laptops that boot with closed lids and expect the lid switch state to be reported accurately by the kernel. If you boot or resume your laptop with the lid closed on a docking station while using an external monitor connected to it, both internal and external displays will light on, while only the external should. There is a design choice in gdm to only provide the greeter on the internal display when lit on, so users only see a gray area on the external monitor. Also, the cursor will not show up as it's by default on the internal display too. To "fix" that, users have to open the laptop once and close it once again to sync the state of the switch with the hardware state. Even if the "method" operation mode implementation can be buggy on some platforms, the "open" choice is worse. It breaks docking stations basically and there is no way to have a user-space hwdb to fix that. On the contrary, it's rather easy in user-space to have a hwdb with the problematic platforms. Then, libinput (1.7.0+) can fix the state of the lid switch for us: you need to set the udev property LIBINPUT_ATTR_LID_SWITCH_RELIABILITY to 'write_open'. When libinput detects internal keyboard events, it will overwrite the state of the switch to open, making it reliable again. Given that logind only checks the lid switch value after a timeout, we can assume the user will use the internal keyboard before this timeout expires. For example, such a hwdb entry is: libinput:name:*Lid Switch*:dmi:*svnMicrosoftCorporation:pnSurface3:* LIBINPUT_ATTR_LID_SWITCH_RELIABILITY=write_open Link: https://bugzilla.gnome.org/show_bug.cgi?id=782380 Cc: 4.11+ # 4.11+ Signed-off-by: Benjamin Tissoires Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 6d5a8c1d3132..e19f530f1083 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -113,7 +113,7 @@ struct acpi_button { static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier); static struct acpi_device *lid_device; -static u8 lid_init_state = ACPI_BUTTON_LID_INIT_OPEN; +static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD; static unsigned long lid_report_interval __read_mostly = 500; module_param(lid_report_interval, ulong, 0644); From 2ea65321b83539afc1d45c1bea39c55ab42af62b Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Tue, 9 May 2017 13:57:31 +0800 Subject: [PATCH 120/249] ACPICA: Tables: Fix regression introduced by a too early mechanism enabling In the Linux kernel, acpi_get_table() "clones" haven't been fully balanced by acpi_put_table() invocations. In upstream ACPICA, due to the design change, there are also unbalanced acpi_get_table_by_index() invocations requiring special care. acpi_get_table() reference counting mismatches may occor due to that and printing error messages related to them is not useful at this point. The strict balanced validation count check should only be enabled after confirming that all invocations are safe and aligned with their designed purposes. Thus this patch removes the error value returned by acpi_tb_get_table() in that case along with the accompanying error message to fix the issue. Fixes: 174cc7187e6f (ACPICA: Tables: Back port acpi_get_table_with_size() and early_acpi_os_unmap_memory() from Linux kernel) Cc: 4.10+ # 4.10+ Reported-by: Anush Seetharaman Reported-by: Dan Williams Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbutils.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 5a968a78652b..7abe66505739 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -418,11 +418,7 @@ acpi_tb_get_table(struct acpi_table_desc *table_desc, table_desc->validation_count++; if (table_desc->validation_count == 0) { - ACPI_ERROR((AE_INFO, - "Table %p, Validation count is zero after increment\n", - table_desc)); table_desc->validation_count--; - return_ACPI_STATUS(AE_LIMIT); } *out_table = table_desc->pointer; From 6c77003677d5f1ce15f26d24360cb66c0bc07bb3 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Fri, 26 May 2017 11:37:31 -0400 Subject: [PATCH 121/249] cpufreq: cpufreq_register_driver() should return -ENODEV if init fails For a driver that does not set the CPUFREQ_STICKY flag, if all of the ->init() calls fail, cpufreq_register_driver() should return an error. This will prevent the driver from loading. Fixes: ce1bcfe94db8 (cpufreq: check cpufreq_policy_list instead of scanning policies for all CPUs) Cc: 4.0+ # 4.0+ Signed-off-by: David Arcari Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 0e3f6496524d..26b643d57847 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2468,6 +2468,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (!(cpufreq_driver->flags & CPUFREQ_STICKY) && list_empty(&cpufreq_policy_list)) { /* if all ->init() calls failed, unregister */ + ret = -ENODEV; pr_debug("%s: No CPU initialized for driver %s\n", __func__, driver_data->name); goto err_if_unreg; From 7575f8257279f9dd24b3fc950aa2c7e98564a103 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 17 May 2017 12:20:35 +0530 Subject: [PATCH 122/249] cpufreq: kirkwood-cpufreq:- Handle return value of clk_prepare_enable() clk_prepare_enable() can fail here and we must check its return value. Signed-off-by: Arvind Yadav Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/kirkwood-cpufreq.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c index 1b9bcd76c60e..c2dd43f3f5d8 100644 --- a/drivers/cpufreq/kirkwood-cpufreq.c +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -127,7 +127,12 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) return PTR_ERR(priv.cpu_clk); } - clk_prepare_enable(priv.cpu_clk); + err = clk_prepare_enable(priv.cpu_clk); + if (err) { + dev_err(priv.dev, "Unable to prepare cpuclk\n"); + return err; + } + kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; priv.ddr_clk = of_clk_get_by_name(np, "ddrclk"); @@ -137,7 +142,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) goto out_cpu; } - clk_prepare_enable(priv.ddr_clk); + err = clk_prepare_enable(priv.ddr_clk); + if (err) { + dev_err(priv.dev, "Unable to prepare ddrclk\n"); + goto out_cpu; + } kirkwood_freq_table[1].frequency = clk_get_rate(priv.ddr_clk) / 1000; priv.powersave_clk = of_clk_get_by_name(np, "powersave"); @@ -146,7 +155,11 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev) err = PTR_ERR(priv.powersave_clk); goto out_ddr; } - clk_prepare_enable(priv.powersave_clk); + err = clk_prepare_enable(priv.powersave_clk); + if (err) { + dev_err(priv.dev, "Unable to prepare powersave clk\n"); + goto out_ddr; + } of_node_put(np); np = NULL; From 1943d1723e7a7ee1a7b9b0ba4878dde0dc100671 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 26 May 2017 16:09:48 -0700 Subject: [PATCH 123/249] Input: silead - disable interrupt during suspend When we put the touchscreen controller in low-power mode the irq pin may trigger (float) and if we then try to read a data packet we get the following error in dmesg: [ 478.801017] silead_ts i2c-MSSL1680:00: Data read error -121 This commit disables the irq during suspend/resume fixing this error. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/silead.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 813dd68a5c82..0dbcf105f7db 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -526,6 +526,7 @@ static int __maybe_unused silead_ts_suspend(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + disable_irq(client->irq); silead_ts_set_power(client, SILEAD_POWER_OFF); return 0; } @@ -551,6 +552,8 @@ static int __maybe_unused silead_ts_resume(struct device *dev) return -ENODEV; } + enable_irq(client->irq); + return 0; } From 2755551188d240f0098cdc6f1a2984f8a1785689 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 29 May 2017 19:57:19 -0700 Subject: [PATCH 124/249] Input: synaptics - clear device info before filling in synaptics_query_hardware() was being passed a 'struct synaptics_device_info' in uninitialized stack memory, then not always initializing all fields. This caused garbage to show up in certain fields, making the touchpad unusable. Fix by zeroing the device info, so all fields default to 0. Fixes: 6c53694fb222 ("Input: synaptics - split device info into a separate structure") Signed-off-by: Eric Biggers Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 131df9d3660f..4f97970abc94 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -397,6 +397,8 @@ static int synaptics_query_hardware(struct psmouse *psmouse, { int error; + memset(info, 0, sizeof(*info)); + error = synaptics_identify(psmouse, info); if (error) return error; From f4947d79a7080b25829997eeee38d4d65137c161 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 23 May 2017 14:15:24 -0700 Subject: [PATCH 125/249] Input: synaptics - keep PS/2 around when RMI4_SMB is not enabled Or the user might have the touchpad unbound from PS/2 but never picked up by rmi-smbus.ko Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 4f97970abc94..9b27a6c710b2 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1689,7 +1689,8 @@ enum { SYNAPTICS_INTERTOUCH_ON, }; -static int synaptics_intertouch = SYNAPTICS_INTERTOUCH_NOT_SET; +static int synaptics_intertouch = IS_ENABLED(CONFIG_RMI4_SMB) ? + SYNAPTICS_INTERTOUCH_NOT_SET : SYNAPTICS_INTERTOUCH_OFF; module_param_named(synaptics_intertouch, synaptics_intertouch, int, 0644); MODULE_PARM_DESC(synaptics_intertouch, "Use a secondary bus for the Synaptics device."); From f4101ff87dafd22fbcc3547fd3a3a3717d3d72d3 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 26 May 2017 16:21:36 -0700 Subject: [PATCH 126/249] Input: synaptics - warn the users when there is a better mode The Synaptics touchpads are now either using i2c-hid or rmi-smbus. Warn the users if they are missing the rmi-smbus modules and have no chance of reporting correct data. Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 9b27a6c710b2..604aa3aa7284 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -176,6 +176,12 @@ static const char * const smbus_pnp_ids[] = { NULL }; +static const char * const forcepad_pnp_ids[] = { + "SYN300D", + "SYN3014", + NULL +}; + /* * Send a command to the synpatics touchpad by special commands */ @@ -482,13 +488,6 @@ static const struct min_max_quirk min_max_pnpid_table[] = { { } }; -/* This list has been kindly provided by Synaptics. */ -static const char * const forcepad_pnp_ids[] = { - "SYN300D", - "SYN3014", - NULL -}; - /***************************************************************************** * Synaptics communications functions ****************************************************************************/ @@ -1813,6 +1812,15 @@ int synaptics_init(struct psmouse *psmouse) } if (SYN_CAP_INTERTOUCH(info.ext_cap_0c)) { + if ((!IS_ENABLED(CONFIG_RMI4_SMB) || + !IS_ENABLED(CONFIG_MOUSE_PS2_SYNAPTICS_SMBUS)) && + /* Forcepads need F21, which is not ready */ + !psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids)) { + psmouse_warn(psmouse, + "The touchpad can support a better bus than the too old PS/2 protocol. " + "Make sure MOUSE_PS2_SYNAPTICS_SMBUS and RMI4_SMB are enabled to get a better touchpad experience.\n"); + } + error = synaptics_setup_intertouch(psmouse, &info, true); if (!error) return PSMOUSE_SYNAPTICS_SMBUS; From 2fef826e45c6a1e63f55ab72546f7d795300d9a8 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 26 May 2017 16:51:19 -0700 Subject: [PATCH 127/249] Input: synaptics - tell users to report when they should be using rmi-smbus Users should really consider switching to rmi-smbus instead of plain PS/2. Notify them that they should report a missing pnpID in the file. Signed-off-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/synaptics.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 604aa3aa7284..16c30460ef04 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1739,8 +1739,16 @@ static int synaptics_setup_intertouch(struct psmouse *psmouse, if (synaptics_intertouch == SYNAPTICS_INTERTOUCH_NOT_SET) { if (!psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) && - !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids)) + !psmouse_matches_pnp_id(psmouse, smbus_pnp_ids)) { + + if (!psmouse_matches_pnp_id(psmouse, forcepad_pnp_ids)) + psmouse_info(psmouse, + "Your touchpad (%s) says it can support a different bus. " + "If i2c-hid and hid-rmi are not used, you might want to try setting psmouse.synaptics_intertouch to 1 and report this to linux-input@vger.kernel.org.\n", + psmouse->ps2dev.serio->firmware_id); + return -ENXIO; + } } psmouse_info(psmouse, "Trying to set up SMBus access\n"); From 56b177055adb246cdeca174331dbf92fc49bfccd Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 23 May 2017 07:08:43 +0000 Subject: [PATCH 128/249] rcar-dmac: fixup descriptor pointer for descriptor mode In descriptor mode, the descriptor running pointer is not maintained by the interrupt handler, thus, driver finds the running descriptor from the descriptor pointer field in the CHCRB register. But, CHCRB::DPTR indicates *next* descriptor pointer, not current. Thus, The residue calculation will be missed. This patch fixup it. Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- drivers/dma/sh/rcar-dmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index db41795fe42a..bd261c9e9664 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1287,6 +1287,9 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan, if (desc->hwdescs.use) { dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) & RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT; + if (dptr == 0) + dptr = desc->nchunks; + dptr--; WARN_ON(dptr >= desc->nchunks); } else { running = desc->running; From 477c50e8dc1c7b5004e099b8b74ca8b70be2f4fd Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Tue, 30 May 2017 11:56:22 +0100 Subject: [PATCH 129/249] drivers/perf: arm_pmu_acpi: avoid perf IRQ init when guest PMU is off We saw perf IRQ init failures when running Linux kernel in an ACPI guest without PMU (i.e. pmu=off). This is because perf IRQ is not present when pmu=off, but arm_pmu_acpi still tries to register or unregister GSI. This patch addresses the problem by checking gicc->performance_interrupt. If it is 0, which is the value set by qemu when pmu=off, we skip the IRQ register/unregister process. [ 4.069470] bc00: 0000000000040b00 ffff0000089db190 [ 4.070267] [] enable_percpu_irq+0xdc/0xe4 [ 4.071192] [] arm_perf_starting_cpu+0x108/0x10c [ 4.072200] [] cpuhp_invoke_callback+0x14c/0x4ac [ 4.073210] [] cpuhp_thread_fun+0xd4/0x11c [ 4.074132] [] smpboot_thread_fn+0x1b4/0x1c4 [ 4.075081] [] kthread+0x10c/0x138 [ 4.075921] [] ret_from_fork+0x10/0x50 [ 4.076947] genirq: Setting trigger mode 4 for irq 43 failed (gic_set_type+0x0/0x74) Signed-off-by: Wei Huang [will: add comment justifying deviation from ACPI spec, removed redundant hunk] Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu_acpi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 34c862f213c7..0a9b78705ee8 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -29,6 +29,17 @@ static int arm_pmu_acpi_register_irq(int cpu) return -EINVAL; gsi = gicc->performance_interrupt; + + /* + * Per the ACPI spec, the MADT cannot describe a PMU that doesn't + * have an interrupt. QEMU advertises this by using a GSI of zero, + * which is not known to be valid on any hardware despite being + * valid per the spec. Take the pragmatic approach and reject a + * GSI of zero for now. + */ + if (!gsi) + return 0; + if (gicc->flags & ACPI_MADT_PERFORMANCE_IRQ_MODE) trigger = ACPI_EDGE_SENSITIVE; else From af622b86665881ffa96bc77fb89c94fae02cfa6b Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 23 May 2017 17:49:54 +0200 Subject: [PATCH 130/249] nbd: nbd_reset() call in nbd_dev_add() is redundant There is nothing to clear -- nbd_device has just been allocated. Fold nbd_reset() into its other caller, nbd_config_put(). Signed-off-by: Ilya Dryomov Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9a7bb2c29447..e725d8d5ab0b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -937,14 +937,6 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) return -ENOSPC; } -/* Reset all properties of an NBD device */ -static void nbd_reset(struct nbd_device *nbd) -{ - nbd->config = NULL; - nbd->tag_set.timeout = 0; - queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); -} - static void nbd_bdev_reset(struct block_device *bdev) { if (bdev->bd_openers > 1) @@ -1029,7 +1021,10 @@ static void nbd_config_put(struct nbd_device *nbd) } kfree(config->socks); } - nbd_reset(nbd); + nbd->config = NULL; + + nbd->tag_set.timeout = 0; + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); mutex_unlock(&nbd->config_lock); nbd_put(nbd); @@ -1483,7 +1478,6 @@ static int nbd_dev_add(int index) disk->fops = &nbd_fops; disk->private_data = nbd; sprintf(disk->disk_name, "nbd%d", index); - nbd_reset(nbd); add_disk(disk); nbd_total_devices++; return index; From fa9765323a93473d3853d04c9903958453c92ad4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 23 May 2017 17:49:55 +0200 Subject: [PATCH 131/249] nbd: don't leak nbd_config nbd_config is allocated in nbd_alloc_config(), but never freed. Fixes: 5ea8d10802ec ("nbd: separate out the config information") Signed-off-by: Ilya Dryomov Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e725d8d5ab0b..f3f191ba8ca4 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1021,6 +1021,7 @@ static void nbd_config_put(struct nbd_device *nbd) } kfree(config->socks); } + kfree(nbd->config); nbd->config = NULL; nbd->tag_set.timeout = 0; From cbf712792b6e61317b93dd56dd5c0784363c9ac9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 19 May 2017 15:48:51 +0200 Subject: [PATCH 132/249] KVM: nVMX: fix nested_vmx_check_vmptr failure paths under debugging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_skip_emulated_instruction() will return 0 if userspace is single-stepping the guest. kvm_skip_emulated_instruction() uses return status convention of exit handler: 0 means "exit to userspace" and 1 means "continue vm entries". The problem is that nested_vmx_check_vmptr() return status means something else: 0 is ok, 1 is error. This means we would continue executing after a failure. Static checker noticed it because vmptr was not initialized. Reported-by: Dan Carpenter Fixes: 6affcbedcac7 ("KVM: x86: Add kvm_skip_emulated_instruction and use it.") Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 140 ++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 83 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 880f371705bc..9b4b5d6dcd34 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6914,97 +6914,21 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, return 0; } -/* - * This function performs the various checks including - * - if it's 4KB aligned - * - No bits beyond the physical address width are set - * - Returns 0 on success or else 1 - * (Intel SDM Section 30.3) - */ -static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, - gpa_t *vmpointer) +static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) { gva_t gva; - gpa_t vmptr; struct x86_exception e; - struct page *page; - struct vcpu_vmx *vmx = to_vmx(vcpu); - int maxphyaddr = cpuid_maxphyaddr(vcpu); if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) return 1; - if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &vmptr, - sizeof(vmptr), &e)) { + if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer, + sizeof(*vmpointer), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } - switch (exit_reason) { - case EXIT_REASON_VMON: - /* - * SDM 3: 24.11.5 - * The first 4 bytes of VMXON region contain the supported - * VMCS revision identifier - * - * Note - IA32_VMX_BASIC[48] will never be 1 - * for the nested case; - * which replaces physical address width with 32 - * - */ - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - if (*(u32 *)kmap(page) != VMCS12_REVISION) { - kunmap(page); - nested_release_page_clean(page); - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } - kunmap(page); - nested_release_page_clean(page); - vmx->nested.vmxon_ptr = vmptr; - break; - case EXIT_REASON_VMCLEAR: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMCLEAR_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - case EXIT_REASON_VMPTRLD: - if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } - - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, - VMXERR_VMPTRLD_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } - break; - default: - return 1; /* shouldn't happen */ - } - - if (vmpointer) - *vmpointer = vmptr; return 0; } @@ -7066,6 +6990,8 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) static int handle_vmon(struct kvm_vcpu *vcpu) { int ret; + gpa_t vmptr; + struct page *page; struct vcpu_vmx *vmx = to_vmx(vcpu); const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -7095,9 +7021,37 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - + + /* + * SDM 3: 24.11.5 + * The first 4 bytes of VMXON region contain the supported + * VMCS revision identifier + * + * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; + * which replaces physical address width with 32 + */ + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + + page = nested_get_page(vcpu, vmptr); + if (page == NULL) { + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + if (*(u32 *)kmap(page) != VMCS12_REVISION) { + kunmap(page); + nested_release_page_clean(page); + nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + kunmap(page); + nested_release_page_clean(page); + + vmx->nested.vmxon_ptr = vmptr; ret = enter_vmx_operation(vcpu); if (ret) return ret; @@ -7213,9 +7167,19 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); @@ -7545,9 +7509,19 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMPTRLD, &vmptr)) + if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); + return kvm_skip_emulated_instruction(vcpu); + } + + if (vmptr == vmx->nested.vmxon_ptr) { + nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); + return kvm_skip_emulated_instruction(vcpu); + } + if (vmx->nested.current_vmptr != vmptr) { struct vmcs12 *new_vmcs12; struct page *page; From 8eae9570d1d3887487be0b355d12656b46fac226 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Tue, 30 May 2017 15:24:45 +0200 Subject: [PATCH 133/249] KVM: SVM: ignore type when setting segment registers Commit 19bca6ab75d8 ("KVM: SVM: Fix cross vendor migration issue with unusable bit") added checking type when setting unusable. So unusable can be set if present is 0 OR type is 0. According to the AMD processor manual, long mode ignores the type value in segment descriptor. And type can be 0 if it is read-only data segment. Therefore type value is not related to unusable flag. This patch is based on linux-next v4.12.0-rc3. Signed-off-by: Gioh Kim Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 183ddb235fb4..a654372efea1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1807,7 +1807,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, * AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present" */ - var->unusable = !var->present || (var->type == 0); + var->unusable = !var->present; switch (seg) { case VCPU_SREG_TR: From e4dc2b32df5573b077f6723e01cf761d236d5113 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 30 May 2017 14:39:11 -0400 Subject: [PATCH 134/249] blk-mq: Take tagset lock when updating hw queues The tagset lock needs to be held when iterating the tag_list, so a lockdep assert was added when updating number of hardware queues. The drivers calling this API, however, were unaware of the new requirement, so are failing the assertion. This patch takes the lock within the blk-mq function so the drivers do not have to be modified in order to be safe. Fixes: 705cda97e ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list") Reported-by: Gabriel Krisman Bertazi Reviewed-by: Bart Van Assche Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-mq.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f2224ffd225d..1bcccedcc74f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2641,7 +2641,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) return ret; } -void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, + int nr_hw_queues) { struct request_queue *q; @@ -2665,6 +2666,13 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_unfreeze_queue(q); } + +void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues) +{ + mutex_lock(&set->tag_list_lock); + __blk_mq_update_nr_hw_queues(set, nr_hw_queues); + mutex_unlock(&set->tag_list_lock); +} EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues); /* Enable polling stats and return whether they were already enabled. */ From f511c0b17b081562dca8ac5061dfa86db4c66cc2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 30 May 2017 12:38:59 -0700 Subject: [PATCH 135/249] "Yes, people use FOLL_FORCE ;)" This effectively reverts commit 8ee74a91ac30 ("proc: try to remove use of FOLL_FORCE entirely") It turns out that people do depend on FOLL_FORCE for the /proc//mem case, and we're talking not just debuggers. Talking to the affected people, the use-cases are: Keno Fischer: "We used these semantics as a hardening mechanism in the julia JIT. By opening /proc/self/mem and using these semantics, we could avoid needing RWX pages, or a dual mapping approach. We do have fallbacks to these other methods (though getting EIO here actually causes an assert in released versions - we'll updated that to make sure to take the fall back in that case). Nevertheless the /proc/self/mem approach was our favored approach because it a) Required an attacker to be able to execute syscalls which is a taller order than getting memory write and b) didn't double the virtual address space requirements (as a dual mapping approach would). I think in general this feature is very useful for anybody who needs to precisely control the execution of some other process. Various debuggers (gdb/lldb/rr) certainly fall into that category, but there's another class of such processes (wine, various emulators) which may want to do that kind of thing. Now, I suspect most of these will have the other process under ptrace control, so maybe allowing (same_mm || ptraced) would be ok, but at least for the sandbox/remote-jit use case, it would be perfectly reasonable to not have the jit server be a ptracer" Robert O'Callahan: "We write to readonly code and data mappings via /proc/.../mem in lots of different situations, particularly when we're adjusting program state during replay to match the recorded execution. Like Julia, we can add workarounds, but they could be expensive." so not only do people use FOLL_FORCE for both reads and writes, but they use it for both the local mm and remote mm. With these comments in mind, we likely also cannot add the "are we actively ptracing" check either, so this keeps the new code organization and does not do a real revert that would add back the original comment about "Maybe we should limit FOLL_FORCE to actual ptrace users?" Reported-by: Keno Fischer Reported-by: Robert O'Callahan Cc: Kees Cook Cc: Andy Lutomirski Cc: Eric Biederman Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 45f6bf68fff3..f1e1927ccd48 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -821,7 +821,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!mmget_not_zero(mm)) goto free; - flags = write ? FOLL_WRITE : 0; + flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); From 08fd5e76c268b7a5353302825b6b63a6027ddd0a Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 27 May 2017 06:50:05 -0700 Subject: [PATCH 136/249] hwmon: (aspeed-pwm-tacho) Select REGMAP The driver uses regmap and thus has to select it to avoid build errors such as the following. drivers/hwmon/aspeed-pwm-tacho.c:337:21: error: variable 'aspeed_pwm_tacho_regmap_config' has initializer but incomplete type Reported-by: kbuild test robot Acked-by: Joel Stanley Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 22d5eafd6815..5ef2814345ef 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -343,6 +343,7 @@ config SENSORS_ASB100 config SENSORS_ASPEED tristate "ASPEED AST2400/AST2500 PWM and Fan tach driver" + select REGMAP help This driver provides support for ASPEED AST2400/AST2500 PWM and Fan Tacho controllers. From 7ed1c5e5ddbb87aef9df7f8a8a714bcffe90f0eb Mon Sep 17 00:00:00 2001 From: Patrick Venture Date: Tue, 30 May 2017 12:42:01 -0700 Subject: [PATCH 137/249] hwmon: (aspeed-pwm-tacho) On read failure return -ETIMEDOUT When the controller fails to provide an RPM reading within the alloted time; the driver returns -ETIMEDOUT and no file contents. Signed-off-by: Patrick Venture Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index 48403a2115be..12b716b70ead 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -494,7 +495,7 @@ static u32 aspeed_get_fan_tach_ch_measure_period(struct aspeed_pwm_tacho_data return clk / (clk_unit * div_h * div_l * tacho_div * tacho_unit); } -static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, +static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, u8 fan_tach_ch) { u32 raw_data, tach_div, clk_source, sec, val; @@ -510,6 +511,9 @@ static u32 aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, msleep(sec); regmap_read(priv->regmap, ASPEED_PTCR_RESULT, &val); + if (!(val & RESULT_STATUS_MASK)) + return -ETIMEDOUT; + raw_data = val & RESULT_VALUE_MASK; tach_div = priv->type_fan_tach_clock_division[type]; tach_div = 0x4 << (tach_div * 2); @@ -561,12 +565,14 @@ static ssize_t show_rpm(struct device *dev, struct device_attribute *attr, { struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); int index = sensor_attr->index; - u32 rpm; + int rpm; struct aspeed_pwm_tacho_data *priv = dev_get_drvdata(dev); rpm = aspeed_get_fan_tach_ch_rpm(priv, index); + if (rpm < 0) + return rpm; - return sprintf(buf, "%u\n", rpm); + return sprintf(buf, "%d\n", rpm); } static umode_t pwm_is_visible(struct kobject *kobj, From 64188cfbe5245d412de2139a3864e4e00b4136f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 09:23:41 +0200 Subject: [PATCH 138/249] Revert "ALSA: usb-audio: purge needless variable length array" This reverts commit 89b593c30e83 ("ALSA: usb-audio: purge needless variable length array"). The patch turned out to cause a severe regression, triggering an Oops at snd_usb_ctl_msg(). It was overseen that snd_usb_ctl_msg() writes back the response to the given buffer, while the patch changed it to a read-only const buffer. (One should always double-check when an extra pointer cast is present...) As a simple fix, just revert the affected commit. It was merely a cleanup. Although it brings VLA again, it's clearer as a fix. We'll address the VLA later in another patch. Fixes: 89b593c30e83 ("ALSA: usb-audio: purge needless variable length array") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195875 Cc: # v4.11+ Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index dc48eedea92e..29d2c9282987 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -698,12 +698,12 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, struct snd_usb_audio *chip = elem->head.mixer->chip; struct snd_us16x08_meter_store *store = elem->private_data; u8 meter_urb[64]; - char tmp[sizeof(mix_init_msg2)] = {0}; + char tmp[max(sizeof(mix_init_msg1), sizeof(mix_init_msg2))]; switch (kcontrol->private_value) { case 0: - snd_us16x08_send_urb(chip, (char *)mix_init_msg1, - sizeof(mix_init_msg1)); + memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1)); + snd_us16x08_send_urb(chip, tmp, 4); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value++; @@ -721,7 +721,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, case 3: memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2)); tmp[2] = snd_get_meter_comp_index(store); - snd_us16x08_send_urb(chip, tmp, sizeof(mix_init_msg2)); + snd_us16x08_send_urb(chip, tmp, 10); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value = 0; From 617163fc2580da3d489b6c1bacb6312e0e2aac02 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 23:21:07 +0200 Subject: [PATCH 139/249] ALSA: usb: Fix a typo in Tascam US-16x08 mixer element A mixer element created in a quirk for Tascam US-16x08 contains a typo: it should be "EQ MidLow Q" instead of "EQ MidQLow Q". Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195875 Fixes: d2bb390a2081 ("ALSA: usb-audio: Tascam US-16x08 DSP mixer quirk") Cc: # v4.11+ Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 29d2c9282987..442d8f7998e3 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -1135,7 +1135,7 @@ static const struct snd_us16x08_control_params eq_controls[] = { .control_id = SND_US16X08_ID_EQLOWMIDWIDTH, .type = USB_MIXER_U8, .num_channels = 16, - .name = "EQ MidQLow Q", + .name = "EQ MidLow Q", }, { /* EQ mid high gain */ .kcontrol_new = &snd_us16x08_eq_gain_ctl, From e49a14fa36aeb72476e038fcde96c151b7e4ecc8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 May 2017 23:24:45 +0200 Subject: [PATCH 140/249] ALSA: usb: Avoid VLA in mixer_us16x08.c This is another attempt to work around the VLA used in mixer_us16x08.c. Basically the temporary array is used individually for two cases, and we can declare locally in each block, instead of hackish max() usage. Signed-off-by: Takashi Iwai --- sound/usb/mixer_us16x08.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/usb/mixer_us16x08.c b/sound/usb/mixer_us16x08.c index 442d8f7998e3..26ed23b18b77 100644 --- a/sound/usb/mixer_us16x08.c +++ b/sound/usb/mixer_us16x08.c @@ -698,16 +698,18 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, struct snd_usb_audio *chip = elem->head.mixer->chip; struct snd_us16x08_meter_store *store = elem->private_data; u8 meter_urb[64]; - char tmp[max(sizeof(mix_init_msg1), sizeof(mix_init_msg2))]; switch (kcontrol->private_value) { - case 0: + case 0: { + char tmp[sizeof(mix_init_msg1)]; + memcpy(tmp, mix_init_msg1, sizeof(mix_init_msg1)); snd_us16x08_send_urb(chip, tmp, 4); snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); kcontrol->private_value++; break; + } case 1: snd_us16x08_recv_urb(chip, meter_urb, sizeof(meter_urb)); @@ -718,7 +720,9 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, sizeof(meter_urb)); kcontrol->private_value++; break; - case 3: + case 3: { + char tmp[sizeof(mix_init_msg2)]; + memcpy(tmp, mix_init_msg2, sizeof(mix_init_msg2)); tmp[2] = snd_get_meter_comp_index(store); snd_us16x08_send_urb(chip, tmp, 10); @@ -727,6 +731,7 @@ static int snd_us16x08_meter_get(struct snd_kcontrol *kcontrol, kcontrol->private_value = 0; break; } + } for (set = 0; set < 6; set++) get_meter_levels_from_urb(set, store, meter_urb); From ff0361b34ac63ef80c785c32d62e0e9d89a2cf89 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 31 May 2017 09:44:32 +0200 Subject: [PATCH 141/249] dm: make flush bios explicitly sync Commit b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as synchronous") removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions. Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac7 ("block: treat REQ_FUA and REQ_PREFLUSH as synchronous") Cc: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-integrity.c | 3 ++- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-snap-persistent.c | 3 ++- drivers/md/dm.c | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cd8139593ccd..840c1496b2b1 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1334,7 +1334,7 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH, + .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 1feeb2ccf5a1..7910bfe50da4 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -783,7 +783,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi for (i = 0; i < commit_sections; i++) rw_section_mac(ic, commit_start + i, true); } - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, commit_sections, &io_comp); + rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start, + commit_sections, &io_comp); } else { unsigned to_end; io_comp.in_flight = (atomic_t)ATOMIC_INIT(2); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index a95cbb80fb34..e61c45047c25 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -260,7 +260,7 @@ static int mirror_flush(struct dm_target *ti) struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH, + .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index b93476c3ba3f..c5534d294773 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -741,7 +741,8 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA)) + if (ps->valid && area_io(ps, REQ_OP_WRITE, + REQ_PREFLUSH | REQ_FUA | REQ_SYNC)) ps->valid = 0; /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6ef9500226c0..37ccd73c79ec 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1657,7 +1657,7 @@ static struct mapped_device *alloc_dev(int minor) bio_init(&md->flush_bio, NULL, 0); md->flush_bio.bi_bdev = md->bdev; - md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; dm_stats_init(&md->stats); From 63db7c815bc0997c29e484d2409684fdd9fcd93b Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 31 May 2017 08:22:52 -0700 Subject: [PATCH 142/249] xfs: use ->b_state to fix buffer I/O accounting release race We've had user reports of unmount hangs in xfs_wait_buftarg() that analysis shows is due to btp->bt_io_count == -1. bt_io_count represents the count of in-flight asynchronous buffers and thus should always be >= 0. xfs_wait_buftarg() waits for this value to stabilize to zero in order to ensure that all untracked (with respect to the lru) buffers have completed I/O processing before unmount proceeds to tear down in-core data structures. The value of -1 implies an I/O accounting decrement race. Indeed, the fact that xfs_buf_ioacct_dec() is called from xfs_buf_rele() (where the buffer lock is no longer held) means that bp->b_flags can be updated from an unsafe context. While a user-level reproducer is currently not available, some intrusive hacks to run racing buffer lookups/ioacct/releases from multiple threads was used to successfully manufacture this problem. Existing callers do not expect to acquire the buffer lock from xfs_buf_rele(). Therefore, we can not safely update ->b_flags from this context. It turns out that we already have separate buffer state bits and associated serialization for dealing with buffer LRU state in the form of ->b_state and ->b_lock. Therefore, replace the _XBF_IN_FLIGHT flag with a ->b_state variant, update the I/O accounting wrappers appropriately and make sure they are used with the correct locking. This ensures that buffer in-flight state can be modified at buffer release time without racing with modifications from a buffer lock holder. Fixes: 9c7504aa72b6 ("xfs: track and serialize in-flight async buffers against unmount") Cc: # v4.8+ Signed-off-by: Brian Foster Reviewed-by: Nikolay Borisov Tested-by: Libor Pechacek Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 38 ++++++++++++++++++++++++++------------ fs/xfs/xfs_buf.h | 5 ++--- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 62fa39276a24..07b77b73b024 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -97,27 +97,41 @@ static inline void xfs_buf_ioacct_inc( struct xfs_buf *bp) { - if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT)) + if (bp->b_flags & XBF_NO_IOACCT) return; ASSERT(bp->b_flags & XBF_ASYNC); - bp->b_flags |= _XBF_IN_FLIGHT; - percpu_counter_inc(&bp->b_target->bt_io_count); + spin_lock(&bp->b_lock); + if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { + bp->b_state |= XFS_BSTATE_IN_FLIGHT; + percpu_counter_inc(&bp->b_target->bt_io_count); + } + spin_unlock(&bp->b_lock); } /* * Clear the in-flight state on a buffer about to be released to the LRU or * freed and unaccount from the buftarg. */ +static inline void +__xfs_buf_ioacct_dec( + struct xfs_buf *bp) +{ + ASSERT(spin_is_locked(&bp->b_lock)); + + if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { + bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; + percpu_counter_dec(&bp->b_target->bt_io_count); + } +} + static inline void xfs_buf_ioacct_dec( struct xfs_buf *bp) { - if (!(bp->b_flags & _XBF_IN_FLIGHT)) - return; - - bp->b_flags &= ~_XBF_IN_FLIGHT; - percpu_counter_dec(&bp->b_target->bt_io_count); + spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + spin_unlock(&bp->b_lock); } /* @@ -149,9 +163,9 @@ xfs_buf_stale( * unaccounted (released to LRU) before that occurs. Drop in-flight * status now to preserve accounting consistency. */ - xfs_buf_ioacct_dec(bp); - spin_lock(&bp->b_lock); + __xfs_buf_ioacct_dec(bp); + atomic_set(&bp->b_lru_ref, 0); if (!(bp->b_state & XFS_BSTATE_DISPOSE) && (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) @@ -979,12 +993,12 @@ xfs_buf_rele( * ensures the decrement occurs only once per-buf. */ if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); goto out_unlock; } /* the last reference has been dropped ... */ - xfs_buf_ioacct_dec(bp); + __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* * If the buffer is added to the LRU take a new reference to the diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8d1d44f87ce9..1508121f29f2 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -63,7 +63,6 @@ typedef enum { #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */ -#define _XBF_IN_FLIGHT (1 << 25) /* I/O in flight, for accounting purposes */ typedef unsigned int xfs_buf_flags_t; @@ -84,14 +83,14 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" }, \ - { _XBF_IN_FLIGHT, "IN_FLIGHT" } + { _XBF_COMPOUND, "COMPOUND" } /* * Internal state flags. */ #define XFS_BSTATE_DISPOSE (1 << 0) /* buffer being discarded */ +#define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ /* * The xfs_buftarg contains 2 notions of "sector size" - From 5be6b75610cefd1e21b98a218211922c2feb6e08 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 1 Mar 2017 09:02:33 +0800 Subject: [PATCH 143/249] cfq-iosched: fix the delay of cfq_group's vdisktime under iops mode When adding a cfq_group into the cfq service tree, we use CFQ_IDLE_DELAY as the delay of cfq_group's vdisktime if there have been other cfq_groups already. When cfq is under iops mode, commit 9a7f38c42c2b ("cfq-iosched: Convert from jiffies to nanoseconds") could result in a large iops delay and lead to an abnormal io schedule delay for the added cfq_group. To fix it, we just need to revert to the old CFQ_IDLE_DELAY value: HZ / 5 when iops mode is enabled. Despite having the same value, the delay of a cfq_queue in idle class and the delay of cfq_group are different things, so I define two new macros for the delay of a cfq_group under time-slice mode and iops mode. Fixes: 9a7f38c42c2b ("cfq-iosched: Convert from jiffies to nanoseconds") Cc: # 4.8+ Signed-off-by: Hou Tao Acked-by: Jan Kara Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index da69b079725f..b7e9c7feeab2 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -38,9 +38,13 @@ static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */ static const int cfq_hist_divisor = 4; /* - * offset from end of service tree + * offset from end of queue service tree for idle class */ #define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service tree under time slice mode */ +#define CFQ_SLICE_MODE_GROUP_DELAY (NSEC_PER_SEC / 5) +/* offset from end of group service under IOPS mode */ +#define CFQ_IOPS_MODE_GROUP_DELAY (HZ / 5) /* * below this threshold, we consider thinktime immediate @@ -1362,6 +1366,14 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) cfqg->vfraction = max_t(unsigned, vfr, 1); } +static inline u64 cfq_get_cfqg_vdisktime_delay(struct cfq_data *cfqd) +{ + if (!iops_mode(cfqd)) + return CFQ_SLICE_MODE_GROUP_DELAY; + else + return CFQ_IOPS_MODE_GROUP_DELAY; +} + static void cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) { @@ -1381,7 +1393,8 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) n = rb_last(&st->rb); if (n) { __cfqg = rb_entry_cfqg(n); - cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; + cfqg->vdisktime = __cfqg->vdisktime + + cfq_get_cfqg_vdisktime_delay(cfqd); } else cfqg->vdisktime = st->min_vdisktime; cfq_group_service_tree_add(st, cfqg); From 5a8948f8a32ba56c17b3fb75d318ac98157f3ba5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 31 May 2017 09:44:33 +0200 Subject: [PATCH 144/249] md: Make flush bios explicitely sync Commit b685d3d65ac7 "block: treat REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. CC: linux-raid@vger.kernel.org CC: Shaohua Li Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- drivers/md/raid5-cache.c | 4 ++-- drivers/md/raid5-ppl.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 10367ffe92e3..212a6777ff31 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -765,7 +765,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, test_bit(FailFast, &rdev->flags) && !test_bit(LastDev, &rdev->flags)) ff = MD_FAILFAST; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA | ff; atomic_inc(&mddev->pending_writes); submit_bio(bio); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 4c00bc248287..0a7af8b0a80a 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1782,7 +1782,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, mb, PAGE_SIZE)); if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE, - REQ_FUA, false)) { + REQ_SYNC | REQ_FUA, false)) { __free_page(page); return -EIO; } @@ -2388,7 +2388,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, mb, PAGE_SIZE)); sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, - REQ_OP_WRITE, REQ_FUA, false); + REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false); sh->log_start = ctx->pos; list_add_tail(&sh->r5c, &log->stripe_in_journal_list); atomic_inc(&log->stripe_in_journal_count); diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 5d25bebf3328..ccce92e68d7f 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -907,8 +907,8 @@ static int ppl_write_empty_header(struct ppl_log *log) pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE)); if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, - PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_FUA, 0, - false)) { + PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC | + REQ_FUA, 0, false)) { md_error(rdev->mddev, rdev); ret = -EIO; } From 6ea44adce91526700535b3150f77f8639ae8c82d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 May 2017 17:00:32 +1000 Subject: [PATCH 145/249] SUNRPC: ensure correct error is reported by xs_tcp_setup_socket() If you attempt a TCP mount from an host that is unreachable in a way that triggers an immediate error from kernel_connect(), that error does not propagate up, instead EAGAIN is reported. This results in call_connect_status receiving the wrong error. A case that it easy to demonstrate is to attempt to mount from an address that results in ENETUNREACH, but first deleting any default route. Without this patch, the mount.nfs process is persistently runnable and is hard to kill. With this patch it exits as it should. The problem is caused by the fact that xs_tcp_force_close() eventually calls xprt_wake_pending_tasks(xprt, -EAGAIN); which causes an error return of -EAGAIN. so when xs_tcp_setup_sock() calls xprt_wake_pending_tasks(xprt, status); the status is ignored. Fixes: 4efdd92c9211 ("SUNRPC: Remove TCP client connection reset hack") Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 16aff8ddc16f..d5b54c020dec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2432,7 +2432,12 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ENETUNREACH: case -EADDRINUSE: case -ENOBUFS: - /* retry with existing socket, after a delay */ + /* + * xs_tcp_force_close() wakes tasks with -EIO. + * We need to wake them first to ensure the + * correct error code. + */ + xprt_wake_pending_tasks(xprt, status); xs_tcp_force_close(xprt); goto out; } From 45cc6586b7a73e84a8806881122b6ec306cdc9e7 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 29 May 2017 13:13:59 -0400 Subject: [PATCH 146/249] drm/amdgpu: Program ring for vce instance 1 at its register space We need program ring buffer on instance 1 register space domain, when only if instance 1 available, with two instances or instance 0, and we need only program instance 0 regsiter space domain for ring. Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 95 +++++++++++++++++++-------- 1 file changed, 68 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index fb0819359909..90332f55cfba 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -77,13 +77,26 @@ static int vce_v3_0_set_clockgating_state(void *handle, static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + u32 v; + + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); if (ring == &adev->vce.ring[0]) - return RREG32(mmVCE_RB_RPTR); + v = RREG32(mmVCE_RB_RPTR); else if (ring == &adev->vce.ring[1]) - return RREG32(mmVCE_RB_RPTR2); + v = RREG32(mmVCE_RB_RPTR2); else - return RREG32(mmVCE_RB_RPTR3); + v = RREG32(mmVCE_RB_RPTR3); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); + + return v; } /** @@ -96,13 +109,26 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + u32 v; + + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); if (ring == &adev->vce.ring[0]) - return RREG32(mmVCE_RB_WPTR); + v = RREG32(mmVCE_RB_WPTR); else if (ring == &adev->vce.ring[1]) - return RREG32(mmVCE_RB_WPTR2); + v = RREG32(mmVCE_RB_WPTR2); else - return RREG32(mmVCE_RB_WPTR3); + v = RREG32(mmVCE_RB_WPTR3); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); + + return v; } /** @@ -116,12 +142,22 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); + if (ring == &adev->vce.ring[0]) WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else if (ring == &adev->vce.ring[1]) WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); else WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); } static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) @@ -231,33 +267,38 @@ static int vce_v3_0_start(struct amdgpu_device *adev) struct amdgpu_ring *ring; int idx, r; - ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); - - ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); - - ring = &adev->vce.ring[2]; - WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); - mutex_lock(&adev->grbm_idx_mutex); for (idx = 0; idx < 2; ++idx) { if (adev->vce.harvest_config & (1 << idx)) continue; WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); + + /* Program instance 0 reg space for two instances or instance 0 case + program instance 1 reg space for only instance 1 available case */ + if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) { + ring = &adev->vce.ring[0]; + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vce.ring[1]; + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); + + ring = &adev->vce.ring[2]; + WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); + } + vce_v3_0_mc_resume(adev, idx); WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1); From 25cdda95fda78d22d44157da15aa7ea34be3c804 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 24 May 2017 21:47:09 -0700 Subject: [PATCH 147/249] iscsi-target: Fix initial login PDU asynchronous socket close OOPs This patch fixes a OOPs originally introduced by: commit bb048357dad6d604520c91586334c9c230366a14 Author: Nicholas Bellinger Date: Thu Sep 5 14:54:04 2013 -0700 iscsi-target: Add sk->sk_state_change to cleanup after TCP failure which would trigger a NULL pointer dereference when a TCP connection was closed asynchronously via iscsi_target_sk_state_change(), but only when the initial PDU processing in iscsi_target_do_login() from iscsi_np process context was blocked waiting for backend I/O to complete. To address this issue, this patch makes the following changes. First, it introduces some common helper functions used for checking socket closing state, checking login_flags, and atomically checking socket closing state + setting login_flags. Second, it introduces a LOGIN_FLAGS_INITIAL_PDU bit to know when a TCP connection has dropped via iscsi_target_sk_state_change(), but the initial PDU processing within iscsi_target_do_login() in iscsi_np context is still running. For this case, it sets LOGIN_FLAGS_CLOSED, but doesn't invoke schedule_delayed_work(). The original NULL pointer dereference case reported by MNC is now handled by iscsi_target_do_login() doing a iscsi_target_sk_check_close() before transitioning to FFP to determine when the socket has already closed, or iscsi_target_start_negotiation() if the login needs to exchange more PDUs (eg: iscsi_target_do_login returned 0) but the socket has closed. For both of these cases, the cleanup up of remaining connection resources will occur in iscsi_target_start_negotiation() from iscsi_np process context once the failure is detected. Finally, to handle to case where iscsi_target_sk_state_change() is called after the initial PDU procesing is complete, it now invokes conn->login_work -> iscsi_target_do_login_rx() to perform cleanup once existing iscsi_target_sk_check_close() checks detect connection failure. For this case, the cleanup of remaining connection resources will occur in iscsi_target_do_login_rx() from delayed workqueue process context once the failure is detected. Reported-by: Mike Christie Reviewed-by: Mike Christie Tested-by: Mike Christie Cc: Mike Christie Reported-by: Hannes Reinecke Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Varun Prakash Cc: # v3.12+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_nego.c | 196 +++++++++++++++-------- include/target/iscsi/iscsi_target_core.h | 1 + 2 files changed, 134 insertions(+), 63 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 7ccc9c1cbfd1..6f88b31242b0 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -493,14 +493,60 @@ static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn) static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *); -static bool iscsi_target_sk_state_check(struct sock *sk) +static bool __iscsi_target_sk_check_close(struct sock *sk) { if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) { - pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE," + pr_debug("__iscsi_target_sk_check_close: TCP_CLOSE_WAIT|TCP_CLOSE," "returning FALSE\n"); - return false; + return true; } - return true; + return false; +} + +static bool iscsi_target_sk_check_close(struct iscsi_conn *conn) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_flag(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + read_lock_bh(&sk->sk_callback_lock); + state = test_bit(flag, &conn->login_flags); + read_unlock_bh(&sk->sk_callback_lock); + } + return state; +} + +static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned int flag) +{ + bool state = false; + + if (conn->sock) { + struct sock *sk = conn->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + state = (__iscsi_target_sk_check_close(sk) || + test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)); + if (!state) + clear_bit(flag, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + } + return state; } static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login) @@ -540,6 +586,20 @@ static void iscsi_target_do_login_rx(struct work_struct *work) pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n", conn, current->comm, current->pid); + /* + * If iscsi_target_do_login_rx() has been invoked by ->sk_data_ready() + * before initial PDU processing in iscsi_target_start_negotiation() + * has completed, go ahead and retry until it's cleared. + * + * Otherwise if the TCP connection drops while this is occuring, + * iscsi_target_start_negotiation() will detect the failure, call + * cancel_delayed_work_sync(&conn->login_work), and cleanup the + * remaining iscsi connection resources from iscsi_np process context. + */ + if (iscsi_target_sk_check_flag(conn, LOGIN_FLAGS_INITIAL_PDU)) { + schedule_delayed_work(&conn->login_work, msecs_to_jiffies(10)); + return; + } spin_lock(&tpg->tpg_state_lock); state = (tpg->tpg_state == TPG_STATE_ACTIVE); @@ -547,26 +607,12 @@ static void iscsi_target_do_login_rx(struct work_struct *work) if (!state) { pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; + goto err; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (iscsi_target_sk_check_close(conn)) { + pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n"); + goto err; } conn->login_kworker = current; @@ -584,34 +630,29 @@ static void iscsi_target_do_login_rx(struct work_struct *work) flush_signals(current); conn->login_kworker = NULL; - if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); - return; - } + if (rc < 0) + goto err; pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n", conn, current->comm, current->pid); rc = iscsi_target_do_login(conn, login); if (rc < 0) { - iscsi_target_restore_sock_callbacks(conn); - iscsi_target_login_drop(conn, login); - iscsit_deaccess_np(np, tpg, tpg_np); + goto err; } else if (!rc) { - if (conn->sock) { - struct sock *sk = conn->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags); - write_unlock_bh(&sk->sk_callback_lock); - } + if (iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_READ_ACTIVE)) + goto err; } else if (rc == 1) { iscsi_target_nego_release(conn); iscsi_post_login_handler(np, conn, zero_tsih); iscsit_deaccess_np(np, tpg, tpg_np); } + return; + +err: + iscsi_target_restore_sock_callbacks(conn); + iscsi_target_login_drop(conn, login); + iscsit_deaccess_np(np, tpg, tpg_np); } static void iscsi_target_do_cleanup(struct work_struct *work) @@ -659,31 +700,54 @@ static void iscsi_target_sk_state_change(struct sock *sk) orig_state_change(sk); return; } + state = __iscsi_target_sk_check_close(sk); + pr_debug("__iscsi_target_sk_close_change: state: %d\n", state); + if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change" " conn: %p\n", conn); + if (state) + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } - if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { + if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) { pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n", conn); write_unlock_bh(&sk->sk_callback_lock); orig_state_change(sk); return; } - - state = iscsi_target_sk_state_check(sk); - write_unlock_bh(&sk->sk_callback_lock); - - pr_debug("iscsi_target_sk_state_change: state: %d\n", state); - - if (!state) { + /* + * If the TCP connection has dropped, go ahead and set LOGIN_FLAGS_CLOSED, + * but only queue conn->login_work -> iscsi_target_do_login_rx() + * processing if LOGIN_FLAGS_INITIAL_PDU has already been cleared. + * + * When iscsi_target_do_login_rx() runs, iscsi_target_sk_check_close() + * will detect the dropped TCP connection from delayed workqueue context. + * + * If LOGIN_FLAGS_INITIAL_PDU is still set, which means the initial + * iscsi_target_start_negotiation() is running, iscsi_target_do_login() + * via iscsi_target_sk_check_close() or iscsi_target_start_negotiation() + * via iscsi_target_sk_check_and_clear() is responsible for detecting the + * dropped TCP connection in iscsi_np process context, and cleaning up + * the remaining iscsi connection resources. + */ + if (state) { pr_debug("iscsi_target_sk_state_change got failed state\n"); - schedule_delayed_work(&conn->login_cleanup_work, 0); + set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags); + state = test_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); + write_unlock_bh(&sk->sk_callback_lock); + + orig_state_change(sk); + + if (!state) + schedule_delayed_work(&conn->login_work, 0); return; } + write_unlock_bh(&sk->sk_callback_lock); + orig_state_change(sk); } @@ -946,6 +1010,15 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo if (iscsi_target_handle_csg_one(conn, login) < 0) return -1; if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) { + /* + * Check to make sure the TCP connection has not + * dropped asynchronously while session reinstatement + * was occuring in this kthread context, before + * transitioning to full feature phase operation. + */ + if (iscsi_target_sk_check_close(conn)) + return -1; + login->tsih = conn->sess->tsih; login->login_complete = 1; iscsi_target_restore_sock_callbacks(conn); @@ -972,21 +1045,6 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo break; } - if (conn->sock) { - struct sock *sk = conn->sock->sk; - bool state; - - read_lock_bh(&sk->sk_callback_lock); - state = iscsi_target_sk_state_check(sk); - read_unlock_bh(&sk->sk_callback_lock); - - if (!state) { - pr_debug("iscsi_target_do_login() failed state for" - " conn: %p\n", conn); - return -1; - } - } - return 0; } @@ -1255,10 +1313,22 @@ int iscsi_target_start_negotiation( write_lock_bh(&sk->sk_callback_lock); set_bit(LOGIN_FLAGS_READY, &conn->login_flags); + set_bit(LOGIN_FLAGS_INITIAL_PDU, &conn->login_flags); write_unlock_bh(&sk->sk_callback_lock); } - + /* + * If iscsi_target_do_login returns zero to signal more PDU + * exchanges are required to complete the login, go ahead and + * clear LOGIN_FLAGS_INITIAL_PDU but only if the TCP connection + * is still active. + * + * Otherwise if TCP connection dropped asynchronously, go ahead + * and perform connection cleanup now. + */ ret = iscsi_target_do_login(conn, login); + if (!ret && iscsi_target_sk_check_and_clear(conn, LOGIN_FLAGS_INITIAL_PDU)) + ret = -1; + if (ret < 0) { cancel_delayed_work_sync(&conn->login_work); cancel_delayed_work_sync(&conn->login_cleanup_work); diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 275581d483dd..5f17fb770477 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -557,6 +557,7 @@ struct iscsi_conn { #define LOGIN_FLAGS_READ_ACTIVE 1 #define LOGIN_FLAGS_CLOSED 2 #define LOGIN_FLAGS_READY 4 +#define LOGIN_FLAGS_INITIAL_PDU 8 unsigned long login_flags; struct delayed_work login_work; struct delayed_work login_cleanup_work; From 5e0cf5e6c43b9e19fc0284f69e5cd2b4a47523b0 Mon Sep 17 00:00:00 2001 From: Jiang Yi Date: Tue, 16 May 2017 17:57:55 +0800 Subject: [PATCH 148/249] iscsi-target: Always wait for kthread_should_stop() before kthread exit There are three timing problems in the kthread usages of iscsi_target_mod: - np_thread of struct iscsi_np - rx_thread and tx_thread of struct iscsi_conn In iscsit_close_connection(), it calls send_sig(SIGINT, conn->tx_thread, 1); kthread_stop(conn->tx_thread); In conn->tx_thread, which is iscsi_target_tx_thread(), when it receive SIGINT the kthread will exit without checking the return value of kthread_should_stop(). So if iscsi_target_tx_thread() exit right between send_sig(SIGINT...) and kthread_stop(...), the kthread_stop() will try to stop an already stopped kthread. This is invalid according to the documentation of kthread_stop(). (Fix -ECONNRESET logout handling in iscsi_target_tx_thread and early iscsi_target_rx_thread failure case - nab) Signed-off-by: Jiang Yi Cc: # v3.12+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 30 ++++++++++++++++++----- drivers/target/iscsi/iscsi_target_erl0.c | 6 ++++- drivers/target/iscsi/iscsi_target_erl0.h | 2 +- drivers/target/iscsi/iscsi_target_login.c | 4 +++ 4 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 26a9bcd5ee6a..0d8f81591bed 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3790,6 +3790,8 @@ int iscsi_target_tx_thread(void *arg) { int ret = 0; struct iscsi_conn *conn = arg; + bool conn_freed = false; + /* * Allow ourselves to be interrupted by SIGINT so that a * connection recovery / failure event can be triggered externally. @@ -3815,12 +3817,14 @@ int iscsi_target_tx_thread(void *arg) goto transport_err; ret = iscsit_handle_response_queue(conn); - if (ret == 1) + if (ret == 1) { goto get_immediate; - else if (ret == -ECONNRESET) + } else if (ret == -ECONNRESET) { + conn_freed = true; goto out; - else if (ret < 0) + } else if (ret < 0) { goto transport_err; + } } transport_err: @@ -3830,8 +3834,13 @@ int iscsi_target_tx_thread(void *arg) * responsible for cleaning up the early connection failure. */ if (conn->conn_state != TARG_CONN_STATE_IN_LOGIN) - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } return 0; } @@ -4004,6 +4013,7 @@ int iscsi_target_rx_thread(void *arg) { int rc; struct iscsi_conn *conn = arg; + bool conn_freed = false; /* * Allow ourselves to be interrupted by SIGINT so that a @@ -4016,7 +4026,7 @@ int iscsi_target_rx_thread(void *arg) */ rc = wait_for_completion_interruptible(&conn->rx_login_comp); if (rc < 0 || iscsi_target_check_conn_state(conn)) - return 0; + goto out; if (!conn->conn_transport->iscsit_get_rx_pdu) return 0; @@ -4025,7 +4035,15 @@ int iscsi_target_rx_thread(void *arg) if (!signal_pending(current)) atomic_set(&conn->transport_failed, 1); - iscsit_take_action_for_connection_exit(conn); + iscsit_take_action_for_connection_exit(conn, &conn_freed); + +out: + if (!conn_freed) { + while (!kthread_should_stop()) { + msleep(100); + } + } + return 0; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index 9a96e17bf7cd..7fe2aa73cff6 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -930,8 +930,10 @@ static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn) } } -void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) +void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn, bool *conn_freed) { + *conn_freed = false; + spin_lock_bh(&conn->state_lock); if (atomic_read(&conn->connection_exit)) { spin_unlock_bh(&conn->state_lock); @@ -942,6 +944,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) { spin_unlock_bh(&conn->state_lock); iscsit_close_connection(conn); + *conn_freed = true; return; } @@ -955,4 +958,5 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn) spin_unlock_bh(&conn->state_lock); iscsit_handle_connection_cleanup(conn); + *conn_freed = true; } diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h index 60e69e2af6ed..3822d9cd1230 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.h +++ b/drivers/target/iscsi/iscsi_target_erl0.h @@ -15,6 +15,6 @@ extern int iscsit_stop_time2retain_timer(struct iscsi_session *); extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *); extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int); extern void iscsit_fall_back_to_erl0(struct iscsi_session *); -extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *); +extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *, bool *); #endif /*** ISCSI_TARGET_ERL0_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 66238477137b..92b96b51d506 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1464,5 +1464,9 @@ int iscsi_target_login_thread(void *arg) break; } + while (!kthread_should_stop()) { + msleep(100); + } + return 0; } From d2c3b14e1f0dcebdb695617c0c1342a36b914a47 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 1 Jun 2017 09:35:30 +0200 Subject: [PATCH 149/249] ALSA: hda - Fix applying MSI dual-codec mobo quirk The previous commit [63691587f7b0: ALSA: hda - Apply dual-codec quirk for MSI Z270-Gaming mobo] attempted to apply the existing dual-codec quirk for a MSI mobo. But it turned out that this isn't applied properly due to the MSI-vendor quirk before this entry. I overlooked such two MSI entries just because they were put in the wrong position, although we have a list ordered by PCI SSID numbers. This patch fixes it by rearranging the unordered entries. Fixes: 63691587f7b0 ("ALSA: hda - Apply dual-codec quirk for MSI Z270-Gaming mobo") Reported-by: Rudolf Schmidt Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 918e45268915..a57988d617e9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2324,11 +2324,11 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), - SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), - SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), + SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), From d9c1b5431d5f0e07575db785a022bce91051ac1d Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Thu, 1 Jun 2017 10:55:03 +0200 Subject: [PATCH 150/249] KVM: SVM: do not zero out segment attributes if segment is unusable or not present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a fix for the problem [1], where VMCB.CPL was set to 0 and interrupt was taken on userspace stack. The root cause lies in the specific AMD CPU behaviour which manifests itself as unusable segment attributes on SYSRET. The corresponding work around for the kernel is the following: 61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue") In other turn virtualization side treated unusable segment incorrectly and restored CPL from SS attributes, which were zeroed out few lines above. In current patch it is assured only that P bit is cleared in VMCB.save state and segment attributes are not zeroed out if segment is not presented or is unusable, therefore CPL can be safely restored from DPL field. This is only one part of the fix, since QEMU side should be fixed accordingly not to zero out attributes on its side. Corresponding patch will follow. [1] Message id: CAJrWOzD6Xq==b-zYCDdFLgSRMPM-NkNuTSDFEtX=7MreT45i7Q@mail.gmail.com Signed-off-by: Roman Pen Signed-off-by: Mikhail Sennikovskii Cc: Paolo Bonzini Cc: Radim KrÄmář Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a654372efea1..ba9891ac5c56 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1840,6 +1840,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, */ if (var->unusable) var->db = 0; + /* This is symmetric with svm_set_segment() */ var->dpl = to_svm(vcpu)->vmcb->save.cpl; break; } @@ -1980,18 +1981,14 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->base = var->base; s->limit = var->limit; s->selector = var->selector; - if (var->unusable) - s->attrib = 0; - else { - s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); - s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; - s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; - s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; - s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; - s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; - s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; - s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; - } + s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); + s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; + s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; + s->attrib |= ((var->present & 1) && !var->unusable) << SVM_SELECTOR_P_SHIFT; + s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; + s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; + s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; + s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; /* * This is always accurate, except if SYSRET returned to a segment @@ -2000,7 +1997,8 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, * would entail passing the CPL to userspace and back. */ if (seg == VCPU_SREG_SS) - svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; + /* This is symmetric with svm_get_segment() */ + svm->vmcb->save.cpl = (var->dpl & 3); mark_dirty(svm->vmcb, VMCB_SEG); } From 47a66eed99e6f231f4a1d261a9d493f4eee94829 Mon Sep 17 00:00:00 2001 From: ZhuangYanying Date: Fri, 26 May 2017 13:16:48 +0800 Subject: [PATCH 151/249] KVM: x86: Fix nmi injection failure when vcpu got blocked When spin_lock_irqsave() deadlock occurs inside the guest, vcpu threads, other than the lock-holding one, would enter into S state because of pvspinlock. Then inject NMI via libvirt API "inject-nmi", the NMI could not be injected into vm. The reason is: 1 It sets nmi_queued to 1 when calling ioctl KVM_NMI in qemu, and sets cpu->kvm_vcpu_dirty to true in do_inject_external_nmi() meanwhile. 2 It sets nmi_queued to 0 in process_nmi(), before entering guest, because cpu->kvm_vcpu_dirty is true. It's not enough just to check nmi_queued to decide whether to stay in vcpu_block() or not. NMI should be injected immediately at any situation. Add checking nmi_pending, and testing KVM_REQ_NMI replaces nmi_queued in vm_vcpu_has_events(). Do the same change for SMIs. Signed-off-by: Zhuang Yanying Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 02363e37d4a6..a2cd0997343c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8394,10 +8394,13 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (vcpu->arch.pv.pv_unhalted) return true; - if (atomic_read(&vcpu->arch.nmi_queued)) + if (kvm_test_request(KVM_REQ_NMI, vcpu) || + (vcpu->arch.nmi_pending && + kvm_x86_ops->nmi_allowed(vcpu))) return true; - if (kvm_test_request(KVM_REQ_SMI, vcpu)) + if (kvm_test_request(KVM_REQ_SMI, vcpu) || + (vcpu->arch.smi_pending && !is_smm(vcpu))) return true; if (kvm_arch_interrupt_allowed(vcpu) && From c08d517480ea342cc43acdacc5cf4a795e18151d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 Jun 2017 15:52:23 +0200 Subject: [PATCH 152/249] Revert "x86/PAT: Fix Xorg regression on CPUs that don't support PAT" This reverts commit cbed27cdf0e3f7ea3b2259e86b9e34df02be3fe4. As Andy Lutomirski observed: "I think this patch is bogus. pat_enabled() sure looks like it's supposed to return true if PAT is *enabled*, and these days PAT is 'enabled' even if there's no HW PAT support." Reported-by: Bernhard Held Reported-by: Chris Wilson Acked-by: Andy Lutomirski Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mikulas Patocka Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: stable@vger.kernel.org # v4.2+ Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 83a59a67757a..9b78685b66e6 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -65,11 +65,9 @@ static int __init nopat(char *str) } early_param("nopat", nopat); -static bool __read_mostly __pat_initialized = false; - bool pat_enabled(void) { - return __pat_initialized; + return !!__pat_enabled; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -227,14 +225,13 @@ static void pat_bsp_init(u64 pat) } wrmsrl(MSR_IA32_CR_PAT, pat); - __pat_initialized = true; __init_cache_modes(pat); } static void pat_ap_init(u64 pat) { - if (!this_cpu_has(X86_FEATURE_PAT)) { + if (!boot_cpu_has(X86_FEATURE_PAT)) { /* * If this happens we are on a secondary CPU, but switched to * PAT on the boot CPU. We have no way to undo PAT. @@ -309,7 +306,7 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; - if (!__pat_enabled) { + if (!pat_enabled()) { init_cache_modes(); return; } From b425e50492583b10cceb388af36ef0bd3bdf842a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 31 May 2017 14:43:45 -0700 Subject: [PATCH 153/249] block: Avoid that blk_exit_rl() triggers a use-after-free Since the introduction of .init_rq_fn() and .exit_rq_fn() it is essential that the memory allocated for struct request_queue stays around until all blk_exit_rl() calls have finished. Hence make blk_init_rl() take a reference on struct request_queue. This patch fixes the following crash: general protection fault: 0000 [#2] SMP CPU: 3 PID: 28 Comm: ksoftirqd/3 Tainted: G D 4.12.0-rc2-dbg+ #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 task: ffff88013a108040 task.stack: ffffc9000071c000 RIP: 0010:free_request_size+0x1a/0x30 RSP: 0018:ffffc9000071fd38 EFLAGS: 00010202 RAX: 6b6b6b6b6b6b6b6b RBX: ffff880067362a88 RCX: 0000000000000003 RDX: ffff880067464178 RSI: ffff880067362a88 RDI: ffff880135ea4418 RBP: ffffc9000071fd40 R08: 0000000000000000 R09: 0000000100180009 R10: ffffc9000071fd38 R11: ffffffff81110800 R12: ffff88006752d3d8 R13: ffff88006752d3d8 R14: ffff88013a108040 R15: 000000000000000a FS: 0000000000000000(0000) GS:ffff88013fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa8ec1edb00 CR3: 0000000138ee8000 CR4: 00000000001406e0 Call Trace: mempool_destroy.part.10+0x21/0x40 mempool_destroy+0xe/0x10 blk_exit_rl+0x12/0x20 blkg_free+0x4d/0xa0 __blkg_release_rcu+0x59/0x170 rcu_process_callbacks+0x260/0x4e0 __do_softirq+0x116/0x250 smpboot_thread_fn+0x123/0x1e0 kthread+0x109/0x140 ret_from_fork+0x31/0x40 Fixes: commit e9c787e65c0c ("scsi: allocate scsi_cmnd structures as part of struct request") Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Cc: Jan Kara Cc: # v4.11+ Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-core.c | 10 ++++++++-- block/blk-sysfs.c | 2 +- block/blk.h | 2 +- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 7c2947128f58..0480892e97e5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -74,7 +74,7 @@ static void blkg_free(struct blkcg_gq *blkg) blkcg_policy[i]->pd_free_fn(blkg->pd[i]); if (blkg->blkcg != &blkcg_root) - blk_exit_rl(&blkg->rl); + blk_exit_rl(blkg->q, &blkg->rl); blkg_rwstat_exit(&blkg->stat_ios); blkg_rwstat_exit(&blkg->stat_bytes); diff --git a/block/blk-core.c b/block/blk-core.c index c7068520794b..a7421b772d0e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -648,13 +648,19 @@ int blk_init_rl(struct request_list *rl, struct request_queue *q, if (!rl->rq_pool) return -ENOMEM; + if (rl != &q->root_rl) + WARN_ON_ONCE(!blk_get_queue(q)); + return 0; } -void blk_exit_rl(struct request_list *rl) +void blk_exit_rl(struct request_queue *q, struct request_list *rl) { - if (rl->rq_pool) + if (rl->rq_pool) { mempool_destroy(rl->rq_pool); + if (rl != &q->root_rl) + blk_put_queue(q); + } } struct request_queue *blk_alloc_queue(gfp_t gfp_mask) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 712b018e9f54..283da7fbe034 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -809,7 +809,7 @@ static void blk_release_queue(struct kobject *kobj) blk_free_queue_stats(q->stats); - blk_exit_rl(&q->root_rl); + blk_exit_rl(q, &q->root_rl); if (q->queue_tags) __blk_queue_free_tags(q); diff --git a/block/blk.h b/block/blk.h index 2ed70228e44f..83c8e1100525 100644 --- a/block/blk.h +++ b/block/blk.h @@ -59,7 +59,7 @@ void blk_free_flush_queue(struct blk_flush_queue *q); int blk_init_rl(struct request_list *rl, struct request_queue *q, gfp_t gfp_mask); -void blk_exit_rl(struct request_list *rl); +void blk_exit_rl(struct request_queue *q, struct request_list *rl); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); void blk_queue_bypass_start(struct request_queue *q); From 5a27fec21b70c2c41efbfe30a0fbb0f005b9f7e5 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 5 May 2017 12:05:16 -0500 Subject: [PATCH 154/249] RDMA/i40iw: Don't set 0-length FULPDU RTR indication control flag Don't set control flag for 0-length FULPDU (Send) RTR indication in the enhanced MPA Request/Reply frames, because it isn't supported. Signed-off-by: Tatyana Nikolova Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index f3bc01bce483..e8e864fc1883 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -784,7 +784,6 @@ static void i40iw_build_mpa_v2(struct i40iw_cm_node *cm_node, } ctrl_ird |= IETF_PEER_TO_PEER; - ctrl_ird |= IETF_FLPDU_ZERO_LEN; switch (mpa_key) { case MPA_KEY_REQUEST: From b117f4796306a750ec1ffb928acc5f55bdb1fc7b Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Fri, 5 May 2017 12:05:17 -0500 Subject: [PATCH 155/249] RDMA/i40iw: ACK MPA Reject frame Explicitly ACK the MPA Reject frame so the peer does not retransmit the frame. Signed-off-by: Tatyana Nikolova Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index e8e864fc1883..6ae98aa7f74e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2445,8 +2445,8 @@ static void i40iw_handle_rcv_mpa(struct i40iw_cm_node *cm_node, } else { type = I40IW_CM_EVENT_CONNECTED; cm_node->state = I40IW_CM_STATE_OFFLOADED; - i40iw_send_ack(cm_node); } + i40iw_send_ack(cm_node); break; default: pr_err("%s wrong cm_node state =%d\n", __func__, cm_node->state); From c0c643e16f9b00332cbbf3954556652dfa4ed5a3 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Wed, 10 May 2017 23:32:14 -0500 Subject: [PATCH 156/249] RDMA/i40iw: Fix device initialization error path Some error paths in i40iw_initialize_dev are doing additional and unnecessary work before exiting. Correctly free resources allocated prior to error and return with correct status code. Signed-off-by: Mustafa Ismail Signed-off-by: Henry Orosco Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 2728af3103ce..a3f18a22f5ed 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1319,13 +1319,13 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_QUERY_FPM_BUF_SIZE, I40IW_FPM_QUERY_BUF_ALIGNMENT_MASK); if (status) - goto exit; + goto error; info.fpm_query_buf_pa = mem.pa; info.fpm_query_buf = mem.va; status = i40iw_obj_aligned_mem(iwdev, &mem, I40IW_COMMIT_FPM_BUF_SIZE, I40IW_FPM_COMMIT_BUF_ALIGNMENT_MASK); if (status) - goto exit; + goto error; info.fpm_commit_buf_pa = mem.pa; info.fpm_commit_buf = mem.va; info.hmc_fn_id = ldev->fid; @@ -1347,11 +1347,9 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, info.exception_lan_queue = 1; info.vchnl_send = i40iw_virtchnl_send; status = i40iw_device_init(&iwdev->sc_dev, &info); -exit: - if (status) { - kfree(iwdev->hmc_info_mem); - iwdev->hmc_info_mem = NULL; - } + + if (status) + goto error; memset(&vsi_info, 0, sizeof(vsi_info)); vsi_info.dev = &iwdev->sc_dev; vsi_info.back_vsi = (void *)iwdev; @@ -1362,11 +1360,19 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev, memset(&stats_info, 0, sizeof(stats_info)); stats_info.fcn_id = ldev->fid; stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); + if (!stats_info.pestat) { + status = I40IW_ERR_NO_MEMORY; + goto error; + } stats_info.stats_initialize = true; if (stats_info.pestat) i40iw_vsi_stats_init(&iwdev->vsi, &stats_info); } return status; +error: + kfree(iwdev->hmc_info_mem); + iwdev->hmc_info_mem = NULL; + return status; } /** From f300ba2d1ef1cb8411daa5e1ae44acfa7b88236c Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Fri, 19 May 2017 16:14:02 -0500 Subject: [PATCH 157/249] RDMA/i40iw: Remove MSS change support MSS change on active QPs is not supported. Store new MSS value for new QPs only. Remove code to modify MSS on the fly. This also resolves a crash on QP modify to QP 0. BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: i40iw_sc_qp_modify+0x22/0x280 [i40iw] Oops: 0000 [#1] SMP KASAN CPU: 2 PID: 1236 Comm: kworker/u16:4 Not tainted 4.12.0-rc1 #5 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Q87M-D2H, BIOS F7 01/17/2014 Workqueue: l2params i40iw_l2params_worker [i40iw] task: ffff88070f5a9b40 task.stack: ffff88070f5a0000 RIP: 0010:i40iw_sc_qp_modify+0x22/0x280 [i40iw] ... Call Trace: i40iw_exec_cqp_cmd+0x2ce/0x410 [i40iw] ? _raw_spin_lock_irqsave+0x6f/0x80 ? i40iw_process_cqp_cmd+0x1d/0x80 [i40iw] i40iw_process_cqp_cmd+0x7c/0x80 [i40iw] i40iw_handle_cqp_op+0x2f/0x200 [i40iw] ? trace_hardirqs_off+0xd/0x10 ? _raw_spin_unlock_irqrestore+0x46/0x50 i40iw_hw_modify_qp+0x5e/0x90 [i40iw] i40iw_qp_mss_modify+0x52/0x60 [i40iw] i40iw_change_l2params+0x145/0x160 [i40iw] i40iw_l2params_worker+0x1f/0x40 [i40iw] process_one_work+0x1f5/0x650 ? process_one_work+0x161/0x650 worker_thread+0x48/0x3b0 kthread+0x112/0x150 ? process_one_work+0x650/0x650 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x2e/0x40 Code: 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 41 56 41 55 41 89 cd 41 54 49 89 fc 53 48 89 f3 48 89 d6 48 83 ec 08 48 8b 87 10 01 00 00 <48> 8b 40 08 4c 8b b0 40 04 00 00 4c 89 f7 e8 1b e5 ff ff 48 85 RIP: i40iw_sc_qp_modify+0x22/0x280 [i40iw] RSP: ffff88070f5a7c28 CR2: 0000000000000008 ---[ end trace 77a405931e296060 ]--- Reported-by: Stefan Assmann Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_ctrl.c | 12 +----------- drivers/infiniband/hw/i40iw/i40iw_osdep.h | 1 - drivers/infiniband/hw/i40iw/i40iw_type.h | 2 -- drivers/infiniband/hw/i40iw/i40iw_utils.c | 17 ----------------- 4 files changed, 1 insertion(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index f82483b3d1e7..a027e2072477 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -285,28 +285,20 @@ void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2pa struct i40iw_sc_dev *dev = vsi->dev; struct i40iw_sc_qp *qp = NULL; bool qs_handle_change = false; - bool mss_change = false; unsigned long flags; u16 qs_handle; int i; - if (vsi->mss != l2params->mss) { - mss_change = true; - vsi->mss = l2params->mss; - } + vsi->mss = l2params->mss; i40iw_fill_qos_list(l2params->qs_handle_list); for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) { qs_handle = l2params->qs_handle_list[i]; if (vsi->qos[i].qs_handle != qs_handle) qs_handle_change = true; - else if (!mss_change) - continue; /* no MSS nor qs handle change */ spin_lock_irqsave(&vsi->qos[i].lock, flags); qp = i40iw_get_qp(&vsi->qos[i].qplist, qp); while (qp) { - if (mss_change) - i40iw_qp_mss_modify(dev, qp); if (qs_handle_change) { qp->qs_handle = qs_handle; /* issue cqp suspend command */ @@ -2395,7 +2387,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify( set_64bit_val(wqe, 8, - LS_64(info->new_mss, I40IW_CQPSQ_QP_NEWMSS) | LS_64(term_len, I40IW_CQPSQ_QP_TERMLEN)); set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); @@ -2410,7 +2401,6 @@ static enum i40iw_status_code i40iw_sc_qp_modify( LS_64(info->cq_num_valid, I40IW_CQPSQ_QP_CQNUMVALID) | LS_64(info->force_loopback, I40IW_CQPSQ_QP_FORCELOOPBACK) | LS_64(qp->qp_type, I40IW_CQPSQ_QP_QPTYPE) | - LS_64(info->mss_change, I40IW_CQPSQ_QP_MSSCHANGE) | LS_64(info->static_rsrc, I40IW_CQPSQ_QP_STATRSRC) | LS_64(info->remove_hash_idx, I40IW_CQPSQ_QP_REMOVEHASHENTRY) | LS_64(term_actions, I40IW_CQPSQ_QP_TERMACT) | diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h index aa66c1c63dfa..f27be3e7830b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h +++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h @@ -199,7 +199,6 @@ void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx); void *i40iw_remove_head(struct list_head *list); void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend); -void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp); void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len); void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred); diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 7b76259752b0..959ec81fba99 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -541,7 +541,6 @@ struct i40iw_create_qp_info { struct i40iw_modify_qp_info { u64 rx_win0; u64 rx_win1; - u16 new_mss; u8 next_iwarp_state; u8 termlen; bool ord_valid; @@ -554,7 +553,6 @@ struct i40iw_modify_qp_info { bool dont_send_term; bool dont_send_fin; bool cached_var_valid; - bool mss_change; bool force_loopback; }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 409a3781e735..56d986924a4c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -756,23 +756,6 @@ void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, b i40iw_pr_err("CQP-OP QP Suspend/Resume fail"); } -/** - * i40iw_qp_mss_modify - modify mss for qp - * @dev: hardware control device structure - * @qp: hardware control qp - */ -void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) -{ - struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev; - struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; - struct i40iw_modify_qp_info info; - - memset(&info, 0, sizeof(info)); - info.mss_change = true; - info.new_mss = qp->vsi->mss; - i40iw_hw_modify_qp(iwdev, iwqp, &info, false); -} - /** * i40iw_term_modify_qp - modify qp for term message * @qp: hardware control qp From e80bd98d1ff011beec872a8ebbb73930507d6a13 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 May 2017 13:11:17 -0500 Subject: [PATCH 158/249] RDMA/i40iw: fix duplicated code for different branches Refactor code to avoid identical code for different branches. Addresses-Coverity-ID: 1357356 Reviewed-by: Yuval Shaia Signed-off-by: Gustavo A. R. Silva Acked-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_virtchnl.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index f4d13683a403..48fd327f876b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -443,10 +443,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev, if (!dev->vchnl_up) return I40IW_ERR_NOT_READY; if (vchnl_msg->iw_op_code == I40IW_VCHNL_OP_GET_VER) { - if (vchnl_msg->iw_op_ver != I40IW_VCHNL_OP_GET_VER_V0) - vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); - else - vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); + vchnl_pf_send_get_ver_resp(dev, vf_id, vchnl_msg); return I40IW_SUCCESS; } for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) { From 0e5fc903513a652a2cf890a3d3b91c1779e16e97 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 10 May 2017 16:46:39 -0500 Subject: [PATCH 159/249] RDMA/nes: Don't set 0-length FULPDU RTR indication control flag Don't set control flag for 0-length FULPDU (Send) RTR indication in the enhanced MPA Request/Reply frames, because it isn't supported. Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index fb983df7c157..06a55f6fb708 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -610,7 +610,6 @@ static void build_mpa_v2(struct nes_cm_node *cm_node, ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD; } ctrl_ird |= IETF_PEER_TO_PEER; - ctrl_ird |= IETF_FLPDU_ZERO_LEN; switch (mpa_key) { case MPA_KEY_REQUEST: From f863de7de34025ee536cb3cc382bfc3cafaa9f0a Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 10 May 2017 16:46:40 -0500 Subject: [PATCH 160/249] RDMA/nes: ACK MPA Reply frame Explicitly ACK the MPA Reply frame so the peer does not retransmit the frame. Signed-off-by: Tatyana Nikolova Signed-off-by: Doug Ledford --- drivers/infiniband/hw/nes/nes_cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 06a55f6fb708..30b256a2c54e 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1825,7 +1825,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) type = NES_CM_EVENT_CONNECTED; cm_node->state = NES_CM_STATE_TSA; } - + send_ack(cm_node, NULL); break; default: WARN_ON(1); From 1dad0ebeea1cd890b8892523f736916e245b0aef Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 15 May 2017 06:40:39 +0000 Subject: [PATCH 161/249] RDMA/iw_cxgb4: Avoid touch after free error in ARP failure handlers The patch 761e19a504af (RDMA/iw_cxgb4: Handle return value of c4iw_ofld_send() in abort_arp_failure()) from May 6, 2016 leads to the following static checker warning: drivers/infiniband/hw/cxgb4/cm.c:575 abort_arp_failure() warn: passing freed memory 'skb' Also fixes skb leak when l2t resolution fails Fixes: 761e19a504afa55 (RDMA/iw_cxgb4: Handle return value of c4iw_ofld_send() in abort_arp_failure()) Reported-by: Dan Carpenter Cc: Dan Carpenter Signed-off-by: Raju Rangoju Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b6fe45924c6e..06b110213e92 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -488,6 +488,7 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); + kfree_skb(skb); return 0; } @@ -498,6 +499,7 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); + kfree_skb(skb); return 0; } @@ -569,11 +571,13 @@ static void abort_arp_failure(void *handle, struct sk_buff *skb) pr_debug("%s rdev %p\n", __func__, rdev); req->cmd = CPL_ABORT_NO_RST; + skb_get(skb); ret = c4iw_ofld_send(rdev, skb); if (ret) { __state_set(&ep->com, DEAD); queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE); - } + } else + kfree_skb(skb); } static int send_flowc(struct c4iw_ep *ep) From 4bbfabede50849f87b913a1fc07c02ecc96b6c57 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 19 May 2017 14:48:42 +0530 Subject: [PATCH 162/249] RDMA/iw_cxgb4: calculate t4_eq_status_entries properly use egrstatuspagesize to calculate t4_eq_status_entries. Fixes: bb58d07964f2 ("cxgb4: Update IngPad and IngPack values") Reported-by: Logan Gunthorpe Signed-off-by: Potnuri Bharat Teja Signed-off-by: Ganesh Goudar Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 329fb65e8fb0..f96a96dbcf1f 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -971,7 +971,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) devp->rdev.lldi.sge_egrstatuspagesize); devp->rdev.hw_queue.t4_eq_status_entries = - devp->rdev.lldi.sge_ingpadboundary > 64 ? 2 : 1; + devp->rdev.lldi.sge_egrstatuspagesize / 64; devp->rdev.hw_queue.t4_max_eq_size = 65520; devp->rdev.hw_queue.t4_max_iq_size = 65520; devp->rdev.hw_queue.t4_max_rq_size = 8192 - From 98b80a2a73a26f0e259632da519ccedb0cf37617 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 31 May 2017 12:06:58 +0530 Subject: [PATCH 163/249] RDMA/iw_cxgb4: fix the calculation of ipv6 header size Take care of ipv6 checks while computing header length for deducing mtu size of ipv6 servers. Due to the incorrect header length computation for ipv6 servers, wrong mss is reported to the peer (client). Signed-off-by: Raju Rangoju Signed-off-by: Ganesh Goudar Acked-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 06b110213e92..0910faf3587b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2521,7 +2521,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) + + hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) + + sizeof(struct tcphdr) + ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0); if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) child_ep->mtu = peer_mss + hdrs; From c4dd4b69f55abcc8dd079f8de55d9d8c2ddbefce Mon Sep 17 00:00:00 2001 From: "Steven L. Roberts" Date: Wed, 10 May 2017 10:54:12 -0500 Subject: [PATCH 164/249] RDMA/hfi1: fix array termination by appending NULL to attr array This fixes a kernel panic when loading the hfi driver as a dynamic module. Signed-off-by: Steven L Roberts Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/sysfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 50d140d25e38..2f3bbcac1e34 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -196,7 +196,8 @@ static const struct sysfs_ops port_cc_sysfs_ops = { }; static struct attribute *port_cc_default_attributes[] = { - &cc_prescan_attr.attr + &cc_prescan_attr.attr, + NULL }; static struct kobj_type port_cc_ktype = { From e4785b0633574a607daaa04bf2fe43550055194f Mon Sep 17 00:00:00 2001 From: "Steven L. Roberts" Date: Wed, 10 May 2017 14:58:13 -0500 Subject: [PATCH 165/249] RDMA/hfi1: change PCI bar addr assignments to Linux API functions The Omni-Path adapter driver fails to load on the ppc64le platform due to invalid PCI setup. This patch makes the PCI configuration more robust and will fix 64 bit addressing for ppc64le. Signed-off-by: Steven L Roberts Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pcie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 93faf86d54b6..6a9f6f9819e1 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -207,8 +207,8 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) /* * Save BARs and command to rewrite after device reset. */ - dd->pcibar0 = addr; - dd->pcibar1 = addr >> 32; + pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_0, &dd->pcibar0); + pci_read_config_dword(dd->pcidev, PCI_BASE_ADDRESS_1, &dd->pcibar1); pci_read_config_dword(dd->pcidev, PCI_ROM_ADDRESS, &dd->pci_rom); pci_read_config_word(dd->pcidev, PCI_COMMAND, &dd->pci_command); pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &dd->pcie_devctl); From b3e6b4bdbb609762d8401ac4a959d590b4e4e3b8 Mon Sep 17 00:00:00 2001 From: "Byczkowski, Jakub" Date: Fri, 12 May 2017 09:01:37 -0700 Subject: [PATCH 166/249] RDMA/hfi1: Defer setting VL15 credits to link-up interrupt Keep VL15 credits at 0 during LNI, before link-up. Store VL15 credits value during verify cap interrupt and set in after link-up. This addresses an issue where VL15 MAD packets could be sent by one side of the link before the other side is ready to receive them. Reviewed-by: Mike Marciniszyn Reviewed-by: Dean Luick Reviewed-by: Dennis Dalessandro Signed-off-by: Jakub Byczkowski Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 67 +++++++++++++++------ drivers/infiniband/hw/hfi1/chip_registers.h | 2 + drivers/infiniband/hw/hfi1/hfi.h | 11 +++- drivers/infiniband/hw/hfi1/intr.c | 3 +- 4 files changed, 64 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 5d6b1eeaa9a0..2ba00b89df6a 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6312,25 +6312,38 @@ static void handle_8051_request(struct hfi1_pportdata *ppd) } } -static void write_global_credit(struct hfi1_devdata *dd, - u8 vau, u16 total, u16 shared) +/* + * Set up allocation unit vaulue. + */ +void set_up_vau(struct hfi1_devdata *dd, u8 vau) { - write_csr(dd, SEND_CM_GLOBAL_CREDIT, - ((u64)total << - SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT) | - ((u64)shared << - SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT) | - ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT)); + u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT); + + /* do not modify other values in the register */ + reg &= ~SEND_CM_GLOBAL_CREDIT_AU_SMASK; + reg |= (u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT; + write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg); } /* * Set up initial VL15 credits of the remote. Assumes the rest of - * the CM credit registers are zero from a previous global or credit reset . + * the CM credit registers are zero from a previous global or credit reset. + * Shared limit for VL15 will always be 0. */ -void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf) +void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf) { - /* leave shared count at zero for both global and VL15 */ - write_global_credit(dd, vau, vl15buf, 0); + u64 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT); + + /* set initial values for total and shared credit limit */ + reg &= ~(SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK | + SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK); + + /* + * Set total limit to be equal to VL15 credits. + * Leave shared limit at 0. + */ + reg |= (u64)vl15buf << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT; + write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg); write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); @@ -6348,9 +6361,11 @@ void reset_link_credits(struct hfi1_devdata *dd) for (i = 0; i < TXE_NUM_DATA_VL; i++) write_csr(dd, SEND_CM_CREDIT_VL + (8 * i), 0); write_csr(dd, SEND_CM_CREDIT_VL15, 0); - write_global_credit(dd, 0, 0, 0); + write_csr(dd, SEND_CM_GLOBAL_CREDIT, 0); /* reset the CM block */ pio_send_control(dd, PSC_CM_RESET); + /* reset cached value */ + dd->vl15buf_cached = 0; } /* convert a vCU to a CU */ @@ -6839,24 +6854,35 @@ void handle_link_up(struct work_struct *work) { struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, link_up_work); + struct hfi1_devdata *dd = ppd->dd; + set_link_state(ppd, HLS_UP_INIT); /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */ - read_ltp_rtt(ppd->dd); + read_ltp_rtt(dd); /* * OPA specifies that certain counters are cleared on a transition * to link up, so do that. */ - clear_linkup_counters(ppd->dd); + clear_linkup_counters(dd); /* * And (re)set link up default values. */ set_linkup_defaults(ppd); + /* + * Set VL15 credits. Use cached value from verify cap interrupt. + * In case of quick linkup or simulator, vl15 value will be set by + * handle_linkup_change. VerifyCap interrupt handler will not be + * called in those scenarios. + */ + if (!(quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) + set_up_vl15(dd, dd->vl15buf_cached); + /* enforce link speed enabled */ if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) { /* oops - current speed is not enabled, bounce */ - dd_dev_err(ppd->dd, + dd_dev_err(dd, "Link speed active 0x%x is outside enabled 0x%x, downing link\n", ppd->link_speed_active, ppd->link_speed_enabled); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, @@ -7357,7 +7383,14 @@ void handle_verify_cap(struct work_struct *work) */ if (vau == 0) vau = 1; - set_up_vl15(dd, vau, vl15buf); + set_up_vau(dd, vau); + + /* + * Set VL15 credits to 0 in global credit register. Cache remote VL15 + * credits value and wait for link-up interrupt ot set it. + */ + set_up_vl15(dd, 0); + dd->vl15buf_cached = vl15buf; /* set up the LCB CRC mode */ crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc; diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 5bfa839d1c48..793514f1d15f 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -839,7 +839,9 @@ #define SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK 0x8ull #define SEND_CM_CTRL_RESETCSR 0x0000000000000020ull #define SEND_CM_GLOBAL_CREDIT (TXE + 0x000000000508) +#define SEND_CM_GLOBAL_CREDIT_AU_MASK 0x7ull #define SEND_CM_GLOBAL_CREDIT_AU_SHIFT 16 +#define SEND_CM_GLOBAL_CREDIT_AU_SMASK 0x70000ull #define SEND_CM_GLOBAL_CREDIT_RESETCSR 0x0000094000030000ull #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK 0xFFFFull #define SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT 0 diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index da322e6668cc..414a04a481c2 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1045,6 +1045,14 @@ struct hfi1_devdata { /* initial vl15 credits to use */ u16 vl15_init; + /* + * Cached value for vl15buf, read during verify cap interrupt. VL15 + * credits are to be kept at 0 and set when handling the link-up + * interrupt. This removes the possibility of receiving VL15 MAD + * packets before this HFI is ready. + */ + u16 vl15buf_cached; + /* Misc small ints */ u8 n_krcv_queues; u8 qos_shift; @@ -1598,7 +1606,8 @@ int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encode); int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t); int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t); -void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); +void set_up_vau(struct hfi1_devdata *dd, u8 vau); +void set_up_vl15(struct hfi1_devdata *dd, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu); diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index ba265d0ae93b..04a5082d5ac5 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -130,7 +130,8 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) * the remote values. Both sides must be using the values. */ if (quick_linkup || dd->icode == ICODE_FUNCTIONAL_SIMULATOR) { - set_up_vl15(dd, dd->vau, dd->vl15_init); + set_up_vau(dd, dd->vau); + set_up_vl15(dd, dd->vl15_init); assign_remote_cm_au_table(dd, dd->vcu); } From 1feb40067cf04ae48d65f728d62ca255c9449178 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 12 May 2017 09:02:00 -0700 Subject: [PATCH 167/249] RDMA/qib,hfi1: Fix MR reference count leak on write with immediate The handling of IB_RDMA_WRITE_ONLY_WITH_IMMEDIATE will leak a memory reference when a buffer cannot be allocated for returning the immediate data. The issue is that the rkey validation has already occurred and the RNR nak fails to release the reference that was fruitlessly gotten. The the peer will send the identical single packet request when its RNR timer pops. The fix is to release the held reference prior to the rnr nak exit. This is the only sequence the requires both rkey validation and the buffer allocation on the same packet. Cc: Stable # 4.7+ Tested-by: Tadeusz Struk Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 5 ++++- drivers/infiniband/hw/qib/qib_rc.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 069bdaf061ab..1080778a1f7c 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2159,8 +2159,11 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; - if (!ret) + if (!ret) { + /* peer will send again */ + rvt_put_ss(&qp->r_sge); goto rnr_nak; + } wc.ex.imm_data = ohdr->u.rc.imm_data; wc.wc_flags = IB_WC_WITH_IMM; goto send_last; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index fc8b88514da5..4ddbcac5eabe 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1956,8 +1956,10 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, ret = qib_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; - if (!ret) + if (!ret) { + rvt_put_ss(&qp->r_sge); goto rnr_nak; + } wc.ex.imm_data = ohdr->u.rc.imm_data; hdrsize += 4; wc.wc_flags = IB_WC_WITH_IMM; From eed7624552ca55c346a4e9ccaa9ab0723841aee6 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 21 May 2017 19:11:13 +0300 Subject: [PATCH 168/249] RDMA/mlx4: Fix MAD tunneling when SRIOV is enabled The cited patch added a type field to structures ib_ah and rdma_ah_attr. Function mlx4_ib_query_ah() builds an rdma_ah_attr structure from the data in an mlx4_ib_ah structure (which contains both an ib_ah structure and an address vector). For mlx4_ib_query_ah() to work properly, the type field in the contained ib_ah structure must be set correctly. In the outgoing MAD tunneling flow, procedure mlx4_ib_multiplex_mad() paravirtualizes a MAD received from a slave and sends the processed mad out over the wire. During this processing, it populates an mlx4_ib_ah structure and calls mlx4_ib_query_ah(). The cited commit overlooked setting the type field in the contained ib_ah structure before invoking mlx4_ib_query_ah(). As a result, the type field remained uninitialized, and the rdma_ah_attr structure was incorrectly built. This resulted in improperly built MADs being sent out over the wire. This patch properly initializes the type field in the contained ib_ah structure before calling mlx4_ib_query_ah(). The rdma_ah_attr structure is then generated correctly. Fixes: 44c58487d51a ("IB/core: Define 'ib' and 'roce' rdma_ah_attr types") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index b4694717f6f3..21d31cb1325f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1578,6 +1578,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc if (port < 0) return; ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff)); + ah.ibah.type = rdma_ah_find_type(&dev->ib_dev, port); mlx4_ib_query_ah(&ah.ibah, &ah_attr); if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH) From 1410a90ae449061b7e1ae19d275148f36948801b Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:10 +0300 Subject: [PATCH 169/249] net/mlx5: Define interface bits for fencing UMR wqe HW can implement UMR wqe re-transmission in various ways. Thus, add HCA cap to distinguish the needed fence for UMR to make sure that the wqe wouldn't fail on mkey checks. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 32de0724b400..edafedb7b509 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -766,6 +766,12 @@ enum { MLX5_CAP_PORT_TYPE_ETH = 0x1, }; +enum { + MLX5_CAP_UMR_FENCE_STRONG = 0x0, + MLX5_CAP_UMR_FENCE_SMALL = 0x1, + MLX5_CAP_UMR_FENCE_NONE = 0x2, +}; + struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x80]; @@ -875,7 +881,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_202[0x1]; u8 ipoib_enhanced_offloads[0x1]; u8 ipoib_basic_offloads[0x1]; - u8 reserved_at_205[0xa]; + u8 reserved_at_205[0x5]; + u8 umr_fence[0x2]; + u8 reserved_at_20c[0x3]; u8 drain_sigerr[0x1]; u8 cmdif_checksum[0x2]; u8 sigerr_cqe[0x1]; From 6e8484c5cf07c7ee632587e98c1a12d319dacb7c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 28 May 2017 10:53:11 +0300 Subject: [PATCH 170/249] RDMA/mlx5: set UMR wqe fence according to HCA cap Cache the needed umr_fence and set the wqe ctrl segmennt accordingly. Signed-off-by: Max Gurtovoy Acked-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 14 +++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +- drivers/infiniband/hw/mlx5/qp.c | 59 +++++++++++----------------- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d45772da0963..0c79983c8b1a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2979,6 +2979,18 @@ static int create_umr_res(struct mlx5_ib_dev *dev) return ret; } +static u8 mlx5_get_umr_fence(u8 umr_fence_cap) +{ + switch (umr_fence_cap) { + case MLX5_CAP_UMR_FENCE_NONE: + return MLX5_FENCE_MODE_NONE; + case MLX5_CAP_UMR_FENCE_SMALL: + return MLX5_FENCE_MODE_INITIATOR_SMALL; + default: + return MLX5_FENCE_MODE_STRONG_ORDERING; + } +} + static int create_dev_resources(struct mlx5_ib_resources *devr) { struct ib_srq_init_attr attr; @@ -3693,6 +3705,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) mlx5_ib_internal_fill_odp_caps(dev); + dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); + if (MLX5_CAP_GEN(mdev, imaicl)) { dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 38c877bc45e5..bdcf25410c99 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -349,7 +349,7 @@ struct mlx5_ib_qp { struct mlx5_ib_wq rq; u8 sq_signal_bits; - u8 fm_cache; + u8 next_fence; struct mlx5_ib_wq sq; /* serialize qp state modifications @@ -654,6 +654,7 @@ struct mlx5_ib_dev { struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; struct mlx5_sq_bfreg fp_bfreg; + u8 umr_fence; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 93959e1e43a3..ebb6768684de 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3738,24 +3738,6 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } } -static u8 get_fence(u8 fence, struct ib_send_wr *wr) -{ - if (unlikely(wr->opcode == IB_WR_LOCAL_INV && - wr->send_flags & IB_SEND_FENCE)) - return MLX5_FENCE_MODE_STRONG_ORDERING; - - if (unlikely(fence)) { - if (wr->send_flags & IB_SEND_FENCE) - return MLX5_FENCE_MODE_SMALL_AND_FENCE; - else - return fence; - } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { - return MLX5_FENCE_MODE_FENCE; - } - - return 0; -} - static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, struct ib_send_wr *wr, unsigned *idx, @@ -3784,8 +3766,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u8 next_fence, - u32 mlx5_opcode) + int nreq, u8 fence, u32 mlx5_opcode) { u8 opmod = 0; @@ -3793,7 +3774,6 @@ static void finish_wqe(struct mlx5_ib_qp *qp, mlx5_opcode | ((u32)opmod << 24)); ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8)); ctrl->fm_ce_se |= fence; - qp->fm_cache = next_fence; if (unlikely(qp->wq_sig)) ctrl->signature = wq_sig(ctrl); @@ -3853,7 +3833,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - fence = qp->fm_cache; num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); @@ -3870,6 +3849,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } + if (wr->opcode == IB_WR_LOCAL_INV || + wr->opcode == IB_WR_REG_MR) { + fence = dev->umr_fence; + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + } else if (wr->send_flags & IB_SEND_FENCE) { + if (qp->next_fence) + fence = MLX5_FENCE_MODE_SMALL_AND_FENCE; + else + fence = MLX5_FENCE_MODE_FENCE; + } else { + fence = qp->next_fence; + } + switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -3896,7 +3888,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; case IB_WR_LOCAL_INV: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); set_linv_wr(qp, &seg, &size); @@ -3904,7 +3895,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; case IB_WR_REG_MR: - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size); @@ -3927,9 +3917,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error @@ -3954,9 +3943,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); if (err) { @@ -3966,7 +3954,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, mr->sig->psv_wire.psv_idx, &seg, &size); @@ -3976,9 +3963,9 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, - nreq, get_fence(fence, wr), - next_fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + fence, MLX5_OPCODE_SET_PSV); + qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4089,8 +4076,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - get_fence(fence, wr), next_fence, + qp->next_fence = next_fence; + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) From d38d7fdafaf89d1756f27e9b4054dd19a1d2f545 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 22 May 2017 13:30:15 -0500 Subject: [PATCH 171/249] RDMA/qedr: add null check before pointer dereference Add null check before dereferencing pointer sgid_attr.ndev inside function rdma_vlan_dev_vlan_id(). Addresses-Coverity-ID: 1373979 Signed-off-by: Gustavo A. R. Silva Acked-by: Ram Amrani Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qedr/qedr_cm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c index 3d7705cec770..d86dbe814d98 100644 --- a/drivers/infiniband/hw/qedr/qedr_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -270,11 +270,13 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev, return rc; } - vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); - if (vlan_id < VLAN_CFI_MASK) - has_vlan = true; - if (sgid_attr.ndev) + if (sgid_attr.ndev) { + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + if (vlan_id < VLAN_CFI_MASK) + has_vlan = true; + dev_put(sgid_attr.ndev); + } if (!memcmp(&sgid, &zgid, sizeof(sgid))) { DP_ERR(dev, "gsi post send: GID not found GID index %d\n", From 8c490669deb0133dfb6bfe7a80d7cfef3bbbd92f Mon Sep 17 00:00:00 2001 From: Honggang Li Date: Thu, 11 May 2017 20:14:28 +0800 Subject: [PATCH 172/249] RDMA/IPoIB: Replace netdev_priv with ipoib_priv for ipoib_get_link_ksettings ipoib_dev_init accesses the wrong private data for the IPoIB device. Commit cd565b4b51e5 (IB/IPoIB: Support acceleration options callbacks) changed ipoib_priv from being identical to netdev_priv to being an area inside of, but not the same pointer as, the netdev_priv pointer. As such, the struct we want is the ipoib_priv area, not the netdev_priv area, so use the right accessor, otherwise we kernel panic. [ 27.271938] IPv6: ADDRCONF(NETDEV_CHANGE): mlx5_ib0.8006: link becomes ready [ 28.156790] BUG: unable to handle kernel NULL pointer dereference at 000000000000067c [ 28.166309] IP: ib_query_port+0x30/0x180 [ib_core] ... [ 28.306282] RIP: 0010:ib_query_port+0x30/0x180 [ib_core] ... [ 28.393337] Call Trace: [ 28.397594] ipoib_get_link_ksettings+0x66/0xe0 [ib_ipoib] [ 28.405274] __ethtool_get_link_ksettings+0xa0/0x1c0 [ 28.412353] speed_show+0x74/0xa0 [ 28.417503] dev_attr_show+0x20/0x50 [ 28.422922] ? mutex_lock+0x12/0x40 [ 28.428179] sysfs_kf_seq_show+0xbf/0x1a0 [ 28.434002] kernfs_seq_show+0x21/0x30 [ 28.439470] seq_read+0x116/0x3b0 [ 28.444445] ? do_filp_open+0xa5/0x100 [ 28.449774] kernfs_fop_read+0xff/0x180 [ 28.455220] __vfs_read+0x37/0x150 [ 28.460167] ? security_file_permission+0x9d/0xc0 [ 28.466560] vfs_read+0x8c/0x130 [ 28.471318] SyS_read+0x55/0xc0 [ 28.475950] do_syscall_64+0x67/0x150 [ 28.481163] entry_SYSCALL64_slow_path+0x25/0x25 ... [ 28.584493] ---[ end trace 3549968a4bf0aa5d ]--- Fixes: cd565b4b51e5 (IB/IPoIB: Support acceleration options callbacks) Fixes: 0d7e2d2166f6 (IB/ipoib: add get_link_ksettings in ethtool) Signed-off-by: Honggang Li Reviewed-by: Yuval Shaia Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 874b24366e4d..7871379342f4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -178,7 +178,7 @@ static inline int ib_speed_enum_to_int(int speed) static int ipoib_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct ipoib_dev_priv *priv = netdev_priv(netdev); + struct ipoib_dev_priv *priv = ipoib_priv(netdev); struct ib_port_attr attr; int ret, speed, width; From 0a1a972630c77edb39adf310699e404b8bf9176e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 14 May 2017 13:32:06 +0300 Subject: [PATCH 173/249] RDMA/IPoIB: Limit the ipoib_dev_uninit_default scope ipoib_dev_uninit_default() call is used in ipoib_main.c file only and it generates the following warning from smatch tool: drivers/infiniband/ulp/ipoib/ipoib_main.c:1593:6: warning: symbol 'ipoib_dev_uninit_default' was not declared. Should it be static? so let's declare that function as static. Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2869d1adb1de..a115c0b7a310 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1590,7 +1590,7 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev) wait_for_completion(&priv->ntbl.deleted); } -void ipoib_dev_uninit_default(struct net_device *dev) +static void ipoib_dev_uninit_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); From 95c2ef50c726a51d580c35ae8dccd383abaa8701 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 11 May 2017 18:52:36 +0300 Subject: [PATCH 174/249] RDMA/srp: Fix NULL deref at srp_destroy_qp() If srp_init_qp() fails at srp_create_ch_ib() then ch->send_cq may be NULL. Calling directly to ib_destroy_qp() is sufficient because no work requests were posted on the created qp. Fixes: 9294000d6d89 ("IB/srp: Drain the send queue before destroying a QP") Cc: Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Bart van Assche -- Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index def723a5df29..4306285fb155 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -575,7 +575,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) return 0; err_qp: - srp_destroy_qp(ch, qp); + ib_destroy_qp(qp); err_send_cq: ib_free_cq(send_cq); From 233c1955835bd8649003be9bb3d8e79788b08be1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 14 May 2017 15:49:57 +0300 Subject: [PATCH 175/249] RDMA/netlink: Reduce exposure of RDMA netlink functions RDMA netlink is part of ib_core, hence ibnl_chk_listeners(), ibnl_init() and ibnl_cleanup() don't need to be published in public header file. Let's remove EXPORT_SYMBOL from ibnl_chk_listeners() and move all these functions to private header file. CC: Yuval Shaia Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/core_priv.h | 10 ++++++++++ drivers/infiniband/core/netlink.c | 2 +- include/rdma/rdma_netlink.h | 10 ---------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index cb7d372e4bdf..d92ab4eaa8f3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -169,6 +169,16 @@ void ib_mad_cleanup(void); int ib_sa_init(void); void ib_sa_cleanup(void); +int ibnl_init(void); +void ibnl_cleanup(void); + +/** + * Check if there are any listeners to the netlink group + * @group: the netlink group ID + * Returns 0 on success or a negative for no listeners. + */ +int ibnl_chk_listeners(unsigned int group); + int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct netlink_callback *cb); int ib_nl_handle_set_timeout(struct sk_buff *skb, diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index b784055423c8..94931c474d41 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -37,6 +37,7 @@ #include #include #include +#include "core_priv.h" struct ibnl_client { struct list_head list; @@ -55,7 +56,6 @@ int ibnl_chk_listeners(unsigned int group) return -1; return 0; } -EXPORT_SYMBOL(ibnl_chk_listeners); int ibnl_add_client(int index, int nops, const struct ibnl_client_cbs cb_table[]) diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 585266144329..348c102cb5f6 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -10,9 +10,6 @@ struct ibnl_client_cbs { struct module *module; }; -int ibnl_init(void); -void ibnl_cleanup(void); - /** * Add a a client to the list of IB netlink exporters. * @index: Index of the added client @@ -77,11 +74,4 @@ int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned int group, gfp_t flags); -/** - * Check if there are any listeners to the netlink group - * @group: the netlink group ID - * Returns 0 on success or a negative for no listeners. - */ -int ibnl_chk_listeners(unsigned int group); - #endif /* _RDMA_NETLINK_H */ From f937d93a9122d1510ca6e4bb8d860aedcf9408c3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 18 May 2017 07:40:33 +0300 Subject: [PATCH 176/249] RDMA/uverbs: Declare local function static and add brackets to sizeof Commit 57520751445b ("IB/SA: Add OPA path record type") introduced new local function __ib_copy_path_rec_to_user, but didn't limit its scope. This produces the following sparse warning: drivers/infiniband/core/uverbs_marshall.c:99:6: warning: symbol '__ib_copy_path_rec_to_user' was not declared. Should it be static? In addition, it used sizeof ... notations instead of sizeof(...), which is correct in C, but a little bit misleading. Let's change it too. Fixes: 57520751445b ("IB/SA: Add OPA path record type") Reviewed-by: Yuval Shaia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_marshall.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 8b9587fe2303..94fd989c9060 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -96,11 +96,11 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst, } EXPORT_SYMBOL(ib_copy_qp_attr_to_user); -void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, - struct sa_path_rec *src) +static void __ib_copy_path_rec_to_user(struct ib_user_path_rec *dst, + struct sa_path_rec *src) { - memcpy(dst->dgid, src->dgid.raw, sizeof src->dgid); - memcpy(dst->sgid, src->sgid.raw, sizeof src->sgid); + memcpy(dst->dgid, src->dgid.raw, sizeof(src->dgid)); + memcpy(dst->sgid, src->sgid.raw, sizeof(src->sgid)); dst->dlid = htons(ntohl(sa_path_get_dlid(src))); dst->slid = htons(ntohl(sa_path_get_slid(src))); From 53376fedb9da54c0d3b0bd3a6edcbeb681692909 Mon Sep 17 00:00:00 2001 From: Qing Huang Date: Thu, 18 May 2017 16:33:53 -0700 Subject: [PATCH 177/249] RDMA/core: not to set page dirty bit if it's already set. This change will optimize kernel memory deregistration operations. __ib_umem_release() used to call set_page_dirty_lock() against every writable page in its memory region. Its purpose is to keep data synced between CPU and DMA device when swapping happens after mem deregistration ops. Now we choose not to set page dirty bit if it's already set by kernel prior to calling __ib_umem_release(). This reduces memory deregistration time by half or even more when we ran application simulation test program. Signed-off-by: Qing Huang Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 3dbf811d3c51..21e60b1e2ff4 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -58,7 +58,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { page = sg_page(sg); - if (umem->writable && dirty) + if (!PageDirty(page) && umem->writable && dirty) set_page_dirty_lock(page); put_page(page); } From 79bb5b7ee1776a244484a1be6671d89fbd7c0c9f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 21 May 2017 19:08:09 +0300 Subject: [PATCH 178/249] RDMA/umem: Fix missing mmap_sem in get umem ODP call Add mmap_sem lock around VMA inspection in ib_umem_odp_get(). Fixes: 0008b84ea9af ('IB/umem: Add support to huge ODP') Signed-off-by: Artemy Kovalyov Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem_odp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 0780b1afefa9..8c4ec564e495 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -321,11 +321,15 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, struct vm_area_struct *vma; struct hstate *h; + down_read(&mm->mmap_sem); vma = find_vma(mm, ib_umem_start(umem)); - if (!vma || !is_vm_hugetlb_page(vma)) + if (!vma || !is_vm_hugetlb_page(vma)) { + up_read(&mm->mmap_sem); return -EINVAL; + } h = hstate_vma(vma); umem->page_shift = huge_page_shift(h); + up_read(&mm->mmap_sem); umem->hugetlb = 1; } else { umem->hugetlb = 0; From d3957b86a40612826ef935f474b31359d66cbdca Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 21 May 2017 19:09:54 +0300 Subject: [PATCH 179/249] RDMA/SA: Fix kernel panic in CMA request handler flow Commit 9fdca4da4d8c (IB/SA: Split struct sa_path_rec based on IB and ROCE specific fields) moved the service_id to be specific attribute for IB and OPA SA Path Record, and thus wasn't assigned for RoCE. This caused to the following kernel panic in the CMA request handler flow: [ 27.074594] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 27.074731] IP: __radix_tree_lookup+0x1d/0xe0 ... [ 27.075356] Workqueue: ib_cm cm_work_handler [ib_cm] [ 27.075401] task: ffff88022e3b8000 task.stack: ffffc90001298000 [ 27.075449] RIP: 0010:__radix_tree_lookup+0x1d/0xe0 ... [ 27.075979] Call Trace: [ 27.076015] radix_tree_lookup+0xd/0x10 [ 27.076055] cma_ps_find+0x59/0x70 [rdma_cm] [ 27.076097] cma_id_from_event+0xd2/0x470 [rdma_cm] [ 27.076144] ? ib_init_ah_from_path+0x39a/0x590 [ib_core] [ 27.076193] cma_req_handler+0x25/0x480 [rdma_cm] [ 27.076237] cm_process_work+0x25/0x120 [ib_cm] [ 27.076280] ? cm_get_bth_pkey.isra.62+0x3c/0xa0 [ib_cm] [ 27.076350] cm_req_handler+0xb03/0xd40 [ib_cm] [ 27.076430] ? sched_clock_cpu+0x11/0xb0 [ 27.076478] cm_work_handler+0x194/0x1588 [ib_cm] [ 27.076525] process_one_work+0x160/0x410 [ 27.076565] worker_thread+0x137/0x4a0 [ 27.076614] kthread+0x112/0x150 [ 27.076684] ? max_active_store+0x60/0x60 [ 27.077642] ? kthread_park+0x90/0x90 [ 27.078530] ret_from_fork+0x2c/0x40 This patch moves it back to the common SA Path Record structure and removes the redundant setter and getter. Tested on Connect-IB and Connect-X4 in Infiniband and RoCE respectively. Fixes: 9fdca4da4d8c (IB/SA: Split struct sa_path_rec based on IB ands ROCE specific fields) Signed-off-by: Majd Dibbiny Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- drivers/infiniband/core/cma.c | 13 ++++++------- drivers/infiniband/core/sa_query.c | 6 +++--- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_sa.h | 25 +++---------------------- 5 files changed, 15 insertions(+), 35 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1844770f3ae8..2b4d613a3474 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1429,7 +1429,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, primary_path->packet_life_time = cm_req_get_primary_local_ack_timeout(req_msg); primary_path->packet_life_time -= (primary_path->packet_life_time > 0); - sa_path_set_service_id(primary_path, req_msg->service_id); + primary_path->service_id = req_msg->service_id; if (req_msg->alt_local_lid) { alt_path->dgid = req_msg->alt_local_gid; @@ -1452,7 +1452,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, alt_path->packet_life_time = cm_req_get_alt_local_ack_timeout(req_msg); alt_path->packet_life_time -= (alt_path->packet_life_time > 0); - sa_path_set_service_id(alt_path, req_msg->service_id); + alt_path->service_id = req_msg->service_id; } } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 91b7a2fe5a55..31bb82d8ecd7 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1140,7 +1140,7 @@ static void cma_save_ib_info(struct sockaddr *src_addr, ib->sib_pkey = path->pkey; ib->sib_flowinfo = path->flow_label; memcpy(&ib->sib_addr, &path->sgid, 16); - ib->sib_sid = sa_path_get_service_id(path); + ib->sib_sid = path->service_id; ib->sib_scope_id = 0; } else { ib->sib_pkey = listen_ib->sib_pkey; @@ -1274,8 +1274,7 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event, memcpy(&req->local_gid, &req_param->primary_path->sgid, sizeof(req->local_gid)); req->has_gid = true; - req->service_id = - sa_path_get_service_id(req_param->primary_path); + req->service_id = req_param->primary_path->service_id; req->pkey = be16_to_cpu(req_param->primary_path->pkey); if (req->pkey != req_param->bth_pkey) pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" @@ -1827,7 +1826,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct rdma_route *rt; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; struct sa_path_rec *path = ib_event->param.req_rcvd.primary_path; - const __be64 service_id = sa_path_get_service_id(path); + const __be64 service_id = + ib_event->param.req_rcvd.primary_path->service_id; int ret; id = rdma_create_id(listen_id->route.addr.dev_addr.net, @@ -2345,9 +2345,8 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; - sa_path_set_service_id(&path_rec, - rdma_get_service_id(&id_priv->id, - cma_dst_addr(id_priv))); + path_rec.service_id = rdma_get_service_id(&id_priv->id, + cma_dst_addr(id_priv)); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index e335b09c022e..fb7aec4047c8 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -194,7 +194,7 @@ static u32 tid; .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { - { PATH_REC_FIELD(ib.service_id), + { PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, @@ -296,7 +296,7 @@ static const struct ib_field path_rec_table[] = { .field_name = "sa_path_rec:" #field static const struct ib_field opa_path_rec_table[] = { - { OPA_PATH_REC_FIELD(opa.service_id), + { OPA_PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, @@ -774,7 +774,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Now build the attributes */ if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) { - val64 = be64_to_cpu(sa_path_get_service_id(sa_rec)); + val64 = be64_to_cpu(sa_rec->service_id); nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID, sizeof(val64), &val64); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 4306285fb155..2354c742caa1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -320,7 +320,7 @@ static int srp_new_cm_id(struct srp_rdma_ch *ch) ch->path.sgid = target->sgid; ch->path.dgid = target->orig_dgid; ch->path.pkey = target->pkey; - sa_path_set_service_id(&ch->path, target->service_id); + ch->path.service_id = target->service_id; return 0; } diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index f5f70e345318..355b81f4242d 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -158,7 +158,6 @@ enum sa_path_rec_type { }; struct sa_path_rec_ib { - __be64 service_id; __be16 dlid; __be16 slid; u8 raw_traffic; @@ -174,7 +173,6 @@ struct sa_path_rec_roce { }; struct sa_path_rec_opa { - __be64 service_id; __be32 dlid; __be32 slid; u8 raw_traffic; @@ -189,6 +187,7 @@ struct sa_path_rec_opa { struct sa_path_rec { union ib_gid dgid; union ib_gid sgid; + __be64 service_id; /* reserved */ __be32 flow_label; u8 hop_limit; @@ -262,7 +261,7 @@ static inline void path_conv_opa_to_ib(struct sa_path_rec *ib, ib->ib.dlid = htons(ntohl(opa->opa.dlid)); ib->ib.slid = htons(ntohl(opa->opa.slid)); } - ib->ib.service_id = opa->opa.service_id; + ib->service_id = opa->service_id; ib->ib.raw_traffic = opa->opa.raw_traffic; } @@ -281,7 +280,7 @@ static inline void path_conv_ib_to_opa(struct sa_path_rec *opa, } opa->opa.slid = slid; opa->opa.dlid = dlid; - opa->opa.service_id = ib->ib.service_id; + opa->service_id = ib->service_id; opa->opa.raw_traffic = ib->ib.raw_traffic; } @@ -591,15 +590,6 @@ static inline bool sa_path_is_roce(struct sa_path_rec *rec) (rec->rec_type == SA_PATH_REC_TYPE_ROCE_V2)); } -static inline void sa_path_set_service_id(struct sa_path_rec *rec, - __be64 service_id) -{ - if (rec->rec_type == SA_PATH_REC_TYPE_IB) - rec->ib.service_id = service_id; - else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - rec->opa.service_id = service_id; -} - static inline void sa_path_set_slid(struct sa_path_rec *rec, __be32 slid) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) @@ -625,15 +615,6 @@ static inline void sa_path_set_raw_traffic(struct sa_path_rec *rec, rec->opa.raw_traffic = raw_traffic; } -static inline __be64 sa_path_get_service_id(struct sa_path_rec *rec) -{ - if (rec->rec_type == SA_PATH_REC_TYPE_IB) - return rec->ib.service_id; - else if (rec->rec_type == SA_PATH_REC_TYPE_OPA) - return rec->opa.service_id; - return 0; -} - static inline __be32 sa_path_get_slid(struct sa_path_rec *rec) { if (rec->rec_type == SA_PATH_REC_TYPE_IB) From c4beedb8a914af9c8c1b6e67c753adf411e05160 Mon Sep 17 00:00:00 2001 From: Andi Shyti Date: Thu, 1 Jun 2017 22:05:40 -0700 Subject: [PATCH 180/249] Input: tm2-touchkey - use LEN_ON as boolean value instead of LED_FULL Commit 4e552c8cb5bc ("leds: add LED_ON brightness as boolean value") has introduced the LED_ON enumeration value that can be used instead of LED_FULL which has more of a linear value. Because the tm2-touchscreen doesn't have brightness levels, but it's a simple on/off led, use LED_ON instead of LED_FULL. Signed-off-by: Andi Shyti Reviewed-by: Jaechul Lee Tested-by: Jaechul Lee Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tm2-touchkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/tm2-touchkey.c b/drivers/input/keyboard/tm2-touchkey.c index 485900f953e0..abc266e40e17 100644 --- a/drivers/input/keyboard/tm2-touchkey.c +++ b/drivers/input/keyboard/tm2-touchkey.c @@ -213,7 +213,7 @@ static int tm2_touchkey_probe(struct i2c_client *client, /* led device */ touchkey->led_dev.name = TM2_TOUCHKEY_DEV_NAME; touchkey->led_dev.brightness = LED_FULL; - touchkey->led_dev.max_brightness = LED_FULL; + touchkey->led_dev.max_brightness = LED_ON; touchkey->led_dev.brightness_set = tm2_touchkey_led_brightness_set; error = devm_led_classdev_register(&client->dev, &touchkey->led_dev); From ebcdaee4cebb3a8d0d702ab5e9392373672ec1de Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Jun 2017 19:22:01 +0100 Subject: [PATCH 181/249] dmaengine: pl330: fix warning in pl330_remove When removing a device with less than 9 IRQs (AMBA_NR_IRQS), we'll get a big WARN_ON from devres.c because pl330_remove calls devm_free_irqs for unallocated irqs. Similarly to pl330_probe, check that IRQ number is present before calling devm_free_irq. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 8b0da7fa520d..e90a7a0d760a 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -3008,7 +3008,8 @@ static int pl330_remove(struct amba_device *adev) for (i = 0; i < AMBA_NR_IRQS; i++) { irq = adev->irq[i]; - devm_free_irq(&adev->dev, irq, pl330); + if (irq) + devm_free_irq(&adev->dev, irq, pl330); } dma_async_device_unregister(&pl330->ddma); From 715e944f8a7a1059db5d61ebf197e6f348d747e2 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Tue, 30 May 2017 22:39:46 +0200 Subject: [PATCH 182/249] HID: asus: Stop underlying hardware on remove We are missing a call to hid_hw_stop() on the remove hook. Among other things this is causing an Oops when (re-)starting GNOME / upowerd / ... after the module has been already rmmod-ed. Signed-off-by: Carlo Caione Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 101ab2e63d18..a6268f2f7408 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -537,6 +537,8 @@ static void asus_remove(struct hid_device *hdev) drvdata->kbd_backlight->removed = true; cancel_work_sync(&drvdata->kbd_backlight->work); } + + hid_hw_stop(hdev); } static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc, From cb7cf772d83d2d4e6995c5bb9e0fb59aea8f7080 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 26 May 2017 17:40:02 +0100 Subject: [PATCH 183/249] ARM64/ACPI: Fix BAD_MADT_GICC_ENTRY() macro implementation The BAD_MADT_GICC_ENTRY() macro checks if a GICC MADT entry passes muster from an ACPI specification standpoint. Current macro detects the MADT GICC entry length through ACPI firmware version (it changed from 76 to 80 bytes in the transition from ACPI 5.1 to ACPI 6.0 specification) but always uses (erroneously) the ACPICA (latest) struct (ie struct acpi_madt_generic_interrupt - that is 80-bytes long) length to check if the current GICC entry memory record exceeds the MADT table end in memory as defined by the MADT table header itself, which may result in false negatives depending on the ACPI firmware version and how the MADT entries are laid out in memory (ie on ACPI 5.1 firmware MADT GICC entries are 76 bytes long, so by adding 80 to a GICC entry start address in memory the resulting address may well be past the actual MADT end, triggering a false negative). Fix the BAD_MADT_GICC_ENTRY() macro by reshuffling the condition checks and update them to always use the firmware version specific MADT GICC entry length in order to carry out boundary checks. Fixes: b6cfb277378e ("ACPI / ARM64: add BAD_MADT_GICC_ENTRY() macro") Reported-by: Julien Grall Acked-by: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Cc: Julien Grall Cc: Hanjun Guo Cc: Al Stone Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 0e99978da3f0..59cca1d6ec54 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -23,9 +23,9 @@ #define ACPI_MADT_GICC_LENGTH \ (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) -#define BAD_MADT_GICC_ENTRY(entry, end) \ - (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ - (entry)->header.length != ACPI_MADT_GICC_LENGTH) +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \ + (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end)) /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI From a7306c3436e9c8e584a4b9fad5f3dc91be2a6076 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 2 Jun 2017 14:46:11 -0700 Subject: [PATCH 184/249] ksm: prevent crash after write_protect_page fails "err" needs to be left set to -EFAULT if split_huge_page succeeds. Otherwise if "err" gets clobbered with zero and write_protect_page fails, try_to_merge_one_page() will succeed instead of returning -EFAULT and then try_to_merge_with_ksm_page() will continue thinking kpage is a PageKsm when in fact it's still an anonymous page. Eventually it'll crash in page_add_anon_rmap. This has been reproduced on Fedora25 kernel but I can reproduce with upstream too. The bug was introduced in commit f765f540598a ("ksm: prepare to new THP semantics") introduced in v4.5. page:fffff67546ce1cc0 count:4 mapcount:2 mapping:ffffa094551e36e1 index:0x7f0f46673 flags: 0x2ffffc0004007c(referenced|uptodate|dirty|lru|active|swapbacked) page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) page->mem_cgroup:ffffa09674bf0000 ------------[ cut here ]------------ kernel BUG at mm/rmap.c:1222! CPU: 1 PID: 76 Comm: ksmd Not tainted 4.9.3-200.fc25.x86_64 #1 RIP: do_page_add_anon_rmap+0x1c4/0x240 Call Trace: page_add_anon_rmap+0x18/0x20 try_to_merge_with_ksm_page+0x50b/0x780 ksm_scan_thread+0x1211/0x1410 ? prepare_to_wait_event+0x100/0x100 ? try_to_merge_with_ksm_page+0x780/0x780 kthread+0xd9/0xf0 ? kthread_park+0x60/0x60 ret_from_fork+0x25/0x30 Fixes: f765f54059 ("ksm: prepare to new THP semantics") Link: http://lkml.kernel.org/r/20170513131040.21732-1-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Reported-by: Federico Simoncelli Acked-by: Kirill A. Shutemov Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/ksm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index d9fc0e456128..216184af0e19 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1028,8 +1028,7 @@ static int try_to_merge_one_page(struct vm_area_struct *vma, goto out; if (PageTransCompound(page)) { - err = split_huge_page(page); - if (err) + if (split_huge_page(page)) goto out_unlock; } From 1bde33e051233f0ed93a8bc67137016ab38c3d2d Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Jun 2017 14:46:13 -0700 Subject: [PATCH 185/249] include/linux/gfp.h: fix ___GFP_NOLOCKDEP value Igor Stoppa has noticed that __GFP_NOLOCKDEP can use a lower bit. At the time commit 7e7844226f10 ("lockdep: allow to disable reclaim lockup detection") was written we still had __GFP_OTHER_NODE but I have removed it in commit 41b6167e8f74 ("mm: get rid of __GFP_OTHER_NODE") and forgot to lower the bit value. The current value is outside of __GFP_BITS_SHIFT so it cannot be used actually. Fixes: 7e7844226f10 ("lockdep: allow to disable reclaim lockup detection") Signed-off-by: Michal Hocko Reported-by: Igor Stoppa Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2b1a44f5bdb6..a89d37e8b387 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -41,7 +41,7 @@ struct vm_area_struct; #define ___GFP_WRITE 0x800000u #define ___GFP_KSWAPD_RECLAIM 0x1000000u #ifdef CONFIG_LOCKDEP -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP 0x2000000u #else #define ___GFP_NOLOCKDEP 0 #endif From 60b0a8c3d2480f3b57282b47b7cae7ee71c48635 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Fri, 2 Jun 2017 14:46:16 -0700 Subject: [PATCH 186/249] frv: declare jiffies to be located in the .data section Commit 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") removed a section specification from the jiffies declaration that caused conflicts on some platforms. Unfortunately this change broke the build for frv: kernel/built-in.o: In function `__do_softirq': (.text+0x6460): relocation truncated to fit: R_FRV_GPREL12 against symbol `jiffies' defined in *ABS* section in .tmp_vmlinux1 kernel/built-in.o: In function `__do_softirq': (.text+0x6574): relocation truncated to fit: R_FRV_GPREL12 against symbol `jiffies' defined in *ABS* section in .tmp_vmlinux1 kernel/built-in.o: In function `pwq_activate_delayed_work': workqueue.c:(.text+0x15b9c): relocation truncated to fit: R_FRV_GPREL12 against symbol `jiffies' defined in *ABS* section in .tmp_vmlinux1 ... Add __jiffy_arch_data to the declaration of jiffies and use it on frv to include the section specification. For all other platforms __jiffy_arch_data (currently) has no effect. Fixes: 7c30f352c852 ("jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp") Link: http://lkml.kernel.org/r/20170516221333.177280-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Reported-by: Guenter Roeck Tested-by: Guenter Roeck Reviewed-by: David Howells Cc: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/include/asm/timex.h | 6 ++++++ include/linux/jiffies.h | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/frv/include/asm/timex.h b/arch/frv/include/asm/timex.h index a89bddefdacf..139093fab326 100644 --- a/arch/frv/include/asm/timex.h +++ b/arch/frv/include/asm/timex.h @@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void) #define vxtime_lock() do {} while (0) #define vxtime_unlock() do {} while (0) +/* This attribute is used in include/linux/jiffies.h alongside with + * __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp + * for frv does not contain another section specification. + */ +#define __jiffy_arch_data __attribute__((__section__(".data"))) + #endif diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 36872fbb815d..734377ad42e9 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -64,13 +64,17 @@ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) +#ifndef __jiffy_arch_data +#define __jiffy_arch_data +#endif + /* * The 64-bit value is not atomic - you MUST NOT read it * without sampling the sequence number in jiffies_lock. * get_jiffies_64() will do this for you as appropriate. */ extern u64 __cacheline_aligned_in_smp jiffies_64; -extern unsigned long volatile __cacheline_aligned_in_smp jiffies; +extern unsigned long volatile __cacheline_aligned_in_smp __jiffy_arch_data jiffies; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); From 4f4f2ba9c531b3d7cee293dd3654ba3b86e7d220 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Jun 2017 14:46:19 -0700 Subject: [PATCH 187/249] mm: clarify why we want kmalloc before falling backto vmallock While converting drm_[cm]alloc* helpers to kvmalloc* variants Chris Wilson has wondered why we want to try kmalloc before vmalloc fallback even for larger allocations requests. Let's clarify that one larger physically contiguous block is less likely to fragment memory than many scattered pages which can prevent more large blocks from being created. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170517080932.21423-1-mhocko@kernel.org Signed-off-by: Michal Hocko Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/util.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 464df3489903..26be6407abd7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -357,8 +357,11 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); /* - * Make sure that larger requests are not too disruptive - no OOM - * killer and no allocation failure warnings as we have a fallback + * We want to attempt a large physically contiguous block first because + * it is less likely to fragment multiple larger blocks and therefore + * contribute to a long term fragmentation less than vmalloc fallback. + * However make sure that larger requests are not too disruptive - no + * OOM killer and no allocation failure warnings as we have a fallback. */ if (size > PAGE_SIZE) { kmalloc_flags |= __GFP_NOWARN; From 57ddfdaa9a72fe726a44d26d99db31bc137dbeff Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 2 Jun 2017 14:46:22 -0700 Subject: [PATCH 188/249] initramfs: fix disabling of initramfs (and its compression) Commit db2aa7fd15e8 ("initramfs: allow again choice of the embedded initram compression algorithm") introduced the possibility to select the initramfs compression algorithm from Kconfig and while this is a nice feature it broke the use case described below. Here is what my build system does: - kernel is initially configured not to have an initramfs included - build the user space root file system - re-configure the kernel to have an initramfs included (CONFIG_INITRAMFS_SOURCE="/path/to/romfs") and set relevant CONFIG_INITRAMFS options, in my case, no compression option (CONFIG_INITRAMFS_COMPRESSION_NONE) - kernel is re-built with these options -> kernel+initramfs image is copied - kernel is re-built again without these options -> kernel image is copied Building a kernel without an initramfs means setting this option: CONFIG_INITRAMFS_SOURCE="" (and this one only) whereas building a kernel with an initramfs means setting these options: CONFIG_INITRAMFS_SOURCE="/home/fainelli/work/uclinux-rootfs/romfs /home/fainelli/work/uclinux-rootfs/misc/initramfs.dev" CONFIG_INITRAMFS_ROOT_UID=1000 CONFIG_INITRAMFS_ROOT_GID=1000 CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_INITRAMFS_COMPRESSION="" Commit db2aa7fd15e85 ("initramfs: allow again choice of the embedded initram compression algorithm") is problematic because CONFIG_INITRAMFS_COMPRESSION which is used to determine the initramfs_data.cpio extension/compression is a string, and due to how Kconfig works it will evaluate in order, how to assign it. Setting CONFIG_INITRAMFS_COMPRESSION_NONE with CONFIG_INITRAMFS_SOURCE="" cannot possibly work (because of the depends on INITRAMFS_SOURCE!="" imposed on CONFIG_INITRAMFS_COMPRESSION ) yet we still get CONFIG_INITRAMFS_COMPRESSION assigned to ".gz" because CONFIG_RD_GZIP=y is set in my kernel, even when there is no initramfs being built. So we basically end-up generating two initramfs_data.cpio* files, one without extension, and one with .gz. This causes usr/Makefile to track usr/initramfs_data.cpio.gz, and not usr/initramfs_data.cpio anymore, that is also largely problematic after 9e3596b0c6539e ("kbuild: initramfs cleanup, set target from Kconfig") because we used to track all possible initramfs_data files in the $(targets) variable before that commit. The end result is that the kernel with an initramfs clearly does not contain what we expect it to, it has a stale initramfs_data.cpio file built into it, and we keep re-generating an initramfs_data.cpio.gz file which is not the one that we want to include in the kernel image proper. The fix consists in hiding CONFIG_INITRAMFS_COMPRESSION when CONFIG_INITRAMFS_SOURCE="". This puts us back in a state to the pre-4.10 behavior where we can properly disable and re-enable initramfs within the same kernel .config file, and be in control of what CONFIG_INITRAMFS_COMPRESSION is set to. Fixes: db2aa7fd15e8 ("initramfs: allow again choice of the embedded initram compression algorithm") Fixes: 9e3596b0c653 ("kbuild: initramfs cleanup, set target from Kconfig") Link: http://lkml.kernel.org/r/20170521033337.6197-1-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Acked-by: Nicholas Piggin Cc: P J P Cc: Paul Bolle Cc: Michal Marek Cc: Daniel Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- usr/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/usr/Kconfig b/usr/Kconfig index c0c48507e44e..ad0543e21760 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -220,6 +220,7 @@ config INITRAMFS_COMPRESSION_LZ4 endchoice config INITRAMFS_COMPRESSION + depends on INITRAMFS_SOURCE!="" string default "" if INITRAMFS_COMPRESSION_NONE default ".gz" if INITRAMFS_COMPRESSION_GZIP From 478fe3037b2278d276d4cd9cd0ab06c4cb2e9b32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 2 Jun 2017 14:46:25 -0700 Subject: [PATCH 189/249] slub/memcg: cure the brainless abuse of sysfs attributes memcg_propagate_slab_attrs() abuses the sysfs attribute file functions to propagate settings from the root kmem_cache to a newly created kmem_cache. It does that with: attr->show(root, buf); attr->store(new, buf, strlen(bug); Aside of being a lazy and absurd hackery this is broken because it does not check the return value of the show() function. Some of the show() functions return 0 w/o touching the buffer. That means in such a case the store function is called with the stale content of the previous show(). That causes nonsense like invoking kmem_cache_shrink() on a newly created kmem_cache. In the worst case it would cause handing in an uninitialized buffer. This should be rewritten proper by adding a propagate() callback to those slub_attributes which must be propagated and avoid that insane conversion to and from ASCII, but that's too large for a hot fix. Check at least the return value of the show() function, so calling store() with stale content is prevented. Steven said: "It can cause a deadlock with get_online_cpus() that has been uncovered by recent cpu hotplug and lockdep changes that Thomas and Peter have been doing. Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(cpu_hotplug.lock); lock(slab_mutex); lock(cpu_hotplug.lock); lock(slab_mutex); *** DEADLOCK ***" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1705201244540.2255@nanos Signed-off-by: Thomas Gleixner Reported-by: Steven Rostedt Acked-by: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Christoph Hellwig Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 57e5156f02be..7449593fca72 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5512,6 +5512,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) char mbuf[64]; char *buf; struct slab_attribute *attr = to_slab_attr(slab_attrs[i]); + ssize_t len; if (!attr || !attr->store || !attr->show) continue; @@ -5536,8 +5537,9 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) buf = buffer; } - attr->show(root_cache, buf); - attr->store(s, buf, strlen(buf)); + len = attr->show(root_cache, buf); + if (len > 0) + attr->store(s, buf, len); } if (buffer) From ff5a20169b98d84ad8d7f99f27c5ebbb008204d6 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 2 Jun 2017 14:46:28 -0700 Subject: [PATCH 190/249] pcmcia: remove left-over %Z format Commit 5b5e0928f742 ("lib/vsprintf.c: remove %Z support") removed some usages of format %Z but forgot "%.2Zx". This makes clang 4.0 reports a -Wformat-extra-args warning because it does not know about %Z. Replace %Z with %z. Link: http://lkml.kernel.org/r/20170520090946.22562-1-nicolas.iooss_linux@m4x.org Signed-off-by: Nicolas Iooss Cc: Harald Welte Cc: Alexey Dobriyan Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/pcmcia/cm4040_cs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index d4dbd8d8e524..382c864814d9 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -374,7 +374,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_START, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; @@ -387,7 +387,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, for (i = 0; i < bytes_to_write; i++) { rc = wait_for_bulk_out_ready(dev); if (rc <= 0) { - DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2Zx\n", + DEBUGP(5, dev, "wait_for_bulk_out_ready rc=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) @@ -403,7 +403,7 @@ static ssize_t cm4040_write(struct file *filp, const char __user *buf, rc = write_sync_reg(SCR_HOST_TO_READER_DONE, dev); if (rc <= 0) { - DEBUGP(5, dev, "write_sync_reg c=%.2Zx\n", rc); + DEBUGP(5, dev, "write_sync_reg c=%.2zx\n", rc); DEBUGP(2, dev, "<- cm4040_write (failed)\n"); if (rc == -ERESTARTSYS) return rc; From c288983dddf714216428774e022ad78f48dd8cb1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 2 Jun 2017 14:46:31 -0700 Subject: [PATCH 191/249] mm/page_alloc.c: make sure OOM victim can try allocations with no watermarks once Roman Gushchin has reported that the OOM killer can trivially selects next OOM victim when a thread doing memory allocation from page fault path was selected as first OOM victim. allocate invoked oom-killer: gfp_mask=0x14280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=(null), order=0, oom_score_adj=0 allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: oom_kill_process+0x219/0x3e0 out_of_memory+0x11d/0x480 __alloc_pages_slowpath+0xc84/0xd40 __alloc_pages_nodemask+0x245/0x260 alloc_pages_vma+0xa2/0x270 __handle_mm_fault+0xca9/0x10c0 handle_mm_fault+0xf3/0x210 __do_page_fault+0x240/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... Out of memory: Kill process 492 (allocate) score 899 or sacrifice child Killed process 492 (allocate) total-vm:2052368kB, anon-rss:1894576kB, file-rss:4kB, shmem-rss:0kB allocate: page allocation failure: order:0, mode:0x14280ca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), nodemask=(null) allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: __alloc_pages_slowpath+0xd32/0xd40 __alloc_pages_nodemask+0x245/0x260 alloc_pages_vma+0xa2/0x270 __handle_mm_fault+0xca9/0x10c0 handle_mm_fault+0xf3/0x210 __do_page_fault+0x240/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... oom_reaper: reaped process 492 (allocate), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB ... allocate invoked oom-killer: gfp_mask=0x0(), nodemask=(null), order=0, oom_score_adj=0 allocate cpuset=/ mems_allowed=0 CPU: 1 PID: 492 Comm: allocate Not tainted 4.12.0-rc1-mm1+ #181 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 Call Trace: oom_kill_process+0x219/0x3e0 out_of_memory+0x11d/0x480 pagefault_out_of_memory+0x68/0x80 mm_fault_error+0x8f/0x190 ? handle_mm_fault+0xf3/0x210 __do_page_fault+0x4b2/0x4e0 trace_do_page_fault+0x37/0xe0 do_async_page_fault+0x19/0x70 async_page_fault+0x28/0x30 ... Out of memory: Kill process 233 (firewalld) score 10 or sacrifice child Killed process 233 (firewalld) total-vm:246076kB, anon-rss:20956kB, file-rss:0kB, shmem-rss:0kB There is a race window that the OOM reaper completes reclaiming the first victim's memory while nothing but mutex_trylock() prevents the first victim from calling out_of_memory() from pagefault_out_of_memory() after memory allocation for page fault path failed due to being selected as an OOM victim. This is a side effect of commit 9a67f6488eca926f ("mm: consolidate GFP_NOFAIL checks in the allocator slowpath") because that commit silently changed the behavior from /* Avoid allocations with no watermarks from looping endlessly */ to /* * Give up allocations without trying memory reserves if selected * as an OOM victim */ in __alloc_pages_slowpath() by moving the location to check TIF_MEMDIE flag. I have noticed this change but I didn't post a patch because I thought it is an acceptable change other than noise by warn_alloc() because !__GFP_NOFAIL allocations are allowed to fail. But we overlooked that failing memory allocation from page fault path makes difference due to the race window explained above. While it might be possible to add a check to pagefault_out_of_memory() that prevents the first victim from calling out_of_memory() or remove out_of_memory() from pagefault_out_of_memory(), changing pagefault_out_of_memory() does not suppress noise by warn_alloc() when allocating thread was selected as an OOM victim. There is little point with printing similar backtraces and memory information from both out_of_memory() and warn_alloc(). Instead, if we guarantee that current thread can try allocations with no watermarks once when current thread looping inside __alloc_pages_slowpath() was selected as an OOM victim, we can follow "who can use memory reserves" rules and suppress noise by warn_alloc() and prevent memory allocations from page fault path from calling pagefault_out_of_memory(). If we take the comment literally, this patch would do - if (test_thread_flag(TIF_MEMDIE)) - goto nopage; + if (alloc_flags == ALLOC_NO_WATERMARKS || (gfp_mask & __GFP_NOMEMALLOC)) + goto nopage; because gfp_pfmemalloc_allowed() returns false if __GFP_NOMEMALLOC is given. But if I recall correctly (I couldn't find the message), the condition is meant to apply to only OOM victims despite the comment. Therefore, this patch preserves TIF_MEMDIE check. Fixes: 9a67f6488eca926f ("mm: consolidate GFP_NOFAIL checks in the allocator slowpath") Link: http://lkml.kernel.org/r/201705192112.IAF69238.OQOHSJLFOFFMtV@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: Roman Gushchin Tested-by: Roman Gushchin Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: [4.11] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f9e450c6b6e4..b7a6f583a373 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3870,7 +3870,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto got_pg; /* Avoid allocations with no watermarks from looping endlessly */ - if (test_thread_flag(TIF_MEMDIE)) + if (test_thread_flag(TIF_MEMDIE) && + (alloc_flags == ALLOC_NO_WATERMARKS || + (gfp_mask & __GFP_NOMEMALLOC))) goto nopage; /* Retry as long as the OOM killer is making progress */ From d0f0931de936a0a468d7e59284d39581c16d3a73 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 2 Jun 2017 14:46:34 -0700 Subject: [PATCH 192/249] mm: avoid spurious 'bad pmd' warning messages When the pmd_devmap() checks were added by 5c7fb56e5e3f ("mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd") to add better support for DAX huge pages, they were all added to the end of if() statements after existing pmd_trans_huge() checks. So, things like: - if (pmd_trans_huge(*pmd)) + if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) When further checks were added after pmd_trans_unstable() checks by commit 7267ec008b5c ("mm: postpone page table allocation until we have page to map") they were also added at the end of the conditional: + if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd)) This ordering is fine for pmd_trans_huge(), but doesn't work for pmd_trans_unstable(). This is because DAX huge pages trip the bad_pmd() check inside of pmd_none_or_trans_huge_or_clear_bad() (called by pmd_trans_unstable()), which prints out a warning and returns 1. So, we do end up doing the right thing, but only after spamming dmesg with suspicious looking messages: mm/pgtable-generic.c:39: bad pmd ffff8808daa49b88(84000001006000a5) Reorder these checks in a helper so that pmd_devmap() is checked first, avoiding the error messages, and add a comment explaining why the ordering is important. Fixes: commit 7267ec008b5c ("mm: postpone page table allocation until we have page to map") Link: http://lkml.kernel.org/r/20170522215749.23516-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Pawel Lebioda Cc: "Darrick J. Wong" Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: Matthew Wilcox Cc: "Kirill A . Shutemov" Cc: Dave Jiang Cc: Xiong Zhou Cc: Eryu Guan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 6ff5d729ded0..2e65df1831d9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3029,6 +3029,17 @@ static int __do_fault(struct vm_fault *vmf) return ret; } +/* + * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. + * If we check pmd_trans_unstable() first we will trip the bad_pmd() check + * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly + * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. + */ +static int pmd_devmap_trans_unstable(pmd_t *pmd) +{ + return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); +} + static int pte_alloc_one_map(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; @@ -3052,18 +3063,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf) map_pte: /* * If a huge pmd materialized under us just retry later. Use - * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd - * didn't become pmd_trans_huge under us and then back to pmd_none, as - * a result of MADV_DONTNEED running immediately after a huge pmd fault - * in a different thread of this mm, in turn leading to a misleading - * pmd_trans_huge() retval. All we have to ensure is that it is a - * regular pmd that we can walk with pte_offset_map() and we can do that - * through an atomic read in C, which is what pmd_trans_unstable() - * provides. + * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of + * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge + * under us and then back to pmd_none, as a result of MADV_DONTNEED + * running immediately after a huge pmd fault in a different thread of + * this mm, in turn leading to a misleading pmd_trans_huge() retval. + * All we have to ensure is that it is a regular pmd that we can walk + * with pte_offset_map() and we can do that through an atomic read in + * C, which is what pmd_trans_unstable() provides. */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return VM_FAULT_NOPAGE; + /* + * At this point we know that our vmf->pmd points to a page of ptes + * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() + * for the duration of the fault. If a racing MADV_DONTNEED runs and + * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still + * be valid and we will re-check to make sure the vmf->pte isn't + * pte_none() under vmf->ptl protection when we return to + * alloc_set_pte(). + */ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); return 0; @@ -3690,7 +3710,7 @@ static int handle_pte_fault(struct vm_fault *vmf) vmf->pte = NULL; } else { /* See comment in pte_alloc_one_map() */ - if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd)) + if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* * A regular pmd is established and it can't morph into a huge From e2093926a098a8ccf0f1d10f6df8dad452cb28d3 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 2 Jun 2017 14:46:37 -0700 Subject: [PATCH 193/249] dax: fix race between colliding PMD & PTE entries We currently have two related PMD vs PTE races in the DAX code. These can both be easily triggered by having two threads reading and writing simultaneously to the same private mapping, with the key being that private mapping reads can be handled with PMDs but private mapping writes are always handled with PTEs so that we can COW. Here is the first race: CPU 0 CPU 1 (private mapping write) __handle_mm_fault() create_huge_pmd() - FALLBACK handle_pte_fault() passes check for pmd_devmap() (private mapping read) __handle_mm_fault() create_huge_pmd() dax_iomap_pmd_fault() inserts PMD dax_iomap_pte_fault() does a PTE fault, but we already have a DAX PMD installed in our page tables at this spot. Here's the second race: CPU 0 CPU 1 (private mapping read) __handle_mm_fault() passes check for pmd_none() create_huge_pmd() dax_iomap_pmd_fault() inserts PMD (private mapping write) __handle_mm_fault() create_huge_pmd() - FALLBACK (private mapping read) __handle_mm_fault() passes check for pmd_none() create_huge_pmd() handle_pte_fault() dax_iomap_pte_fault() inserts PTE dax_iomap_pmd_fault() inserts PMD, but we already have a PTE at this spot. The core of the issue is that while there is isolation between faults to the same range in the DAX fault handlers via our DAX entry locking, there is no isolation between faults in the code in mm/memory.c. This means for instance that this code in __handle_mm_fault() can run: if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { ret = create_huge_pmd(&vmf); But by the time we actually get to run the fault handler called by create_huge_pmd(), the PMD is no longer pmd_none() because a racing PTE fault has installed a normal PMD here as a parent. This is the cause of the 2nd race. The first race is similar - there is the following check in handle_pte_fault(): } else { /* See comment in pte_alloc_one_map() */ if (pmd_devmap(*vmf->pmd) || pmd_trans_unstable(vmf->pmd)) return 0; So if a pmd_devmap() PMD (a DAX PMD) has been installed at vmf->pmd, we will bail and retry the fault. This is correct, but there is nothing preventing the PMD from being installed after this check but before we actually get to the DAX PTE fault handlers. In my testing these races result in the following types of errors: BUG: Bad rss-counter state mm:ffff8800a817d280 idx:1 val:1 BUG: non-zero nr_ptes on freeing mm: 15 Fix this issue by having the DAX fault handlers verify that it is safe to continue their fault after they have taken an entry lock to block other racing faults. [ross.zwisler@linux.intel.com: improve fix for colliding PMD & PTE entries] Link: http://lkml.kernel.org/r/20170526195932.32178-1-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/20170522215749.23516-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: Pawel Lebioda Reviewed-by: Jan Kara Cc: "Darrick J. Wong" Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Hansen Cc: Matthew Wilcox Cc: "Kirill A . Shutemov" Cc: Pawel Lebioda Cc: Dave Jiang Cc: Xiong Zhou Cc: Eryu Guan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index c22eaf162f95..2a6889b3585f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1154,6 +1154,17 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, goto out; } + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PMD fault that overlaps with + * the PTE we need to set up. If so just return and the fault will be + * retried. + */ + if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) { + vmf_ret = VM_FAULT_NOPAGE; + goto unlock_entry; + } + /* * Note that we don't bother to use iomap_apply here: DAX required * the file system block size to be equal the page size, which means @@ -1397,6 +1408,18 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, if (IS_ERR(entry)) goto fallback; + /* + * It is possible, particularly with mixed reads & writes to private + * mappings, that we have raced with a PTE fault that overlaps with + * the PMD we need to set up. If so just return and the fault will be + * retried. + */ + if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd) && + !pmd_devmap(*vmf->pmd)) { + result = 0; + goto unlock_entry; + } + /* * Note that we don't use iomap_apply here. We aren't doing I/O, only * setting up a mapping, so really we're using iomap_begin() as a way From 30809f559a0d348c2dfd7ab05e9a451e2384962e Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 2 Jun 2017 14:46:40 -0700 Subject: [PATCH 194/249] mm/migrate: fix refcount handling when !hugepage_migration_supported() On failing to migrate a page, soft_offline_huge_page() performs the necessary update to the hugepage ref-count. But when !hugepage_migration_supported() , unmap_and_move_hugepage() also decrements the page ref-count for the hugepage. The combined behaviour leaves the ref-count in an inconsistent state. This leads to soft lockups when running the overcommitted hugepage test from mce-tests suite. Soft offlining pfn 0x83ed600 at process virtual address 0x400000000000 soft offline: 0x83ed600: migration failed 1, type 1fffc00000008008 (uptodate|head) INFO: rcu_preempt detected stalls on CPUs/tasks: Tasks blocked on level-0 rcu_node (CPUs 0-7): P2715 (detected by 7, t=5254 jiffies, g=963, c=962, q=321) thugetlb_overco R running task 0 2715 2685 0x00000008 Call trace: dump_backtrace+0x0/0x268 show_stack+0x24/0x30 sched_show_task+0x134/0x180 rcu_print_detail_task_stall_rnp+0x54/0x7c rcu_check_callbacks+0xa74/0xb08 update_process_times+0x34/0x60 tick_sched_handle.isra.7+0x38/0x70 tick_sched_timer+0x4c/0x98 __hrtimer_run_queues+0xc0/0x300 hrtimer_interrupt+0xac/0x228 arch_timer_handler_phys+0x3c/0x50 handle_percpu_devid_irq+0x8c/0x290 generic_handle_irq+0x34/0x50 __handle_domain_irq+0x68/0xc0 gic_handle_irq+0x5c/0xb0 Address this by changing the putback_active_hugepage() in soft_offline_huge_page() to putback_movable_pages(). This only triggers on systems that enable memory failure handling (ARCH_SUPPORTS_MEMORY_FAILURE) but not hugepage migration (!ARCH_ENABLE_HUGEPAGE_MIGRATION). I imagine this wasn't triggered as there aren't many systems running this configuration. [akpm@linux-foundation.org: remove dead comment, per Naoya] Link: http://lkml.kernel.org/r/20170525135146.32011-1-punit.agrawal@arm.com Reported-by: Manoj Iyer Tested-by: Manoj Iyer Suggested-by: Naoya Horiguchi Signed-off-by: Punit Agrawal Cc: Joonsoo Kim Cc: Wanpeng Li Cc: Christoph Lameter Cc: Mel Gorman Cc: [3.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2527dfeddb00..342fac9ba89b 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1595,12 +1595,8 @@ static int soft_offline_huge_page(struct page *page, int flags) if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n", pfn, ret, page->flags, &page->flags); - /* - * We know that soft_offline_huge_page() tries to migrate - * only one hugepage pointed to by hpage, so we need not - * run through the pagelist here. - */ - putback_active_hugepage(hpage); + if (!list_empty(&pagelist)) + putback_movable_pages(&pagelist); if (ret > 0) ret = -EIO; } else { From 70feee0e1ef331b22cc51f383d532a0d043fbdcc Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 2 Jun 2017 14:46:43 -0700 Subject: [PATCH 195/249] mlock: fix mlock count can not decrease in race condition Kefeng reported that when running the follow test, the mlock count in meminfo will increase permanently: [1] testcase linux:~ # cat test_mlockal grep Mlocked /proc/meminfo for j in `seq 0 10` do for i in `seq 4 15` do ./p_mlockall >> log & done sleep 0.2 done # wait some time to let mlock counter decrease and 5s may not enough sleep 5 grep Mlocked /proc/meminfo linux:~ # cat p_mlockall.c #include #include #include #define SPACE_LEN 4096 int main(int argc, char ** argv) { int ret; void *adr = malloc(SPACE_LEN); if (!adr) return -1; ret = mlockall(MCL_CURRENT | MCL_FUTURE); printf("mlcokall ret = %d\n", ret); ret = munlockall(); printf("munlcokall ret = %d\n", ret); free(adr); return 0; } In __munlock_pagevec() we should decrement NR_MLOCK for each page where we clear the PageMlocked flag. Commit 1ebb7cc6a583 ("mm: munlock: batch NR_MLOCK zone state updates") has introduced a bug where we don't decrement NR_MLOCK for pages where we clear the flag, but fail to isolate them from the lru list (e.g. when the pages are on some other cpu's percpu pagevec). Since PageMlocked stays cleared, the NR_MLOCK accounting gets permanently disrupted by this. Fix it by counting the number of page whose PageMlock flag is cleared. Fixes: 1ebb7cc6a583 (" mm: munlock: batch NR_MLOCK zone state updates") Link: http://lkml.kernel.org/r/1495678405-54569-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Yisheng Xie Reported-by: Kefeng Wang Tested-by: Kefeng Wang Cc: Vlastimil Babka Cc: Joern Engel Cc: Mel Gorman Cc: Michel Lespinasse Cc: Hugh Dickins Cc: Rik van Riel Cc: Johannes Weiner Cc: Michal Hocko Cc: Xishi Qiu Cc: zhongjiang Cc: Hanjun Guo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index c483c5c20b4b..b562b5523a65 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -284,7 +284,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked; + int delta_munlocked = -nr; struct pagevec pvec_putback; int pgrescued = 0; @@ -304,6 +304,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) continue; else __munlock_isolation_failed(page); + } else { + delta_munlocked++; } /* @@ -315,7 +317,6 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; } - delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(zone_lru_lock(zone)); From 9a291a7c9428155e8e623e4a3989f8be47134df5 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Jun 2017 14:46:46 -0700 Subject: [PATCH 196/249] mm/hugetlb: report -EHWPOISON not -EFAULT when FOLL_HWPOISON is specified KVM uses get_user_pages() to resolve its stage2 faults. KVM sets the FOLL_HWPOISON flag causing faultin_page() to return -EHWPOISON when it finds a VM_FAULT_HWPOISON. KVM handles these hwpoison pages as a special case. (check_user_page_hwpoison()) When huge pages are involved, this doesn't work so well. get_user_pages() calls follow_hugetlb_page(), which stops early if it receives VM_FAULT_HWPOISON from hugetlb_fault(), eventually returning -EFAULT to the caller. The step to map this to -EHWPOISON based on the FOLL_ flags is missing. The hwpoison special case is skipped, and -EFAULT is returned to user-space, causing Qemu or kvmtool to exit. Instead, move this VM_FAULT_ to errno mapping code into a header file and use it from faultin_page() and follow_hugetlb_page(). With this, KVM works as expected. This isn't a problem for arm64 today as we haven't enabled MEMORY_FAILURE, but I can't see any reason this doesn't happen on x86 too, so I think this should be a fix. This doesn't apply earlier than stable's v4.11.1 due to all sorts of cleanup. [james.morse@arm.com: add vm_fault_to_errno() call to faultin_page()] suggested. Link: http://lkml.kernel.org/r/20170525171035.16359-1-james.morse@arm.com [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170524160900.28786-1-james.morse@arm.com Signed-off-by: James Morse Acked-by: Punit Agrawal Acked-by: Naoya Horiguchi Cc: "Kirill A . Shutemov" Cc: [4.11.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 11 +++++++++++ mm/gup.c | 20 ++++++++------------ mm/hugetlb.c | 5 +++++ 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7cb17c6b97de..b892e95d4929 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2327,6 +2327,17 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ +static inline int vm_fault_to_errno(int vm_fault, int foll_flags) +{ + if (vm_fault & VM_FAULT_OOM) + return -ENOMEM; + if (vm_fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) + return (foll_flags & FOLL_HWPOISON) ? -EHWPOISON : -EFAULT; + if (vm_fault & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) + return -EFAULT; + return 0; +} + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, diff --git a/mm/gup.c b/mm/gup.c index d9e6fddcc51f..b3c7214d710d 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -407,12 +407,10 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, ret = handle_mm_fault(vma, address, fault_flags); if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return *flags & FOLL_HWPOISON ? -EHWPOISON : -EFAULT; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, *flags); + + if (err) + return err; BUG(); } @@ -723,12 +721,10 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, ret = handle_mm_fault(vma, address, fault_flags); major |= ret & VM_FAULT_MAJOR; if (ret & VM_FAULT_ERROR) { - if (ret & VM_FAULT_OOM) - return -ENOMEM; - if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) - return -EHWPOISON; - if (ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) - return -EFAULT; + int err = vm_fault_to_errno(ret, 0); + + if (err) + return err; BUG(); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e5828875f7bb..3eedb187e549 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4170,6 +4170,11 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, } ret = hugetlb_fault(mm, vma, vaddr, fault_flags); if (ret & VM_FAULT_ERROR) { + int err = vm_fault_to_errno(ret, flags); + + if (err) + return err; + remainder = 0; break; } From 864b9a393dcb5aed09b8fd31b9bbda0fdda99374 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 2 Jun 2017 14:46:49 -0700 Subject: [PATCH 197/249] mm: consider memblock reservations for deferred memory initialization sizing We have seen an early OOM killer invocation on ppc64 systems with crashkernel=4096M: kthreadd invoked oom-killer: gfp_mask=0x16040c0(GFP_KERNEL|__GFP_COMP|__GFP_NOTRACK), nodemask=7, order=0, oom_score_adj=0 kthreadd cpuset=/ mems_allowed=7 CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.4.68-1.gd7fe927-default #1 Call Trace: dump_stack+0xb0/0xf0 (unreliable) dump_header+0xb0/0x258 out_of_memory+0x5f0/0x640 __alloc_pages_nodemask+0xa8c/0xc80 kmem_getpages+0x84/0x1a0 fallback_alloc+0x2a4/0x320 kmem_cache_alloc_node+0xc0/0x2e0 copy_process.isra.25+0x260/0x1b30 _do_fork+0x94/0x470 kernel_thread+0x48/0x60 kthreadd+0x264/0x330 ret_from_kernel_thread+0x5c/0xa4 Mem-Info: active_anon:0 inactive_anon:0 isolated_anon:0 active_file:0 inactive_file:0 isolated_file:0 unevictable:0 dirty:0 writeback:0 unstable:0 slab_reclaimable:5 slab_unreclaimable:73 mapped:0 shmem:0 pagetables:0 bounce:0 free:0 free_pcp:0 free_cma:0 Node 7 DMA free:0kB min:0kB low:0kB high:0kB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB present:52428800kB managed:110016kB mlocked:0kB dirty:0kB writeback:0kB mapped:0kB shmem:0kB slab_reclaimable:320kB slab_unreclaimable:4672kB kernel_stack:1152kB pagetables:0kB unstable:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:0 all_unreclaimable? yes lowmem_reserve[]: 0 0 0 0 Node 7 DMA: 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB 0*8192kB 0*16384kB = 0kB 0 total pagecache pages 0 pages in swap cache Swap cache stats: add 0, delete 0, find 0/0 Free swap = 0kB Total swap = 0kB 819200 pages RAM 0 pages HighMem/MovableOnly 817481 pages reserved 0 pages cma reserved 0 pages hwpoisoned the reason is that the managed memory is too low (only 110MB) while the rest of the the 50GB is still waiting for the deferred intialization to be done. update_defer_init estimates the initial memoty to initialize to 2GB at least but it doesn't consider any memory allocated in that range. In this particular case we've had Reserving 4096MB of memory at 128MB for crashkernel (System RAM: 51200MB) so the low 2GB is mostly depleted. Fix this by considering memblock allocations in the initial static initialization estimation. Move the max_initialise to reset_deferred_meminit and implement a simple memblock_reserved_memory helper which iterates all reserved blocks and sums the size of all that start below the given address. The cumulative size is than added on top of the initial estimation. This is still not ideal because reset_deferred_meminit doesn't consider holes and so reservation might be above the initial estimation whihch we ignore but let's make the logic simpler until we really need to handle more complicated cases. Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Link: http://lkml.kernel.org/r/20170531104010.GI27783@dhcp22.suse.cz Signed-off-by: Michal Hocko Acked-by: Mel Gorman Tested-by: Srikar Dronamraju Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 8 ++++++++ include/linux/mmzone.h | 1 + mm/memblock.c | 23 +++++++++++++++++++++++ mm/page_alloc.c | 33 ++++++++++++++++++++++----------- 4 files changed, 54 insertions(+), 11 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 4ce24a376262..8098695e5d8d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -425,12 +425,20 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) } #endif +extern unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr); #else static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align) { return 0; } +static inline unsigned long memblock_reserved_memory_within(phys_addr_t start_addr, + phys_addr_t end_addr) +{ + return 0; +} + #endif /* CONFIG_HAVE_MEMBLOCK */ #endif /* __KERNEL__ */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ebaccd4e7d8c..ef6a13b7bd3e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -678,6 +678,7 @@ typedef struct pglist_data { * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; + unsigned long static_init_size; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/memblock.c b/mm/memblock.c index b049c9b2dba8..7b8a5db76a2f 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1739,6 +1739,29 @@ static void __init_memblock memblock_dump(struct memblock_type *type) } } +extern unsigned long __init_memblock +memblock_reserved_memory_within(phys_addr_t start_addr, phys_addr_t end_addr) +{ + struct memblock_region *rgn; + unsigned long size = 0; + int idx; + + for_each_memblock_type((&memblock.reserved), rgn) { + phys_addr_t start, end; + + if (rgn->base + rgn->size < start_addr) + continue; + if (rgn->base > end_addr) + continue; + + start = rgn->base; + end = start + rgn->size; + size += end - start; + } + + return size; +} + void __init_memblock __memblock_dump_all(void) { pr_info("MEMBLOCK configuration:\n"); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b7a6f583a373..2302f250d6b1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -292,6 +292,26 @@ int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT static inline void reset_deferred_meminit(pg_data_t *pgdat) { + unsigned long max_initialise; + unsigned long reserved_lowmem; + + /* + * Initialise at least 2G of a node but also take into account that + * two large system hashes that can take up 1GB for 0.25TB/node. + */ + max_initialise = max(2UL << (30 - PAGE_SHIFT), + (pgdat->node_spanned_pages >> 8)); + + /* + * Compensate the all the memblock reservations (e.g. crash kernel) + * from the initial estimation to make sure we will initialize enough + * memory to boot. + */ + reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, + pgdat->node_start_pfn + max_initialise); + max_initialise += reserved_lowmem; + + pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); pgdat->first_deferred_pfn = ULONG_MAX; } @@ -314,20 +334,11 @@ static inline bool update_defer_init(pg_data_t *pgdat, unsigned long pfn, unsigned long zone_end, unsigned long *nr_initialised) { - unsigned long max_initialise; - /* Always populate low zones for address-contrained allocations */ if (zone_end < pgdat_end_pfn(pgdat)) return true; - /* - * Initialise at least 2G of a node but also take into account that - * two large system hashes that can take up 1GB for 0.25TB/node. - */ - max_initialise = max(2UL << (30 - PAGE_SHIFT), - (pgdat->node_spanned_pages >> 8)); - (*nr_initialised)++; - if ((*nr_initialised > max_initialise) && + if ((*nr_initialised > pgdat->static_init_size) && (pfn & (PAGES_PER_SECTION - 1)) == 0) { pgdat->first_deferred_pfn = pfn; return false; @@ -6138,7 +6149,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, /* pg_data_t should be reset to zero when it's allocated */ WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx); - reset_deferred_meminit(pgdat); pgdat->node_id = nid; pgdat->node_start_pfn = node_start_pfn; pgdat->per_cpu_nodestats = NULL; @@ -6160,6 +6170,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, (unsigned long)pgdat->node_mem_map); #endif + reset_deferred_meminit(pgdat); free_area_init_core(pgdat); } From d6c9708737c2107c38bd75f133d14d5801b8d6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Fri, 2 Jun 2017 14:46:51 -0700 Subject: [PATCH 198/249] scripts/gdb: make lx-dmesg command work (reliably) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lx-dmesg needs access to the log_buf symbol from printk.c. Unfortunately, the symbol log_buf also exists in BPF's verifier.c and hence gdb can pick one or the other. If it happens to pick BPF's log_buf, lx-dmesg doesn't work: (gdb) lx-dmesg Python Exception Cannot access memory at address 0x0: Error occurred in Python command: Cannot access memory at address 0x0 (gdb) p log_buf $15 = 0x0 Luckily, GDB has a way to deal with this, see https://sourceware.org/gdb/onlinedocs/gdb/Symbols.html (gdb) info variables ^log_buf$ All variables matching regular expression "^log_buf$": File /kernel/bpf/verifier.c: static char *log_buf; File /kernel/printk/printk.c: static char *log_buf; (gdb) p 'verifier.c'::log_buf $1 = 0x0 (gdb) p 'printk.c'::log_buf $2 = 0x811a6aa0 <__log_buf> "" (gdb) p &log_buf $3 = (char **) 0x8120fe40 (gdb) p &'verifier.c'::log_buf $4 = (char **) 0x8120fe40 (gdb) p &'printk.c'::log_buf $5 = (char **) 0x8048b7d0 By being explicit about the location of the symbol, we can make lx-dmesg work again. While at it, do the same for the other symbols we need from printk.c Link: http://lkml.kernel.org/r/20170526112222.3414-1-git@andred.net Signed-off-by: André Draszik Tested-by: Kieran Bingham Acked-by: Jan Kiszka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/gdb/linux/dmesg.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index f9b92ece7834..5afd1098e33a 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -23,10 +23,11 @@ class LxDmesg(gdb.Command): super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) def invoke(self, arg, from_tty): - log_buf_addr = int(str(gdb.parse_and_eval("log_buf")).split()[0], 16) - log_first_idx = int(gdb.parse_and_eval("log_first_idx")) - log_next_idx = int(gdb.parse_and_eval("log_next_idx")) - log_buf_len = int(gdb.parse_and_eval("log_buf_len")) + log_buf_addr = int(str(gdb.parse_and_eval( + "'printk.c'::log_buf")).split()[0], 16) + log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx")) + log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx")) + log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len")) inf = gdb.inferiors()[0] start = log_buf_addr + log_first_idx From 8d4b31376974855cd4fb176e5d63ba8e0407928e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Jun 2017 17:18:47 -0700 Subject: [PATCH 199/249] Input: axp20x-pek - only check for "INTCFD9" ACPI device on Cherry Trail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 9b13a4ca8d2c ("Input: axp20x-pek - do not register input device on some systems") added a check for the INTCFD9 ACPI device which also handles the powerbutton as on some systems the powerbutton is connected to both the PMIC, handled by axp20x-pek, and to a gpio on the SoC, handled by soc_button_array which attaches itself to the INTCFD9 ACPI device. Testing + comparing DSDTs has shown that this only happens on Cherry Trail devices with an AXP288 PMIC, the AXP288 PMIC is also used on Bay Trail devices but there the power button is only connected to the PMIC and not handled by soc_button_array. This means that the INTCFD9 check has caused a regression on Bay Trail devices, causing power-button presses to no longer be seen. This commit fixes this by limiting the check to devices where the ACPI node for the AXP288 contains a _HRV (hardware revision) attribute with a value of 3 which indicates we are dealing with a Cherry Trail platform. Fixes: 9b13a4ca8d2c ("Input: axp20x-pek - do not register input ...") Reported-by: Сергей Трусов Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/axp20x-pek.c | 43 +++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index f11807db6979..c0a763a70a00 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -256,6 +256,41 @@ static int axp20x_pek_probe_input_device(struct axp20x_pek *axp20x_pek, return 0; } +#ifdef CONFIG_ACPI +static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek, + struct platform_device *pdev) +{ + unsigned long long hrv = 0; + acpi_status status; + + if (IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY) && + axp20x_pek->axp20x->variant == AXP288_ID) { + status = acpi_evaluate_integer(ACPI_HANDLE(pdev->dev.parent), + "_HRV", NULL, &hrv); + if (ACPI_FAILURE(status)) + dev_err(&pdev->dev, "Failed to get PMIC hardware revision\n"); + + /* + * On Cherry Trail platforms (hrv == 3), do not register the + * input device if there is an "INTCFD9" gpio + * button ACPI device, as that handles the power button too, + * and otherwise we end up reporting all presses twice. + */ + if (hrv == 3 && acpi_dev_found("INTCFD9")) + return false; + + } + + return true; +} +#else +static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek, + struct platform_device *pdev) +{ + return true; +} +#endif + static int axp20x_pek_probe(struct platform_device *pdev) { struct axp20x_pek *axp20x_pek; @@ -268,13 +303,7 @@ static int axp20x_pek_probe(struct platform_device *pdev) axp20x_pek->axp20x = dev_get_drvdata(pdev->dev.parent); - /* - * Do not register the input device if there is an "INTCFD9" - * gpio button ACPI device, that handles the power button too, - * and otherwise we end up reporting all presses twice. - */ - if (!acpi_dev_found("INTCFD9") || - !IS_ENABLED(CONFIG_INPUT_SOC_BUTTON_ARRAY)) { + if (axp20x_pek_should_register_input(axp20x_pek, pdev)) { error = axp20x_pek_probe_input_device(axp20x_pek, pdev); if (error) return error; From 0fd5f221093870d93edb696f6903b058c4d75411 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 2 Jun 2017 17:50:22 -0700 Subject: [PATCH 200/249] Input: axp20x-pek - switch to acpi_dev_present and check for ACPI0011 too acpi_dev_found checks that there is a matching ACPI node, but it may be disabled (_STA method returns 0) in which case the soc_button_array driver will not bind to it and axp20x-pek should handle the power-button. This commit switches from acpi_dev_found to acpi_dev_present to avoid not registering an input-dev for the powerbutton when there is a disabled PNP0C40 device. The ACPI-6.0 standard defines a standard gpio button device using the ACPI0011 HID replacing the custom PNP0C40 gpio device, many newer devices define both PNP0C40 and ACPI0011 devices enabling one or the other depending on whether the BIOS thinks it is going to boot Android or Windows. This commit adds a check for the ACPI0011 device, so that if either device is present *and* enabled we don't register an input-dev for the powerbutton. Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/axp20x-pek.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/input/misc/axp20x-pek.c b/drivers/input/misc/axp20x-pek.c index c0a763a70a00..400869e61a06 100644 --- a/drivers/input/misc/axp20x-pek.c +++ b/drivers/input/misc/axp20x-pek.c @@ -272,11 +272,12 @@ static bool axp20x_pek_should_register_input(struct axp20x_pek *axp20x_pek, /* * On Cherry Trail platforms (hrv == 3), do not register the - * input device if there is an "INTCFD9" gpio + * input device if there is an "INTCFD9" or "ACPI0011" gpio * button ACPI device, as that handles the power button too, * and otherwise we end up reporting all presses twice. */ - if (hrv == 3 && acpi_dev_found("INTCFD9")) + if (hrv == 3 && (acpi_dev_present("INTCFD9", NULL, -1) || + acpi_dev_present("ACPI0011", NULL, -1))) return false; } From 4d58e7329fdd87d37f04b319d6a5165aaff5d75c Mon Sep 17 00:00:00 2001 From: Stefan Schaeckeler Date: Fri, 2 Jun 2017 12:42:08 -0700 Subject: [PATCH 201/249] hwmon: (aspeed-pwm-tacho) Call of_node_put() on a node not claimed Call of_node_put() on a node claimed with of_node_get() or by any other means such as for_each_child_of_node(). Signed-off-by: Stefan Schaeckeler Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index 12b716b70ead..cc15bedb3415 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -808,7 +808,6 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) if (ret) return ret; } - of_node_put(np); priv->groups[0] = &pwm_dev_group; priv->groups[1] = &fan_dev_group; From 5f348fa35a28b19cf7ee1eaa420757341a724c86 Mon Sep 17 00:00:00 2001 From: Stefan Schaeckeler Date: Fri, 2 Jun 2017 12:43:28 -0700 Subject: [PATCH 202/249] hwmon: (aspeed-pwm-tacho) make fan/pwm names start with index 1 Make fan and pwm names in sysfs start with index 1 in accordance to Documentation/hwmon/sysfs-interface conventions. Current implementation starts with index 0, making tools such as sensors(1) skip the first fan. Signed-off-by: Stefan Schaeckeler Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") Signed-off-by: Guenter Roeck --- drivers/hwmon/aspeed-pwm-tacho.c | 52 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c index cc15bedb3415..9de13d626c68 100644 --- a/drivers/hwmon/aspeed-pwm-tacho.c +++ b/drivers/hwmon/aspeed-pwm-tacho.c @@ -597,24 +597,23 @@ static umode_t fan_dev_is_visible(struct kobject *kobj, return a->mode; } -static SENSOR_DEVICE_ATTR(pwm0, 0644, - show_pwm, set_pwm, 0); static SENSOR_DEVICE_ATTR(pwm1, 0644, - show_pwm, set_pwm, 1); + show_pwm, set_pwm, 0); static SENSOR_DEVICE_ATTR(pwm2, 0644, - show_pwm, set_pwm, 2); + show_pwm, set_pwm, 1); static SENSOR_DEVICE_ATTR(pwm3, 0644, - show_pwm, set_pwm, 3); + show_pwm, set_pwm, 2); static SENSOR_DEVICE_ATTR(pwm4, 0644, - show_pwm, set_pwm, 4); + show_pwm, set_pwm, 3); static SENSOR_DEVICE_ATTR(pwm5, 0644, - show_pwm, set_pwm, 5); + show_pwm, set_pwm, 4); static SENSOR_DEVICE_ATTR(pwm6, 0644, - show_pwm, set_pwm, 6); + show_pwm, set_pwm, 5); static SENSOR_DEVICE_ATTR(pwm7, 0644, + show_pwm, set_pwm, 6); +static SENSOR_DEVICE_ATTR(pwm8, 0644, show_pwm, set_pwm, 7); static struct attribute *pwm_dev_attrs[] = { - &sensor_dev_attr_pwm0.dev_attr.attr, &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_pwm2.dev_attr.attr, &sensor_dev_attr_pwm3.dev_attr.attr, @@ -622,6 +621,7 @@ static struct attribute *pwm_dev_attrs[] = { &sensor_dev_attr_pwm5.dev_attr.attr, &sensor_dev_attr_pwm6.dev_attr.attr, &sensor_dev_attr_pwm7.dev_attr.attr, + &sensor_dev_attr_pwm8.dev_attr.attr, NULL, }; @@ -630,40 +630,39 @@ static const struct attribute_group pwm_dev_group = { .is_visible = pwm_is_visible, }; -static SENSOR_DEVICE_ATTR(fan0_input, 0444, - show_rpm, NULL, 0); static SENSOR_DEVICE_ATTR(fan1_input, 0444, - show_rpm, NULL, 1); + show_rpm, NULL, 0); static SENSOR_DEVICE_ATTR(fan2_input, 0444, - show_rpm, NULL, 2); + show_rpm, NULL, 1); static SENSOR_DEVICE_ATTR(fan3_input, 0444, - show_rpm, NULL, 3); + show_rpm, NULL, 2); static SENSOR_DEVICE_ATTR(fan4_input, 0444, - show_rpm, NULL, 4); + show_rpm, NULL, 3); static SENSOR_DEVICE_ATTR(fan5_input, 0444, - show_rpm, NULL, 5); + show_rpm, NULL, 4); static SENSOR_DEVICE_ATTR(fan6_input, 0444, - show_rpm, NULL, 6); + show_rpm, NULL, 5); static SENSOR_DEVICE_ATTR(fan7_input, 0444, - show_rpm, NULL, 7); + show_rpm, NULL, 6); static SENSOR_DEVICE_ATTR(fan8_input, 0444, - show_rpm, NULL, 8); + show_rpm, NULL, 7); static SENSOR_DEVICE_ATTR(fan9_input, 0444, - show_rpm, NULL, 9); + show_rpm, NULL, 8); static SENSOR_DEVICE_ATTR(fan10_input, 0444, - show_rpm, NULL, 10); + show_rpm, NULL, 9); static SENSOR_DEVICE_ATTR(fan11_input, 0444, - show_rpm, NULL, 11); + show_rpm, NULL, 10); static SENSOR_DEVICE_ATTR(fan12_input, 0444, - show_rpm, NULL, 12); + show_rpm, NULL, 11); static SENSOR_DEVICE_ATTR(fan13_input, 0444, - show_rpm, NULL, 13); + show_rpm, NULL, 12); static SENSOR_DEVICE_ATTR(fan14_input, 0444, - show_rpm, NULL, 14); + show_rpm, NULL, 13); static SENSOR_DEVICE_ATTR(fan15_input, 0444, + show_rpm, NULL, 14); +static SENSOR_DEVICE_ATTR(fan16_input, 0444, show_rpm, NULL, 15); static struct attribute *fan_dev_attrs[] = { - &sensor_dev_attr_fan0_input.dev_attr.attr, &sensor_dev_attr_fan1_input.dev_attr.attr, &sensor_dev_attr_fan2_input.dev_attr.attr, &sensor_dev_attr_fan3_input.dev_attr.attr, @@ -679,6 +678,7 @@ static struct attribute *fan_dev_attrs[] = { &sensor_dev_attr_fan13_input.dev_attr.attr, &sensor_dev_attr_fan14_input.dev_attr.attr, &sensor_dev_attr_fan15_input.dev_attr.attr, + &sensor_dev_attr_fan16_input.dev_attr.attr, NULL }; From 4f253e1eb628f5adf7ca4f43aab4bbb1bfffa081 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 16 May 2017 12:18:11 +0200 Subject: [PATCH 203/249] nfs: Mark unnecessarily extern functions as static nfs_initialise_sb() and nfs_clone_super() are declared as extern even though they are used only in fs/nfs/super.c. Mark them as static. Also remove explicit 'inline' directive from nfs_initialise_sb() and leave it upto compiler to decide whether inlining is worth it. Signed-off-by: Jan Kara Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 -- fs/nfs/super.c | 5 +++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e9b4c3320e37..3e24392f2caa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -398,7 +398,6 @@ extern struct file_system_type nfs4_referral_fs_type; bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); -void nfs_initialise_sb(struct super_block *); int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, @@ -458,7 +457,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ -void nfs_clone_super(struct super_block *, struct nfs_mount_info *); void nfs_umount_begin(struct super_block *); int nfs_statfs(struct dentry *, struct kstatfs *); int nfs_show_options(struct seq_file *, struct dentry *); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2f3822a4a7d5..eceb4eabb064 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2301,7 +2301,7 @@ EXPORT_SYMBOL_GPL(nfs_remount); /* * Initialise the common bits of the superblock */ -inline void nfs_initialise_sb(struct super_block *sb) +static void nfs_initialise_sb(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -2348,7 +2348,8 @@ EXPORT_SYMBOL_GPL(nfs_fill_super); /* * Finish setting up a cloned NFS2/3/4 superblock */ -void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) +static void nfs_clone_super(struct super_block *sb, + struct nfs_mount_info *mount_info) { const struct super_block *old_sb = mount_info->cloned->sb; struct nfs_server *server = NFS_SB(sb); From fc098af16b9ff6d470d779d8ddcfb2d91869045a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 4 Jun 2017 10:23:25 +0200 Subject: [PATCH 204/249] Revert "tty: fix port buffer locking" This reverts commit 925bb1ce47f429f69aad35876df7ecd8c53deb7e. It causes lots of warnings and problems so for now, let's just revert it. Reported-by: Reported-by: Russell King Reported-by: Sergey Senozhatsky Reported-by: Geert Uytterhoeven Reported-by: Jiri Slaby Reported-by: Andrey Konovalov Acked-by: Vegard Nossum Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 4fb3165384c4..6b137194069f 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -34,9 +34,7 @@ static int tty_port_default_receive_buf(struct tty_port *port, if (!disc) return 0; - mutex_lock(&tty->atomic_write_lock); ret = tty_ldisc_receive_buf(disc, p, (char *)f, count); - mutex_unlock(&tty->atomic_write_lock); tty_ldisc_deref(disc); From 239e250e4acbc0104d514307029c0839e834a51a Mon Sep 17 00:00:00 2001 From: Richard Narron Date: Sun, 4 Jun 2017 16:23:18 -0700 Subject: [PATCH 205/249] fs/ufs: Set UFS default maximum bytes per file This fixes a problem with reading files larger than 2GB from a UFS-2 file system: https://bugzilla.kernel.org/show_bug.cgi?id=195721 The incorrect UFS s_maxsize limit became a problem as of commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") which started using s_maxbytes to avoid a page index overflow in do_generic_file_read(). That caused files to be truncated on UFS-2 file systems because the default maximum file size is 2GB (MAX_NON_LFS) and UFS didn't update it. Here I simply increase the default to a common value used by other file systems. Signed-off-by: Richard Narron Cc: Al Viro Cc: Will B Cc: Theodore Ts'o Cc: # v4.9 and backports of c2a9737f45e2 Signed-off-by: Linus Torvalds --- fs/ufs/super.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 131b2b77c818..29ecaf739449 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -812,9 +812,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) uspi->s_dirblksize = UFS_SECTOR_SIZE; super_block_offset=UFS_SBLOCK; - /* Keep 2Gig file limit. Some UFS variants need to override - this but as I don't know which I'll let those in the know loosen - the rules */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + switch (sbi->s_mount_opt & UFS_MOUNT_UFSTYPE) { case UFS_MOUNT_UFSTYPE_44BSD: UFSD("ufstype=44bsd\n"); From 3c2993b8c6143d8a5793746a54eba8f86f95240f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Jun 2017 16:47:43 -0700 Subject: [PATCH 206/249] Linux 4.12-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 470bd4d9513a..853ae9179af9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Fearless Coyote # *DOCUMENTATION* From 41bb26f8db3ad33b083e57eb9fc5828796110e77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 28 May 2017 08:56:46 +0300 Subject: [PATCH 207/249] uuid,afs: move struct uuid_v1 back into afs This essentially is a partial revert of commit ff548773 ("afs: Move UUID struct to linux/uuid.h") and moves struct uuid_v1 back into fs/afs as struct afs_uuid. It however keeps it as big endian structure so that we can use the normal uuid generation helpers when casting to/from struct afs_uuid. The V1 uuid intrepretation in struct form isn't really useful to the rest of the kernel, and not really compatible to it either, so move it back to AFS instead of polluting the global uuid.h. Signed-off-by: Christoph Hellwig Acked-by: David Howells --- fs/afs/cmservice.c | 16 ++++++++-------- fs/afs/internal.h | 11 ++++++++++- fs/afs/main.c | 2 +- include/linux/uuid.h | 24 ------------------------ 4 files changed, 19 insertions(+), 34 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 3062cceb5c2a..782d4d05a53b 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -350,7 +350,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { struct sockaddr_rxrpc srx; struct afs_server *server; - struct uuid_v1 *r; + struct afs_uuid *r; unsigned loop; __be32 *b; int ret; @@ -380,7 +380,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) } _debug("unmarshall UUID"); - call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); if (!call->request) return -ENOMEM; @@ -453,7 +453,7 @@ static int afs_deliver_cb_probe(struct afs_call *call) static void SRXAFSCB_ProbeUuid(struct work_struct *work) { struct afs_call *call = container_of(work, struct afs_call, work); - struct uuid_v1 *r = call->request; + struct afs_uuid *r = call->request; struct { __be32 match; @@ -476,7 +476,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) */ static int afs_deliver_cb_probe_uuid(struct afs_call *call) { - struct uuid_v1 *r; + struct afs_uuid *r; unsigned loop; __be32 *b; int ret; @@ -502,15 +502,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call) } _debug("unmarshall UUID"); - call->request = kmalloc(sizeof(struct uuid_v1), GFP_KERNEL); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); if (!call->request) return -ENOMEM; b = call->buffer; r = call->request; - r->time_low = b[0]; - r->time_mid = htons(ntohl(b[1])); - r->time_hi_and_version = htons(ntohl(b[2])); + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); r->clock_seq_hi_and_reserved = ntohl(b[3]); r->clock_seq_low = ntohl(b[4]); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 393672997cc2..4e2556606623 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -410,6 +410,15 @@ struct afs_interface { unsigned mtu; /* MTU of interface */ }; +struct afs_uuid { + __be32 time_low; /* low part of timestamp */ + __be16 time_mid; /* mid part of timestamp */ + __be16 time_hi_and_version; /* high part of timestamp and version */ + __u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ + __u8 clock_seq_low; /* clock seq low */ + __u8 node[6]; /* spatially unique node ID (MAC addr) */ +}; + /*****************************************************************************/ /* * cache.c @@ -544,7 +553,7 @@ extern int afs_drop_inode(struct inode *); * main.c */ extern struct workqueue_struct *afs_wq; -extern struct uuid_v1 afs_uuid; +extern struct afs_uuid afs_uuid; /* * misc.c diff --git a/fs/afs/main.c b/fs/afs/main.c index 51d7d17bca57..9944770849da 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -31,7 +31,7 @@ static char *rootcell; module_param(rootcell, charp, 0); MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); -struct uuid_v1 afs_uuid; +struct afs_uuid afs_uuid; struct workqueue_struct *afs_wq; /* diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 4dff73a89758..2d095fc60204 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -18,30 +18,6 @@ #include -/* - * V1 (time-based) UUID definition [RFC 4122]. - * - the timestamp is a 60-bit value, split 32/16/12, and goes in 100ns - * increments since midnight 15th October 1582 - * - add AFS_UUID_TO_UNIX_TIME to convert unix time in 100ns units to UUID - * time - * - the clock sequence is a 14-bit counter to avoid duplicate times - */ -struct uuid_v1 { - __be32 time_low; /* low part of timestamp */ - __be16 time_mid; /* mid part of timestamp */ - __be16 time_hi_and_version; /* high part of timestamp and version */ -#define UUID_TO_UNIX_TIME 0x01b21dd213814000ULL -#define UUID_TIMEHI_MASK 0x0fff -#define UUID_VERSION_TIME 0x1000 /* time-based UUID */ -#define UUID_VERSION_NAME 0x3000 /* name-based UUID */ -#define UUID_VERSION_RANDOM 0x4000 /* (pseudo-)random generated UUID */ - u8 clock_seq_hi_and_reserved; /* clock seq hi and variant */ -#define UUID_CLOCKHI_MASK 0x3f -#define UUID_VARIANT_STD 0x80 - u8 clock_seq_low; /* clock seq low */ - u8 node[6]; /* spatially unique node ID (MAC addr) */ -}; - /* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. From dfd7487e991e5201808ff5e25fd1ffe5b1acbce8 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 May 2017 16:26:16 +0300 Subject: [PATCH 208/249] xfs: use uuid_copy() helper to abstract uuid_t uuid_t definition is about to change. Signed-off-by: Amir Goldstein Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Andy Shevchenko Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_inode_item.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 08cb7d1a4a3a..013cc78d7daf 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -834,9 +834,7 @@ xfs_inode_item_format_convert( in_f->ilf_dsize = in_f32->ilf_dsize; in_f->ilf_ino = in_f32->ilf_ino; /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f32->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f32->ilf_u.ilfu_uuid); in_f->ilf_blkno = in_f32->ilf_blkno; in_f->ilf_len = in_f32->ilf_len; in_f->ilf_boffset = in_f32->ilf_boffset; @@ -851,9 +849,7 @@ xfs_inode_item_format_convert( in_f->ilf_dsize = in_f64->ilf_dsize; in_f->ilf_ino = in_f64->ilf_ino; /* copy biggest field of ilf_u */ - memcpy(in_f->ilf_u.ilfu_uuid.__u_bits, - in_f64->ilf_u.ilfu_uuid.__u_bits, - sizeof(uuid_t)); + uuid_copy(&in_f->ilf_u.ilfu_uuid, &in_f64->ilf_u.ilfu_uuid); in_f->ilf_blkno = in_f64->ilf_blkno; in_f->ilf_len = in_f64->ilf_len; in_f->ilf_boffset = in_f64->ilf_boffset; From b1f359f9805c2a1ec0f57e08b74baf383eb4b6a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 May 2017 09:53:09 +0200 Subject: [PATCH 209/249] xfs: use uuid_be to implement the uuid_t type Use the generic Linux definition to implement our UUID type, this will allow using more generic infrastructure in the future. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Brian Foster Reviewed-by: Andy Shevchenko Reviewed-by: Darrick J. Wong --- fs/xfs/uuid.h | 4 ---- fs/xfs/xfs_linux.h | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h index 104db0f3bed6..4f1441ba4fa5 100644 --- a/fs/xfs/uuid.h +++ b/fs/xfs/uuid.h @@ -18,10 +18,6 @@ #ifndef __XFS_SUPPORT_UUID_H__ #define __XFS_SUPPORT_UUID_H__ -typedef struct { - unsigned char __u_bits[16]; -} uuid_t; - extern int uuid_is_nil(uuid_t *uuid); extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 044fb0e15390..89ee5ec66837 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -19,6 +19,7 @@ #define __XFS_LINUX__ #include +#include /* * Kernel specific type declarations for XFS @@ -38,6 +39,8 @@ typedef __s64 xfs_daddr_t; /* type */ typedef __u32 xfs_dev_t; typedef __u32 xfs_nlink_t; +typedef uuid_be uuid_t; + #include "xfs_types.h" #include "kmem.h" From e6fd2093a85d3be874c47d4f43bea5bdf0770955 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 May 2017 16:26:20 +0300 Subject: [PATCH 210/249] md: namespace private helper names The md private helper uuid_equal() collides with a generic helper of the same name. Rename the md private helper to md_uuid_equal() and do the same for md_sb_equal(). Signed-off-by: Amir Goldstein Signed-off-by: Christoph Hellwig Reviewed-by: Shaohua Li Reviewed-by: Andy Shevchenko --- drivers/md/md.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 212a6777ff31..a50302e42cb5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -825,7 +825,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size) return -EINVAL; } -static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) +static int md_uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) { return sb1->set_uuid0 == sb2->set_uuid0 && sb1->set_uuid1 == sb2->set_uuid1 && @@ -833,7 +833,7 @@ static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) sb1->set_uuid3 == sb2->set_uuid3; } -static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) +static int md_sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) { int ret; mdp_super_t *tmp1, *tmp2; @@ -1025,12 +1025,12 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor } else { __u64 ev1, ev2; mdp_super_t *refsb = page_address(refdev->sb_page); - if (!uuid_equal(refsb, sb)) { + if (!md_uuid_equal(refsb, sb)) { pr_warn("md: %s has different UUID to %s\n", b, bdevname(refdev->bdev,b2)); goto abort; } - if (!sb_equal(refsb, sb)) { + if (!md_sb_equal(refsb, sb)) { pr_warn("md: %s has same UUID but different superblock to %s\n", b, bdevname(refdev->bdev, b2)); goto abort; From 12ce5f8c5c56fcd027bb9d12d17d2628d7dd9882 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2017 16:40:52 +0200 Subject: [PATCH 211/249] nfsd: namespace-prefix uuid_parse Signed-off-by: Christoph Hellwig --- fs/nfsd/export.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index e71f11b1a180..3bc08c394a3f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -486,7 +486,7 @@ secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; } #endif static inline int -uuid_parse(char **mesg, char *buf, unsigned char **puuid) +nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid) { int len; @@ -586,7 +586,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (strcmp(buf, "fsloc") == 0) err = fsloc_parse(&mesg, buf, &exp.ex_fslocs); else if (strcmp(buf, "uuid") == 0) - err = uuid_parse(&mesg, buf, &exp.ex_uuid); + err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid); else if (strcmp(buf, "secinfo") == 0) err = secinfo_parse(&mesg, buf, &exp); else From 60927bc314363f91616c1f4577541c2a2e27aba3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:56:45 +0200 Subject: [PATCH 212/249] uuid: remove uuid_be defintions from the uapi header We don't use uuid_be and the UUID_BE constants in any uapi headers, so make them private to the kernel. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- include/linux/uuid.h | 15 +++++++++++++++ include/uapi/linux/uuid.h | 16 ---------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 2d095fc60204..30fb13018e29 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -18,6 +18,21 @@ #include +typedef struct { + __u8 b[16]; +} uuid_be; + +#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_be) \ +{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ + ((b) >> 8) & 0xff, (b) & 0xff, \ + ((c) >> 8) & 0xff, (c) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + +#define NULL_UUID_BE \ + UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + /* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index 3738e5fb6a4d..0099756c4bac 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -24,10 +24,6 @@ typedef struct { __u8 b[16]; } uuid_le; -typedef struct { - __u8 b[16]; -} uuid_be; - #define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ ((uuid_le) \ {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ @@ -35,20 +31,8 @@ typedef struct { (c) & 0xff, ((c) >> 8) & 0xff, \ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) -#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ -((uuid_be) \ -{{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ - ((b) >> 8) & 0xff, (b) & 0xff, \ - ((c) >> 8) & 0xff, (c) & 0xff, \ - (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) - #define NULL_UUID_LE \ UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ 0x00, 0x00, 0x00, 0x00) -#define NULL_UUID_BE \ - UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) - - #endif /* _UAPI_LINUX_UUID_H_ */ From f9727a17db9bab71ddae91f74f11a8a2f9a0ece6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 10:02:48 +0200 Subject: [PATCH 213/249] uuid: rename uuid types Our "little endian" UUID really is a Wintel GUID, so rename it and its helpers such (guid_t). The big endian UUID is the only true one, so give it the name uuid_t. The uuid_le and uuid_be names are retained for now, but will hopefully go away soon. The exception to that are the _cmp helpers that will be replaced by better primitives ASAP and thus don't get the new names. Also the _to_bin helpers are named to match the better named uuid_parse routine in userspace. Also remove the existing typedef in XFS that's now been superceeded by the generic type name. Signed-off-by: Christoph Hellwig [andy: also update the UUID_LE/UUID_BE macros including fallout] Signed-off-by: Andy Shevchenko Reviewed-by: Amir Goldstein Reviewed-by: Darrick J. Wong Reviewed-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- fs/xfs/xfs_linux.h | 2 -- include/linux/uuid.h | 55 +++++++++++++++++++++++---------------- include/uapi/linux/uuid.h | 12 ++++++--- lib/test_uuid.c | 32 +++++++++++------------ lib/uuid.c | 30 ++++++++++----------- lib/vsprintf.c | 4 +-- 6 files changed, 73 insertions(+), 62 deletions(-) diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 89ee5ec66837..2c33d915e550 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -39,8 +39,6 @@ typedef __s64 xfs_daddr_t; /* type */ typedef __u32 xfs_dev_t; typedef __u32 xfs_nlink_t; -typedef uuid_be uuid_t; - #include "xfs_types.h" #include "kmem.h" diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 30fb13018e29..c2adb8046095 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -20,46 +20,55 @@ typedef struct { __u8 b[16]; -} uuid_be; +} uuid_t; -#define UUID_BE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ -((uuid_be) \ +#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((uuid_t) \ {{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ ((b) >> 8) & 0xff, (b) & 0xff, \ ((c) >> 8) & 0xff, (c) & 0xff, \ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) -#define NULL_UUID_BE \ - UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) - /* * The length of a UUID string ("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee") * not including trailing NUL. */ #define UUID_STRING_LEN 36 -static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_le)); -} - -static inline int uuid_be_cmp(const uuid_be u1, const uuid_be u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_be)); -} - void generate_random_uuid(unsigned char uuid[16]); -extern void uuid_le_gen(uuid_le *u); -extern void uuid_be_gen(uuid_be *u); +extern void guid_gen(guid_t *u); +extern void uuid_gen(uuid_t *u); bool __must_check uuid_is_valid(const char *uuid); -extern const u8 uuid_le_index[16]; -extern const u8 uuid_be_index[16]; +extern const u8 guid_index[16]; +extern const u8 uuid_index[16]; -int uuid_le_to_bin(const char *uuid, uuid_le *u); -int uuid_be_to_bin(const char *uuid, uuid_be *u); +int guid_parse(const char *uuid, guid_t *u); +int uuid_parse(const char *uuid, uuid_t *u); + +/* backwards compatibility, don't use in new code */ +typedef uuid_t uuid_be; +#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) +#define NULL_UUID_BE \ + UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00) + +#define uuid_le_gen(u) guid_gen(u) +#define uuid_be_gen(u) uuid_gen(u) +#define uuid_le_to_bin(guid, u) guid_parse(guid, u) +#define uuid_be_to_bin(uuid, u) uuid_parse(uuid, u) + +static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) +{ + return memcmp(&u1, &u2, sizeof(guid_t)); +} + +static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_t)); +} #endif diff --git a/include/uapi/linux/uuid.h b/include/uapi/linux/uuid.h index 0099756c4bac..8ef82f433877 100644 --- a/include/uapi/linux/uuid.h +++ b/include/uapi/linux/uuid.h @@ -22,17 +22,21 @@ typedef struct { __u8 b[16]; -} uuid_le; +} guid_t; -#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ -((uuid_le) \ +#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((guid_t) \ {{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ (b) & 0xff, ((b) >> 8) & 0xff, \ (c) & 0xff, ((c) >> 8) & 0xff, \ (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) +/* backwards compatibility, don't use in new code */ +typedef guid_t uuid_le; +#define UUID_LE(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ + GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) #define NULL_UUID_LE \ UUID_LE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, \ - 0x00, 0x00, 0x00, 0x00) + 0x00, 0x00, 0x00, 0x00) #endif /* _UAPI_LINUX_UUID_H_ */ diff --git a/lib/test_uuid.c b/lib/test_uuid.c index 547d3127a3cf..ff36f3240e90 100644 --- a/lib/test_uuid.c +++ b/lib/test_uuid.c @@ -11,25 +11,25 @@ struct test_uuid_data { const char *uuid; - uuid_le le; - uuid_be be; + guid_t le; + uuid_t be; }; static const struct test_uuid_data test_uuid_test_data[] = { { .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576", - .le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), - .be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), }, { .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b", - .le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), - .be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), }, { .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84", - .le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), - .be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), }, }; @@ -61,13 +61,13 @@ static void __init test_uuid_failed(const char *prefix, bool wrong, bool be, static void __init test_uuid_test(const struct test_uuid_data *data) { - uuid_le le; - uuid_be be; + guid_t le; + uuid_t be; char buf[48]; /* LE */ total_tests++; - if (uuid_le_to_bin(data->uuid, &le)) + if (guid_parse(data->uuid, &le)) test_uuid_failed("conversion", false, false, data->uuid, NULL); total_tests++; @@ -78,7 +78,7 @@ static void __init test_uuid_test(const struct test_uuid_data *data) /* BE */ total_tests++; - if (uuid_be_to_bin(data->uuid, &be)) + if (uuid_parse(data->uuid, &be)) test_uuid_failed("conversion", false, true, data->uuid, NULL); total_tests++; @@ -90,17 +90,17 @@ static void __init test_uuid_test(const struct test_uuid_data *data) static void __init test_uuid_wrong(const char *data) { - uuid_le le; - uuid_be be; + guid_t le; + uuid_t be; /* LE */ total_tests++; - if (!uuid_le_to_bin(data, &le)) + if (!guid_parse(data, &le)) test_uuid_failed("negative", true, false, data, NULL); /* BE */ total_tests++; - if (!uuid_be_to_bin(data, &be)) + if (!uuid_parse(data, &be)) test_uuid_failed("negative", true, true, data, NULL); } diff --git a/lib/uuid.c b/lib/uuid.c index 37687af77ff8..90bee73f7bd7 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -21,10 +21,10 @@ #include #include -const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; -EXPORT_SYMBOL(uuid_le_index); -const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; -EXPORT_SYMBOL(uuid_be_index); +const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; +EXPORT_SYMBOL(guid_index); +const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; +EXPORT_SYMBOL(uuid_index); /*************************************************************** * Random UUID interface @@ -53,21 +53,21 @@ static void __uuid_gen_common(__u8 b[16]) b[8] = (b[8] & 0x3F) | 0x80; } -void uuid_le_gen(uuid_le *lu) +void guid_gen(guid_t *lu) { __uuid_gen_common(lu->b); /* version 4 : random generation */ lu->b[7] = (lu->b[7] & 0x0F) | 0x40; } -EXPORT_SYMBOL_GPL(uuid_le_gen); +EXPORT_SYMBOL_GPL(guid_gen); -void uuid_be_gen(uuid_be *bu) +void uuid_gen(uuid_t *bu) { __uuid_gen_common(bu->b); /* version 4 : random generation */ bu->b[6] = (bu->b[6] & 0x0F) | 0x40; } -EXPORT_SYMBOL_GPL(uuid_be_gen); +EXPORT_SYMBOL_GPL(uuid_gen); /** * uuid_is_valid - checks if UUID string valid @@ -97,7 +97,7 @@ bool uuid_is_valid(const char *uuid) } EXPORT_SYMBOL(uuid_is_valid); -static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16]) +static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16]) { static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34}; unsigned int i; @@ -115,14 +115,14 @@ static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16]) return 0; } -int uuid_le_to_bin(const char *uuid, uuid_le *u) +int guid_parse(const char *uuid, guid_t *u) { - return __uuid_to_bin(uuid, u->b, uuid_le_index); + return __uuid_parse(uuid, u->b, guid_index); } -EXPORT_SYMBOL(uuid_le_to_bin); +EXPORT_SYMBOL(guid_parse); -int uuid_be_to_bin(const char *uuid, uuid_be *u) +int uuid_parse(const char *uuid, uuid_t *u) { - return __uuid_to_bin(uuid, u->b, uuid_be_index); + return __uuid_parse(uuid, u->b, uuid_index); } -EXPORT_SYMBOL(uuid_be_to_bin); +EXPORT_SYMBOL(uuid_parse); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 2d41de3f98a1..9f37d6208e99 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1308,14 +1308,14 @@ char *uuid_string(char *buf, char *end, const u8 *addr, char uuid[UUID_STRING_LEN + 1]; char *p = uuid; int i; - const u8 *index = uuid_be_index; + const u8 *index = uuid_index; bool uc = false; switch (*(++fmt)) { case 'L': uc = true; /* fall-through */ case 'l': - index = uuid_le_index; + index = guid_index; break; case 'B': uc = true; From b10bf0e281040f152da93fede1d43ec57caa7cee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 May 2017 14:22:11 +0200 Subject: [PATCH 214/249] uuid: don't export guid_index and uuid_index These are only used in uuid.c and vsprintf.c and aren't something modules should use directly. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- lib/uuid.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/uuid.c b/lib/uuid.c index 90bee73f7bd7..f7116ed88e01 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -22,9 +22,7 @@ #include const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; -EXPORT_SYMBOL(guid_index); const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; -EXPORT_SYMBOL(uuid_index); /*************************************************************** * Random UUID interface From df33767d9fe0ca93c606cc9042df05e5045c8158 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 14:00:57 +0200 Subject: [PATCH 215/249] uuid: hoist helpers uuid_equal() and uuid_copy() from xfs These helper are used to compare and copy two uuid_t type objects. Signed-off-by: Amir Goldstein [hch: also provide the respective guid_ versions] Signed-off-by: Christoph Hellwig Reviewed-by: Andy Shevchenko --- fs/xfs/uuid.c | 6 ------ fs/xfs/uuid.h | 7 ------- include/linux/uuid.h | 20 ++++++++++++++++++++ lib/test_uuid.c | 4 ++-- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c index b83f76b6d410..29ed78c8637b 100644 --- a/fs/xfs/uuid.c +++ b/fs/xfs/uuid.c @@ -55,9 +55,3 @@ uuid_is_nil(uuid_t *uuid) if (*cp++) return 0; /* not nil */ return 1; /* is nil */ } - -int -uuid_equal(uuid_t *uuid1, uuid_t *uuid2) -{ - return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; -} diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h index 4f1441ba4fa5..86bbed071e79 100644 --- a/fs/xfs/uuid.h +++ b/fs/xfs/uuid.h @@ -19,13 +19,6 @@ #define __XFS_SUPPORT_UUID_H__ extern int uuid_is_nil(uuid_t *uuid); -extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); -static inline void -uuid_copy(uuid_t *dst, uuid_t *src) -{ - memcpy(dst, src, sizeof(uuid_t)); -} - #endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/include/linux/uuid.h b/include/linux/uuid.h index c2adb8046095..777f9cb01eb1 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -35,6 +35,26 @@ typedef struct { */ #define UUID_STRING_LEN 36 +static inline bool guid_equal(const guid_t *u1, const guid_t *u2) +{ + return memcmp(u1, u2, sizeof(guid_t)) == 0; +} + +static inline void guid_copy(guid_t *dst, const guid_t *src) +{ + memcpy(dst, src, sizeof(guid_t)); +} + +static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2) +{ + return memcmp(u1, u2, sizeof(uuid_t)) == 0; +} + +static inline void uuid_copy(uuid_t *dst, const uuid_t *src) +{ + memcpy(dst, src, sizeof(uuid_t)); +} + void generate_random_uuid(unsigned char uuid[16]); extern void guid_gen(guid_t *u); diff --git a/lib/test_uuid.c b/lib/test_uuid.c index ff36f3240e90..478c049630b5 100644 --- a/lib/test_uuid.c +++ b/lib/test_uuid.c @@ -71,7 +71,7 @@ static void __init test_uuid_test(const struct test_uuid_data *data) test_uuid_failed("conversion", false, false, data->uuid, NULL); total_tests++; - if (uuid_le_cmp(data->le, le)) { + if (!guid_equal(&data->le, &le)) { sprintf(buf, "%pUl", &le); test_uuid_failed("cmp", false, false, data->uuid, buf); } @@ -82,7 +82,7 @@ static void __init test_uuid_test(const struct test_uuid_data *data) test_uuid_failed("conversion", false, true, data->uuid, NULL); total_tests++; - if (uuid_be_cmp(data->be, be)) { + if (uuid_equal(&data->be, &be)) { sprintf(buf, "%pUb", &be); test_uuid_failed("cmp", false, true, data->uuid, buf); } From ef40dda5bbc310f6517082c0ff002913104358cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 09:01:42 +0200 Subject: [PATCH 216/249] uuid: hoist uuid_is_null() helper from libnvdimm Hoist the libnvdimm helper as an inline helper to linux/uuid.h using an auxiliary const variable uuid_null in lib/uuid.c. [hch: also add the guid variant. Both do the same but I'd like to keep casts to a minimum] The common helper uses the new abstract type uuid_t * instead of u8 *. Suggested-by: Christoph Hellwig Signed-off-by: Amir Goldstein [hch: added guid_is_null] Signed-off-by: Christoph Hellwig Acked-by: Dan Williams Reviewed-by: Andy Shevchenko --- drivers/nvdimm/btt_devs.c | 9 +-------- include/linux/uuid.h | 13 +++++++++++++ lib/uuid.c | 5 +++++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index ae00dc0d9791..4c989bb9a8a0 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -222,13 +222,6 @@ struct device *nd_btt_create(struct nd_region *nd_region) return dev; } -static bool uuid_is_null(u8 *uuid) -{ - static const u8 null_uuid[16]; - - return (memcmp(uuid, null_uuid, 16) == 0); -} - /** * nd_btt_arena_is_valid - check if the metadata layout is valid * @nd_btt: device with BTT geometry and backing device info @@ -249,7 +242,7 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0) return false; - if (!uuid_is_null(super->parent_uuid)) + if (!guid_is_null((guid_t *)&super->parent_uuid)) if (memcmp(super->parent_uuid, parent_uuid, 16) != 0) return false; diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 777f9cb01eb1..75f7182d5360 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -35,6 +35,9 @@ typedef struct { */ #define UUID_STRING_LEN 36 +extern const guid_t guid_null; +extern const uuid_t uuid_null; + static inline bool guid_equal(const guid_t *u1, const guid_t *u2) { return memcmp(u1, u2, sizeof(guid_t)) == 0; @@ -45,6 +48,11 @@ static inline void guid_copy(guid_t *dst, const guid_t *src) memcpy(dst, src, sizeof(guid_t)); } +static inline bool guid_is_null(guid_t *guid) +{ + return guid_equal(guid, &guid_null); +} + static inline bool uuid_equal(const uuid_t *u1, const uuid_t *u2) { return memcmp(u1, u2, sizeof(uuid_t)) == 0; @@ -55,6 +63,11 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src) memcpy(dst, src, sizeof(uuid_t)); } +static inline bool uuid_is_null(uuid_t *uuid) +{ + return uuid_equal(uuid, &uuid_null); +} + void generate_random_uuid(unsigned char uuid[16]); extern void guid_gen(guid_t *u); diff --git a/lib/uuid.c b/lib/uuid.c index f7116ed88e01..680b9fb9ba09 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -21,6 +21,11 @@ #include #include +const guid_t guid_null; +EXPORT_SYMBOL(guid_null); +const uuid_t uuid_null; +EXPORT_SYMBOL(uuid_null); + const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; From e64e17a554746a7f794f3d480d06e4be530fc090 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 May 2017 14:00:57 +0200 Subject: [PATCH 217/249] S390/sysinfo: use uuid_is_null instead of opencoding it And switch to use uuid_t instead of the old uuid_be type. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- arch/s390/include/asm/sysinfo.h | 4 ++-- arch/s390/kernel/sysinfo.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index e784bed6ed7f..2b498e58b914 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -109,7 +109,7 @@ struct sysinfo_2_2_2 { unsigned short cpus_shared; char reserved_4[3]; unsigned char vsne; - uuid_be uuid; + uuid_t uuid; char reserved_5[160]; char ext_name[256]; }; @@ -134,7 +134,7 @@ struct sysinfo_3_2_2 { char reserved_1[3]; unsigned char evmne; unsigned int reserved_2; - uuid_be uuid; + uuid_t uuid; } vm[8]; char reserved_3[1504]; char ext_names[8][256]; diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index eefcb54872a5..fb869b103825 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -242,7 +242,7 @@ static void print_ext_name(struct seq_file *m, int lvl, static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info) { - if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be))) + if (uuid_is_null(&info->vm[i].uuid)) return; seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid); } From cb0ba6cc22f372a2d42f9f2afc5a35f432cc61e2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 May 2017 09:39:10 +0200 Subject: [PATCH 218/249] xfs: remove uuid_getnodeuniq and xfs_uu_t Opencode uuid_getnodeuniq in the only caller, and directly decode the uuid_t representation instead of using a structure cast for it. Signed-off-by: Christoph Hellwig --- fs/xfs/uuid.c | 25 ------------------------- fs/xfs/uuid.h | 1 - fs/xfs/xfs_mount.c | 5 ++++- 3 files changed, 4 insertions(+), 27 deletions(-) diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c index 29ed78c8637b..737c186ea98b 100644 --- a/fs/xfs/uuid.c +++ b/fs/xfs/uuid.c @@ -17,31 +17,6 @@ */ #include -/* IRIX interpretation of an uuid_t */ -typedef struct { - __be32 uu_timelow; - __be16 uu_timemid; - __be16 uu_timehi; - __be16 uu_clockseq; - __be16 uu_node[3]; -} xfs_uu_t; - -/* - * uuid_getnodeuniq - obtain the node unique fields of a UUID. - * - * This is not in any way a standard or condoned UUID function; - * it just something that's needed for user-level file handles. - */ -void -uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) -{ - xfs_uu_t *uup = (xfs_uu_t *)uuid; - - fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | - be16_to_cpu(uup->uu_timemid); - fsid[1] = be32_to_cpu(uup->uu_timelow); -} - int uuid_is_nil(uuid_t *uuid) { diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h index 86bbed071e79..5aea49bf0963 100644 --- a/fs/xfs/uuid.h +++ b/fs/xfs/uuid.h @@ -19,6 +19,5 @@ #define __XFS_SUPPORT_UUID_H__ extern int uuid_is_nil(uuid_t *uuid); -extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); #endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2eaf81859166..51f7d03ef86c 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -793,7 +793,10 @@ xfs_mountfs( * Copies the low order bits of the timestamp and the randomly * set "sequence" number out of a UUID. */ - uuid_getnodeuniq(&sbp->sb_uuid, mp->m_fixedfsid); + mp->m_fixedfsid[0] = + (get_unaligned_be16(&sbp->sb_uuid.b[8]) << 16) | + get_unaligned_be16(&sbp->sb_uuid.b[4]); + mp->m_fixedfsid[1] = get_unaligned_be32(&sbp->sb_uuid.b[0]); mp->m_dmevmask = 0; /* not persistent; set after each mount */ From d905fdaaa7d59abde1535075cca6774fd7f0ef0a Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 4 May 2017 16:26:23 +0300 Subject: [PATCH 219/249] xfs: use the common helper uuid_is_null() Use the common helper uuid_is_null() and remove the xfs specific helper uuid_is_nil(). The common helper does not check for the NULL pointer value as xfs helper did, but xfs code never calls the helper with a pointer that can be NULL. Conform comments and warning strings to use the term 'null uuid' instead of 'nil uuid', because this is the terminology used by lib/uuid.c and its users. It is also the terminology used in userspace by libuuid and xfsprogs. Signed-off-by: Amir Goldstein [hch: remove now unused uuid.[ch]] Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Andy Shevchenko --- fs/xfs/Makefile | 3 +-- fs/xfs/uuid.c | 32 -------------------------------- fs/xfs/uuid.h | 23 ----------------------- fs/xfs/xfs_linux.h | 1 - fs/xfs/xfs_log_recover.c | 6 +++--- fs/xfs/xfs_mount.c | 8 ++++---- 6 files changed, 8 insertions(+), 65 deletions(-) delete mode 100644 fs/xfs/uuid.c delete mode 100644 fs/xfs/uuid.h diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 5c90f82b8f6b..a6e955bfead8 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -98,8 +98,7 @@ xfs-y += xfs_aops.o \ xfs_sysfs.o \ xfs_trans.o \ xfs_xattr.o \ - kmem.o \ - uuid.o + kmem.o # low-level transaction/log code xfs-y += xfs_log.o \ diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c deleted file mode 100644 index 737c186ea98b..000000000000 --- a/fs/xfs/uuid.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include - -int -uuid_is_nil(uuid_t *uuid) -{ - int i; - char *cp = (char *)uuid; - - if (uuid == NULL) - return 0; - /* implied check of version number here... */ - for (i = 0; i < sizeof *uuid; i++) - if (*cp++) return 0; /* not nil */ - return 1; /* is nil */ -} diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h deleted file mode 100644 index 5aea49bf0963..000000000000 --- a/fs/xfs/uuid.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_UUID_H__ -#define __XFS_SUPPORT_UUID_H__ - -extern int uuid_is_nil(uuid_t *uuid); - -#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 2c33d915e550..2d167fe643ec 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -43,7 +43,6 @@ typedef __u32 xfs_nlink_t; #include "kmem.h" #include "mrlock.h" -#include "uuid.h" #include #include diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index cd0b077deb35..8cec1e5505a4 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -352,13 +352,13 @@ xlog_header_check_mount( { ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); - if (uuid_is_nil(&head->h_fs_uuid)) { + if (uuid_is_null(&head->h_fs_uuid)) { /* * IRIX doesn't write the h_fs_uuid or h_fmt fields. If - * h_fs_uuid is nil, we assume this log was last mounted + * h_fs_uuid is null, we assume this log was last mounted * by IRIX and continue. */ - xfs_warn(mp, "nil uuid in log - IRIX style log"); + xfs_warn(mp, "null uuid in log - IRIX style log"); } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { xfs_warn(mp, "log has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 51f7d03ef86c..54452967e35e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -80,14 +80,14 @@ xfs_uuid_mount( if (mp->m_flags & XFS_MOUNT_NOUUID) return 0; - if (uuid_is_nil(uuid)) { - xfs_warn(mp, "Filesystem has nil UUID - can't mount"); + if (uuid_is_null(uuid)) { + xfs_warn(mp, "Filesystem has null UUID - can't mount"); return -EINVAL; } mutex_lock(&xfs_uuid_table_mutex); for (i = 0, hole = -1; i < xfs_uuid_table_size; i++) { - if (uuid_is_nil(&xfs_uuid_table[i])) { + if (uuid_is_null(&xfs_uuid_table[i])) { hole = i; continue; } @@ -124,7 +124,7 @@ xfs_uuid_unmount( mutex_lock(&xfs_uuid_table_mutex); for (i = 0; i < xfs_uuid_table_size; i++) { - if (uuid_is_nil(&xfs_uuid_table[i])) + if (uuid_is_null(&xfs_uuid_table[i])) continue; if (!uuid_equal(uuid, &xfs_uuid_table[i])) continue; From 1dd771eb0b09fe9c12ea58b18c676b32a528be39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 May 2017 15:16:44 +0200 Subject: [PATCH 220/249] block: remove blk_part_pack_uuid This helper was only used by IMA of all things, which would get spurious errors if CONFIG_BLOCK is disabled. Just opencode the call there. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Acked-by: Mimi Zohar Reviewed-by: Andy Shevchenko --- include/linux/genhd.h | 11 ----------- security/integrity/ima/ima_policy.c | 3 +-- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index acff9437e5c3..e619fae2f037 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -219,12 +219,6 @@ static inline struct gendisk *part_to_disk(struct hd_struct *part) return NULL; } -static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to) -{ - uuid_be_to_bin(uuid_str, (uuid_be *)to); - return 0; -} - static inline int disk_max_parts(struct gendisk *disk) { if (disk->flags & GENHD_FL_EXT_DEVT) @@ -736,11 +730,6 @@ static inline dev_t blk_lookup_devt(const char *name, int partno) dev_t devt = MKDEV(0, 0); return devt; } - -static inline int blk_part_pack_uuid(const u8 *uuid_str, u8 *to) -{ - return -EINVAL; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_GENHD_H */ diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 3ab1067db624..1431ada649e5 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -717,8 +717,7 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) break; } - result = blk_part_pack_uuid(args[0].from, - entry->fsuuid); + result = uuid_parse(args[0].from, (uuid_t *)&entry->fsuuid); if (!result) entry->flags |= IMA_FSUUID; break; From 787d8c530af73257240fc0c0f60e296a83d5e5f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jun 2017 07:00:26 +0200 Subject: [PATCH 221/249] ima/policy: switch to use uuid_t Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Acked-by: Mimi Zohar Reviewed-by: Andy Shevchenko --- security/integrity/ima/ima_policy.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 1431ada649e5..9a7c7cbdbe7c 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -61,7 +61,7 @@ struct ima_rule_entry { enum ima_hooks func; int mask; unsigned long fsmagic; - u8 fsuuid[16]; + uuid_t fsuuid; kuid_t uid; kuid_t fowner; bool (*uid_op)(kuid_t, kuid_t); /* Handlers for operators */ @@ -244,7 +244,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode, && rule->fsmagic != inode->i_sb->s_magic) return false; if ((rule->flags & IMA_FSUUID) && - memcmp(rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid))) + memcmp(&rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid))) return false; if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid)) return false; @@ -711,13 +711,12 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry) case Opt_fsuuid: ima_log_string(ab, "fsuuid", args[0].from); - if (memchr_inv(entry->fsuuid, 0x00, - sizeof(entry->fsuuid))) { + if (uuid_is_null(&entry->fsuuid)) { result = -EINVAL; break; } - result = uuid_parse(args[0].from, (uuid_t *)&entry->fsuuid); + result = uuid_parse(args[0].from, &entry->fsuuid); if (!result) entry->flags |= IMA_FSUUID; break; @@ -1086,7 +1085,7 @@ int ima_policy_show(struct seq_file *m, void *v) } if (entry->flags & IMA_FSUUID) { - seq_printf(m, "fsuuid=%pU", entry->fsuuid); + seq_printf(m, "fsuuid=%pU", &entry->fsuuid); seq_puts(m, " "); } From 85787090a21eb749d8b347eaf9ff1a455637473c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 May 2017 15:06:33 +0200 Subject: [PATCH 222/249] fs: switch ->s_uuid to uuid_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For some file systems we still memcpy into it, but in various places this already allows us to use the proper uuid helpers. More to come.. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Acked-by: Mimi Zohar  (Changes to IMA/EVM) Reviewed-by: Andy Shevchenko --- drivers/xen/tmem.c | 6 +++--- fs/ext4/super.c | 2 +- fs/f2fs/super.c | 2 +- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/sys.c | 22 +++++----------------- fs/ocfs2/super.c | 2 +- fs/overlayfs/copy_up.c | 5 ++--- fs/overlayfs/namei.c | 2 +- fs/xfs/xfs_mount.c | 3 +-- include/linux/cleancache.h | 2 +- include/linux/fs.h | 5 +++-- mm/cleancache.c | 2 +- security/integrity/evm/evm_crypto.c | 2 +- security/integrity/ima/ima_policy.c | 2 +- 14 files changed, 23 insertions(+), 36 deletions(-) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 4ac2ca8a7656..bf13d1ec51f3 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -233,12 +233,12 @@ static int tmem_cleancache_init_fs(size_t pagesize) return xen_tmem_new_pool(uuid_private, 0, pagesize); } -static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize) +static int tmem_cleancache_init_shared_fs(uuid_t *uuid, size_t pagesize) { struct tmem_pool_uuid shared_uuid; - shared_uuid.uuid_lo = *(u64 *)uuid; - shared_uuid.uuid_hi = *(u64 *)(&uuid[8]); + shared_uuid.uuid_lo = *(u64 *)&uuid->b[0]; + shared_uuid.uuid_hi = *(u64 *)&uuid->b[8]; return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize); } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 0b177da9ea82..6e3b4186a22f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3952,7 +3952,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->s_qcop = &ext4_qctl_operations; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif - memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); + memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 83355ec4a92c..0b89b0b7b9f7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1937,7 +1937,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); - memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); + memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ed67548b286c..b92135c202c2 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -203,7 +203,7 @@ static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf) memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); - memcpy(s->s_uuid, str->sb_uuid, 16); + memcpy(&s->s_uuid, str->sb_uuid, 16); } /** diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 7a515345610c..e77bc52b468f 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -71,25 +71,14 @@ static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); } -static int gfs2_uuid_valid(const u8 *uuid) -{ - int i; - - for (i = 0; i < 16; i++) { - if (uuid[i]) - return 1; - } - return 0; -} - static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) { struct super_block *s = sdp->sd_vfs; - const u8 *uuid = s->s_uuid; + buf[0] = '\0'; - if (!gfs2_uuid_valid(uuid)) + if (uuid_is_null(&s->s_uuid)) return 0; - return snprintf(buf, PAGE_SIZE, "%pUB\n", uuid); + return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid); } static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) @@ -712,14 +701,13 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, { struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); struct super_block *s = sdp->sd_vfs; - const u8 *uuid = s->s_uuid; add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid); - if (gfs2_uuid_valid(uuid)) - add_uevent_var(env, "UUID=%pUB", uuid); + if (!uuid_is_null(&s->s_uuid)) + add_uevent_var(env, "UUID=%pUB", &s->s_uuid); return 0; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ca1646fbcaef..83005f486451 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2062,7 +2062,7 @@ static int ocfs2_initialize_super(struct super_block *sb, cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); - memcpy(sb->s_uuid, di->id2.i_super.s_uuid, + memcpy(&sb->s_uuid, di->id2.i_super.s_uuid, sizeof(di->id2.i_super.s_uuid)); osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 7a44533f4bbf..d55fceb4e414 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -284,7 +284,6 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, struct dentry *upper) { struct super_block *sb = lower->d_sb; - uuid_be *uuid = (uuid_be *) &sb->s_uuid; const struct ovl_fh *fh = NULL; int err; @@ -294,8 +293,8 @@ static int ovl_set_origin(struct dentry *dentry, struct dentry *lower, * up and a pure upper inode. */ if (sb->s_export_op && sb->s_export_op->fh_to_dentry && - uuid_be_cmp(*uuid, NULL_UUID_BE)) { - fh = ovl_encode_fh(lower, uuid); + !uuid_is_null(&sb->s_uuid)) { + fh = ovl_encode_fh(lower, &sb->s_uuid); if (IS_ERR(fh)) return PTR_ERR(fh); } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index f3136c31e72a..de0d4f742f36 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -135,7 +135,7 @@ static struct dentry *ovl_get_origin(struct dentry *dentry, * Make sure that the stored uuid matches the uuid of the lower * layer where file handle will be decoded. */ - if (uuid_be_cmp(fh->uuid, *(uuid_be *) &mnt->mnt_sb->s_uuid)) + if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid)) goto out; origin = exportfs_decode_fh(mnt, (struct fid *)fh->fid, diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 54452967e35e..d249546da15e 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -74,8 +74,7 @@ xfs_uuid_mount( int hole, i; /* Publish UUID in struct super_block */ - BUILD_BUG_ON(sizeof(mp->m_super->s_uuid) != sizeof(uuid_t)); - memcpy(&mp->m_super->s_uuid, uuid, sizeof(uuid_t)); + uuid_copy(&mp->m_super->s_uuid, uuid); if (mp->m_flags & XFS_MOUNT_NOUUID) return 0; diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h index fccf7f44139d..bbb3712dd892 100644 --- a/include/linux/cleancache.h +++ b/include/linux/cleancache.h @@ -27,7 +27,7 @@ struct cleancache_filekey { struct cleancache_ops { int (*init_fs)(size_t); - int (*init_shared_fs)(char *uuid, size_t); + int (*init_shared_fs)(uuid_t *uuid, size_t); int (*get_page)(int, struct cleancache_filekey, pgoff_t, struct page *); void (*put_page)(int, struct cleancache_filekey, diff --git a/include/linux/fs.h b/include/linux/fs.h index 803e5a9b2654..3e68cabb8457 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -1328,8 +1329,8 @@ struct super_block { struct sb_writers s_writers; - char s_id[32]; /* Informational name */ - u8 s_uuid[16]; /* UUID */ + char s_id[32]; /* Informational name */ + uuid_t s_uuid; /* UUID */ void *s_fs_info; /* Filesystem private info */ unsigned int s_max_links; diff --git a/mm/cleancache.c b/mm/cleancache.c index ba5d8f3e6d68..f7b9fdc79d97 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -130,7 +130,7 @@ void __cleancache_init_shared_fs(struct super_block *sb) int pool_id = CLEANCACHE_NO_BACKEND_SHARED; if (cleancache_ops) { - pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE); + pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE); if (pool_id < 0) pool_id = CLEANCACHE_NO_POOL; } diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index d7f282d75cc1..1d32cd20009a 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -164,7 +164,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, hmac_misc.mode = inode->i_mode; crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc)); if (evm_hmac_attrs & EVM_ATTR_FSUUID) - crypto_shash_update(desc, inode->i_sb->s_uuid, + crypto_shash_update(desc, &inode->i_sb->s_uuid.b[0], sizeof(inode->i_sb->s_uuid)); crypto_shash_final(desc, digest); } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 9a7c7cbdbe7c..6f885fab9d84 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -244,7 +244,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode, && rule->fsmagic != inode->i_sb->s_magic) return false; if ((rule->flags & IMA_FSUUID) && - memcmp(&rule->fsuuid, inode->i_sb->s_uuid, sizeof(rule->fsuuid))) + !uuid_equal(&rule->fsuuid, &inode->i_sb->s_uuid)) return false; if ((rule->flags & IMA_UID) && !rule->uid_op(cred->uid, rule->uid)) return false; From 01633fd254182eaa9372efa5c0688bf286e60d6b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:32:50 +0200 Subject: [PATCH 223/249] overlayfs: use uuid_t instead of uuid_be Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/overlayfs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index d55fceb4e414..33fe6ca929f7 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -233,7 +233,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) return err; } -static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_be *uuid) +static struct ovl_fh *ovl_encode_fh(struct dentry *lower, uuid_t *uuid) { struct ovl_fh *fh; int fh_type, fh_len, dwords; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 0623cebeefff..10863b4105fa 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -56,7 +56,7 @@ struct ovl_fh { u8 len; /* size of this header + size of fid */ u8 flags; /* OVL_FH_FLAG_* */ u8 type; /* fid_type of fid */ - uuid_be uuid; /* uuid of filesystem */ + uuid_t uuid; /* uuid of filesystem */ u8 fid[0]; /* file identifier */ } __packed; From 59b9c6291c4fcacf2e8290d79560aa78edbbbe2e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:38:37 +0200 Subject: [PATCH 224/249] partitions/ldm: switch to use uuid_t And the uuid helpers. Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- block/partitions/ldm.c | 10 +++++----- block/partitions/ldm.h | 6 ++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c index edcea70674c9..2a365c756648 100644 --- a/block/partitions/ldm.c +++ b/block/partitions/ldm.c @@ -115,7 +115,7 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph) ldm_error("PRIVHEAD disk size doesn't match real disk size"); return false; } - if (uuid_be_to_bin(data + 0x0030, (uuid_be *)ph->disk_id)) { + if (uuid_parse(data + 0x0030, &ph->disk_id)) { ldm_error("PRIVHEAD contains an invalid GUID."); return false; } @@ -234,7 +234,7 @@ static bool ldm_compare_privheads (const struct privhead *ph1, (ph1->logical_disk_size == ph2->logical_disk_size) && (ph1->config_start == ph2->config_start) && (ph1->config_size == ph2->config_size) && - !memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE)); + uuid_equal(&ph1->disk_id, &ph2->disk_id)); } /** @@ -557,7 +557,7 @@ static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb) list_for_each (item, &ldb->v_disk) { struct vblk *v = list_entry (item, struct vblk, list); - if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE)) + if (uuid_equal(&v->vblk.disk.disk_id, &ldb->ph.disk_id)) return v; } @@ -892,7 +892,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb) disk = &vb->vblk.disk; ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name, sizeof (disk->alt_name)); - if (uuid_be_to_bin(buffer + 0x19 + r_name, (uuid_be *)disk->disk_id)) + if (uuid_parse(buffer + 0x19 + r_name, &disk->disk_id)) return false; return true; @@ -927,7 +927,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) return false; disk = &vb->vblk.disk; - memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE); + uuid_copy(&disk->disk_id, (uuid_t *)(buffer + 0x18 + r_name)); return true; } diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h index 374242c0971a..f4c6055df956 100644 --- a/block/partitions/ldm.h +++ b/block/partitions/ldm.h @@ -112,8 +112,6 @@ struct frag { /* VBLK Fragment handling */ /* In memory LDM database structures. */ -#define GUID_SIZE 16 - struct privhead { /* Offsets and sizes are in sectors. */ u16 ver_major; u16 ver_minor; @@ -121,7 +119,7 @@ struct privhead { /* Offsets and sizes are in sectors. */ u64 logical_disk_size; u64 config_start; u64 config_size; - u8 disk_id[GUID_SIZE]; + uuid_t disk_id; }; struct tocblock { /* We have exactly two bitmaps. */ @@ -154,7 +152,7 @@ struct vblk_dgrp { /* VBLK Disk Group */ }; struct vblk_disk { /* VBLK Disk */ - u8 disk_id[GUID_SIZE]; + uuid_t disk_id; u8 alt_name[128]; }; From 680895d6efe47332d25e49817d2d6781295c1614 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:51:09 +0200 Subject: [PATCH 225/249] sysctl: switch to use uuid_t Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- kernel/sysctl_binary.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index ece4b177052b..939a158eab11 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1119,7 +1119,7 @@ static ssize_t bin_uuid(struct file *file, /* Only supports reads */ if (oldval && oldlen) { char buf[UUID_STRING_LEN + 1]; - uuid_be uuid; + uuid_t uuid; result = kernel_read(file, 0, buf, sizeof(buf) - 1); if (result < 0) @@ -1128,7 +1128,7 @@ static ssize_t bin_uuid(struct file *file, buf[result] = '\0'; result = -EIO; - if (uuid_be_to_bin(buf, &uuid)) + if (uuid_parse(buf, &uuid)) goto out; if (oldlen > 16) From 8e41226324e7c00f2087bfbc9f470d665e92df18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:54:27 +0200 Subject: [PATCH 226/249] nvme: switch to uuid_t Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- drivers/nvme/host/fabrics.c | 8 ++++---- drivers/nvme/host/fabrics.h | 2 +- drivers/nvme/host/fc.c | 3 +-- drivers/nvme/target/nvmet.h | 1 + include/linux/nvme-fc.h | 3 +-- include/linux/nvme.h | 3 ++- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 990e6fb32a63..c190d7e36900 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -58,7 +58,7 @@ static struct nvmf_host *nvmf_host_add(const char *hostnqn) kref_init(&host->ref); memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE); - uuid_be_gen(&host->id); + uuid_gen(&host->id); list_add_tail(&host->list, &nvmf_hosts); out_unlock: @@ -75,7 +75,7 @@ static struct nvmf_host *nvmf_host_default(void) return NULL; kref_init(&host->ref); - uuid_be_gen(&host->id); + uuid_gen(&host->id); snprintf(host->nqn, NVMF_NQN_SIZE, "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); @@ -395,7 +395,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) if (!data) return -ENOMEM; - memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be)); + uuid_copy(&data->hostid, &ctrl->opts->host->id); data->cntlid = cpu_to_le16(0xffff); strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); @@ -454,7 +454,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) if (!data) return -ENOMEM; - memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be)); + uuid_copy(&data->hostid, &ctrl->opts->host->id); data->cntlid = cpu_to_le16(ctrl->cntlid); strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index f5a9c1fb186f..29be7600689d 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -36,7 +36,7 @@ struct nvmf_host { struct kref ref; struct list_head list; char nqn[NVMF_NQN_SIZE]; - uuid_be id; + uuid_t id; }; /** diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5b14cbefb724..96b983bb44bd 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -878,8 +878,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); /* Linux supports only Dynamic controllers */ assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); - memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id, - min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be))); + uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index cfc5c7fb0ab7..8ff6e430b30a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index e997c4a49a88..bc711a10be05 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -177,7 +177,6 @@ struct fcnvme_lsdesc_rjt { }; -#define FCNVME_ASSOC_HOSTID_LEN 16 #define FCNVME_ASSOC_HOSTNQN_LEN 256 #define FCNVME_ASSOC_SUBNQN_LEN 256 @@ -191,7 +190,7 @@ struct fcnvme_lsdesc_cr_assoc_cmd { __be16 cntlid; __be16 sqsize; __be32 rsvd52; - u8 hostid[FCNVME_ASSOC_HOSTID_LEN]; + uuid_t hostid; u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN]; u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN]; u8 rsvd632[384]; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b625bacf37ef..e400a69fa1d3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,6 +16,7 @@ #define _LINUX_NVME_H #include +#include /* NQN names in commands fields specified one size */ #define NVMF_NQN_FIELD_LEN 256 @@ -843,7 +844,7 @@ struct nvmf_connect_command { }; struct nvmf_connect_data { - __u8 hostid[16]; + uuid_t hostid; __le16 cntlid; char resv4[238]; char subsysnqn[NVMF_NQN_FIELD_LEN]; From bf47643389bbc1164c2615dc420ed0010264222e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 17 May 2017 09:55:26 +0200 Subject: [PATCH 227/249] scsi_debug: switch to uuid_t Signed-off-by: Christoph Hellwig Reviewed-by: Amir Goldstein Reviewed-by: Andy Shevchenko --- drivers/scsi/scsi_debug.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 17249c3650fe..35ee09644cfb 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -245,7 +245,7 @@ struct sdebug_dev_info { unsigned int channel; unsigned int target; u64 lun; - uuid_be lu_name; + uuid_t lu_name; struct sdebug_host_info *sdbg_host; unsigned long uas_bm[1]; atomic_t num_in_q; @@ -965,7 +965,7 @@ static const u64 naa3_comp_c = 0x3111111000000000ULL; static int inquiry_vpd_83(unsigned char *arr, int port_group_id, int target_dev_id, int dev_id_num, const char *dev_id_str, int dev_id_str_len, - const uuid_be *lu_name) + const uuid_t *lu_name) { int num, port_a; char b[32]; @@ -3568,7 +3568,7 @@ static void sdebug_q_cmd_wq_complete(struct work_struct *work) } static bool got_shared_uuid; -static uuid_be shared_uuid; +static uuid_t shared_uuid; static struct sdebug_dev_info *sdebug_device_create( struct sdebug_host_info *sdbg_host, gfp_t flags) @@ -3578,12 +3578,12 @@ static struct sdebug_dev_info *sdebug_device_create( devip = kzalloc(sizeof(*devip), flags); if (devip) { if (sdebug_uuid_ctl == 1) - uuid_be_gen(&devip->lu_name); + uuid_gen(&devip->lu_name); else if (sdebug_uuid_ctl == 2) { if (got_shared_uuid) devip->lu_name = shared_uuid; else { - uuid_be_gen(&shared_uuid); + uuid_gen(&shared_uuid); got_shared_uuid = true; devip->lu_name = shared_uuid; } From 2b4db79618ade14a9566455cbbea339f74674789 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 18 May 2017 15:29:33 +0300 Subject: [PATCH 228/249] tmpfs: generate random sb->s_uuid This is used by overlayfs to encode intrasystem unique file handles. Suggested-by: Miklos Szeredi Cc: Hugh Dickins Cc: Andrew Morton Signed-off-by: Amir Goldstein Signed-off-by: Christoph Hellwig --- mm/shmem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/shmem.c b/mm/shmem.c index e67d6ba4e98e..391f2dcca727 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -75,6 +75,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include @@ -3761,6 +3762,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_TMPFS_POSIX_ACL sb->s_flags |= MS_POSIXACL; #endif + uuid_gen(&sb->s_uuid); inode = shmem_get_inode(sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); if (!inode) From fafd3cdfbe246e280c0a18b424dfae73f5485c5d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 May 2017 10:52:58 +0200 Subject: [PATCH 229/249] MAINTAINERS: add uuid entry I'll keep maintaining whatever little changed we need here, with Andy as my designated reviewer. Signed-off-by: Christoph Hellwig --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 7a28acd7f525..714da939a8cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13463,6 +13463,17 @@ W: http://en.wikipedia.org/wiki/Util-linux T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git S: Maintained +UUID HELPERS +M: Christoph Hellwig +R: Andy Shevchenko +L: linux-kernel@vger.kernel.org +T: git git://git.infradead.org/users/hch/uuid.git +F: lib/uuid.c +F: lib/test_uuid.c +F: include/linux/uuid.h +F: include/uapi/linux/uuid.h +S: Maintained + UVESAFB DRIVER M: Michal Januszewski L: linux-fbdev@vger.kernel.org From 41c8bdb3ab10c1fefcac61d081e2fd9aaf8694b8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2017 19:40:42 +0300 Subject: [PATCH 230/249] acpi, nfit: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. Reviewed-by: Dan Williams Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- drivers/acpi/nfit/core.c | 54 ++++++++++++++++++++-------------------- drivers/acpi/nfit/nfit.h | 3 +-- include/linux/acpi.h | 1 + 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 656acb5d7166..d9b39d0e9d6a 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -74,11 +74,11 @@ struct nfit_table_prev { struct list_head flushes; }; -static u8 nfit_uuid[NFIT_UUID_MAX][16]; +static guid_t nfit_uuid[NFIT_UUID_MAX]; -const u8 *to_nfit_uuid(enum nfit_uuids id) +const guid_t *to_nfit_uuid(enum nfit_uuids id) { - return nfit_uuid[id]; + return &nfit_uuid[id]; } EXPORT_SYMBOL(to_nfit_uuid); @@ -222,7 +222,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, u32 offset, fw_status = 0; acpi_handle handle; unsigned int func; - const u8 *uuid; + const guid_t *guid; int rc, i; func = cmd; @@ -245,7 +245,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_mask = nvdimm_cmd_mask(nvdimm); dsm_mask = nfit_mem->dsm_mask; desc = nd_cmd_dimm_desc(cmd); - uuid = to_nfit_uuid(nfit_mem->family); + guid = to_nfit_uuid(nfit_mem->family); handle = adev->handle; } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); @@ -254,7 +254,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, cmd_mask = nd_desc->cmd_mask; dsm_mask = cmd_mask; desc = nd_cmd_bus_desc(cmd); - uuid = to_nfit_uuid(NFIT_DEV_BUS); + guid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; dimm_name = "bus"; } @@ -289,7 +289,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, in_buf.buffer.pointer, min_t(u32, 256, in_buf.buffer.length), true); - out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj); + out_obj = acpi_evaluate_dsm(handle, guid.b, 1, func, &in_obj); if (!out_obj) { dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, cmd_name); @@ -409,7 +409,7 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa) int i; for (i = 0; i < NFIT_UUID_MAX; i++) - if (memcmp(to_nfit_uuid(i), spa->range_guid, 16) == 0) + if (guid_equal(to_nfit_uuid(i), (guid_t *)&spa->range_guid)) return i; return -1; } @@ -1415,7 +1415,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, struct acpi_device *adev, *adev_dimm; struct device *dev = acpi_desc->dev; unsigned long dsm_mask; - const u8 *uuid; + const guid_t *guid; int i; int family = -1; @@ -1444,7 +1444,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, /* * Until standardization materializes we need to consider 4 * different command sets. Note, that checking for function0 (bit0) - * tells us if any commands are reachable through this uuid. + * tells us if any commands are reachable through this GUID. */ for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++) if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) @@ -1474,9 +1474,9 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return 0; } - uuid = to_nfit_uuid(nfit_mem->family); + guid = to_nfit_uuid(nfit_mem->family); for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) - if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) + if (acpi_check_dsm(adev_dimm->handle, guid.b, 1, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); return 0; @@ -1611,7 +1611,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) { struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS); + const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS); struct acpi_device *adev; int i; @@ -1621,7 +1621,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) return; for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) - if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) + if (acpi_check_dsm(adev->handle, guid.b, 1, 1ULL << i)) set_bit(i, &nd_desc->cmd_mask); } @@ -3051,19 +3051,19 @@ static __init int nfit_init(void) BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); - acpi_str_to_uuid(UUID_VOLATILE_MEMORY, nfit_uuid[NFIT_SPA_VOLATILE]); - acpi_str_to_uuid(UUID_PERSISTENT_MEMORY, nfit_uuid[NFIT_SPA_PM]); - acpi_str_to_uuid(UUID_CONTROL_REGION, nfit_uuid[NFIT_SPA_DCR]); - acpi_str_to_uuid(UUID_DATA_REGION, nfit_uuid[NFIT_SPA_BDW]); - acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_VDISK]); - acpi_str_to_uuid(UUID_VOLATILE_VIRTUAL_CD, nfit_uuid[NFIT_SPA_VCD]); - acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_DISK, nfit_uuid[NFIT_SPA_PDISK]); - acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]); - acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); - acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); - acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]); - acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]); - acpi_str_to_uuid(UUID_NFIT_DIMM_N_MSFT, nfit_uuid[NFIT_DEV_DIMM_N_MSFT]); + guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]); + guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]); + guid_parse(UUID_CONTROL_REGION, &nfit_uuid[NFIT_SPA_DCR]); + guid_parse(UUID_DATA_REGION, &nfit_uuid[NFIT_SPA_BDW]); + guid_parse(UUID_VOLATILE_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_VDISK]); + guid_parse(UUID_VOLATILE_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_VCD]); + guid_parse(UUID_PERSISTENT_VIRTUAL_DISK, &nfit_uuid[NFIT_SPA_PDISK]); + guid_parse(UUID_PERSISTENT_VIRTUAL_CD, &nfit_uuid[NFIT_SPA_PCD]); + guid_parse(UUID_NFIT_BUS, &nfit_uuid[NFIT_DEV_BUS]); + guid_parse(UUID_NFIT_DIMM, &nfit_uuid[NFIT_DEV_DIMM]); + guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]); + guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]); + guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]); nfit_wq = create_singlethread_workqueue("nfit"); if (!nfit_wq) diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 58fb7d68e04a..29bdd959517f 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -237,7 +236,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc( return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); } -const u8 *to_nfit_uuid(enum nfit_uuids id); +const guid_t *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); void acpi_nfit_shutdown(void *data); void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 137e4a3d89c5..b0e1636ca5c3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include From 5b53696a30d5f52d35fec21c1da1e52520225efe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2017 19:40:43 +0300 Subject: [PATCH 231/249] ACPI / APEI: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. Cc: Borislav Petkov Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- drivers/acpi/apei/ghes.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index d0855c09f32f..980515e029fa 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -431,12 +431,13 @@ static void ghes_do_proc(struct ghes *ghes, { int sev, sec_sev; struct acpi_hest_generic_data *gdata; + guid_t *sec_type; sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { + sec_type = (guid_t *)gdata->section_type; sec_sev = ghes_severity(gdata->error_severity); - if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, - CPER_SEC_PLATFORM_MEM)) { + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err; mem_err = (struct cper_sec_mem_err *)(gdata+1); ghes_edac_report_mem_error(ghes, sev, mem_err); @@ -445,8 +446,7 @@ static void ghes_do_proc(struct ghes *ghes, ghes_handle_memory_failure(gdata, sev); } #ifdef CONFIG_ACPI_APEI_PCIEAER - else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type, - CPER_SEC_PCIE)) { + else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { struct cper_sec_pcie *pcie_err; pcie_err = (struct cper_sec_pcie *)(gdata+1); if (sev == GHES_SEV_RECOVERABLE && From dfcaad8faeec3208bb1905ca3c1e4fd4cef46113 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2017 19:40:44 +0300 Subject: [PATCH 232/249] ACPI / bus: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. Acked-by: Rafael J. Wysocki Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- drivers/acpi/bus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 784bda663d16..042cd16265b3 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -225,13 +225,13 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) struct acpi_object_list input; union acpi_object in_params[4]; union acpi_object *out_obj; - u8 uuid[16]; + guid_t guid; u32 errors; struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; if (!context) return AE_ERROR; - if (ACPI_FAILURE(acpi_str_to_uuid(context->uuid_str, uuid))) + if (guid_parse(context->uuid_str, &guid)) return AE_ERROR; context->ret.length = ACPI_ALLOCATE_BUFFER; context->ret.pointer = NULL; @@ -241,7 +241,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) input.pointer = in_params; in_params[0].type = ACPI_TYPE_BUFFER; in_params[0].buffer.length = 16; - in_params[0].buffer.pointer = uuid; + in_params[0].buffer.pointer = (u8 *)&guid; in_params[1].type = ACPI_TYPE_INTEGER; in_params[1].integer.value = context->rev; in_params[2].type = ACPI_TYPE_INTEGER; From b7fe92999a98a9aab3c292bd44942f3bdbe04765 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2017 19:40:45 +0300 Subject: [PATCH 233/249] ACPI / extlog: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. Cc: Borislav Petkov Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- drivers/acpi/acpi_extlog.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 502ea4dc2080..193529417cc3 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -141,9 +141,9 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, int cpu = mce->extcpu; struct acpi_hest_generic_status *estatus, *tmp; struct acpi_hest_generic_data *gdata; - const uuid_le *fru_id = &NULL_UUID_LE; + const guid_t *fru_id = &guid_null; char *fru_text = ""; - uuid_le *sec_type; + guid_t *sec_type; static u32 err_seq; estatus = extlog_elog_entry_check(cpu, bank); @@ -165,11 +165,11 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, err_seq++; gdata = (struct acpi_hest_generic_data *)(tmp + 1); if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) - fru_id = (uuid_le *)gdata->fru_id; + fru_id = (guid_t *)gdata->fru_id; if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) fru_text = gdata->fru_text; - sec_type = (uuid_le *)gdata->section_type; - if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { + sec_type = (guid_t *)gdata->section_type; + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem = (void *)(gdata + 1); if (gdata->error_data_length >= sizeof(*mem)) trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, @@ -182,17 +182,17 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, static bool __init extlog_get_l1addr(void) { - u8 uuid[16]; + guid_t guid; acpi_handle handle; union acpi_object *obj; - acpi_str_to_uuid(extlog_dsm_uuid, uuid); - + if (guid_parse(extlog_dsm_uuid, &guid)) + return false; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return false; - if (!acpi_check_dsm(handle, uuid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR)) + if (!acpi_check_dsm(handle, guid.b, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR)) return false; - obj = acpi_evaluate_dsm_typed(handle, uuid, EXTLOG_DSM_REV, + obj = acpi_evaluate_dsm_typed(handle, guid.b, EXTLOG_DSM_REV, EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER); if (!obj) { return false; From 94116f8126de9762751fd92731581b73b56292e5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Jun 2017 19:40:46 +0300 Subject: [PATCH 234/249] ACPI: Switch to use generic guid_t in acpi_evaluate_dsm() acpi_evaluate_dsm() and friends take a pointer to a raw buffer of 16 bytes. Instead we convert them to use guid_t type. At the same time we convert current users. acpi_str_to_uuid() becomes useless after the conversion and it's safe to get rid of it. Acked-by: Rafael J. Wysocki Cc: Borislav Petkov Acked-by: Dan Williams Cc: Amir Goldstein Reviewed-by: Jarkko Sakkinen Reviewed-by: Jani Nikula Acked-by: Jani Nikula Cc: Ben Skeggs Acked-by: Benjamin Tissoires Acked-by: Joerg Roedel Acked-by: Adrian Hunter Cc: Yisen Zhuang Acked-by: Bjorn Helgaas Acked-by: Felipe Balbi Acked-by: Mathias Nyman Reviewed-by: Heikki Krogerus Acked-by: Mark Brown Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- drivers/acpi/acpi_extlog.c | 4 ++-- drivers/acpi/bus.c | 23 ------------------- drivers/acpi/nfit/core.c | 6 ++--- drivers/acpi/utils.c | 16 ++++++------- drivers/char/tpm/tpm_crb.c | 9 ++++---- drivers/char/tpm/tpm_ppi.c | 20 +++++++--------- drivers/gpu/drm/i915/intel_acpi.c | 14 ++++------- drivers/gpu/drm/nouveau/nouveau_acpi.c | 20 ++++++++-------- .../gpu/drm/nouveau/nvkm/subdev/mxm/base.c | 9 ++++---- drivers/hid/i2c-hid/i2c-hid.c | 9 ++++---- drivers/iommu/dmar.c | 11 ++++----- drivers/mmc/host/sdhci-pci-core.c | 9 ++++---- .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 15 ++++++------ drivers/pci/pci-acpi.c | 13 +++++------ drivers/pci/pci-label.c | 4 ++-- drivers/usb/dwc3/dwc3-pci.c | 10 ++++---- drivers/usb/host/xhci-pci.c | 9 ++++---- drivers/usb/misc/ucsi.c | 6 ++--- drivers/usb/typec/typec_wcove.c | 8 +++---- include/acpi/acpi_bus.h | 11 +++++---- include/linux/acpi.h | 3 +-- include/linux/pci-acpi.h | 2 +- sound/soc/intel/skylake/skl-nhlt.c | 7 +++--- tools/testing/nvdimm/test/iomap.c | 6 ++--- tools/testing/nvdimm/test/nfit.c | 2 +- tools/testing/nvdimm/test/nfit_test.h | 4 +++- 26 files changed, 106 insertions(+), 144 deletions(-) diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 193529417cc3..560fdae8cc59 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -190,9 +190,9 @@ static bool __init extlog_get_l1addr(void) return false; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle))) return false; - if (!acpi_check_dsm(handle, guid.b, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR)) + if (!acpi_check_dsm(handle, &guid, EXTLOG_DSM_REV, 1 << EXTLOG_FN_ADDR)) return false; - obj = acpi_evaluate_dsm_typed(handle, guid.b, EXTLOG_DSM_REV, + obj = acpi_evaluate_dsm_typed(handle, &guid, EXTLOG_DSM_REV, EXTLOG_FN_ADDR, NULL, ACPI_TYPE_INTEGER); if (!obj) { return false; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 042cd16265b3..5a6fbe0fcaf2 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -196,29 +196,6 @@ static void acpi_print_osc_error(acpi_handle handle, pr_debug("\n"); } -acpi_status acpi_str_to_uuid(char *str, u8 *uuid) -{ - int i; - static int opc_map_to_uuid[16] = {6, 4, 2, 0, 11, 9, 16, 14, 19, 21, - 24, 26, 28, 30, 32, 34}; - - if (strlen(str) != 36) - return AE_BAD_PARAMETER; - for (i = 0; i < 36; i++) { - if (i == 8 || i == 13 || i == 18 || i == 23) { - if (str[i] != '-') - return AE_BAD_PARAMETER; - } else if (!isxdigit(str[i])) - return AE_BAD_PARAMETER; - } - for (i = 0; i < 16; i++) { - uuid[i] = hex_to_bin(str[opc_map_to_uuid[i]]) << 4; - uuid[i] |= hex_to_bin(str[opc_map_to_uuid[i] + 1]); - } - return AE_OK; -} -EXPORT_SYMBOL_GPL(acpi_str_to_uuid); - acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context) { acpi_status status; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index d9b39d0e9d6a..097eff0b963d 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -289,7 +289,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, in_buf.buffer.pointer, min_t(u32, 256, in_buf.buffer.length), true); - out_obj = acpi_evaluate_dsm(handle, guid.b, 1, func, &in_obj); + out_obj = acpi_evaluate_dsm(handle, guid, 1, func, &in_obj); if (!out_obj) { dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, cmd_name); @@ -1476,7 +1476,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, guid = to_nfit_uuid(nfit_mem->family); for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) - if (acpi_check_dsm(adev_dimm->handle, guid.b, 1, 1ULL << i)) + if (acpi_check_dsm(adev_dimm->handle, guid, 1, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); return 0; @@ -1621,7 +1621,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) return; for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) - if (acpi_check_dsm(adev->handle, guid.b, 1, 1ULL << i)) + if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i)) set_bit(i, &nd_desc->cmd_mask); } diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 27d0dcfcf47d..b9d956c916f5 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -613,19 +613,19 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock) /** * acpi_evaluate_dsm - evaluate device's _DSM method * @handle: ACPI device handle - * @uuid: UUID of requested functions, should be 16 bytes + * @guid: GUID of requested functions, should be 16 bytes * @rev: revision number of requested function * @func: requested function number * @argv4: the function specific parameter * - * Evaluate device's _DSM method with specified UUID, revision id and + * Evaluate device's _DSM method with specified GUID, revision id and * function number. Caller needs to free the returned object. * * Though ACPI defines the fourth parameter for _DSM should be a package, * some old BIOSes do expect a buffer or an integer etc. */ union acpi_object * -acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func, +acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { acpi_status ret; @@ -638,7 +638,7 @@ acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func, params[0].type = ACPI_TYPE_BUFFER; params[0].buffer.length = 16; - params[0].buffer.pointer = (char *)uuid; + params[0].buffer.pointer = (u8 *)guid; params[1].type = ACPI_TYPE_INTEGER; params[1].integer.value = rev; params[2].type = ACPI_TYPE_INTEGER; @@ -666,7 +666,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm); /** * acpi_check_dsm - check if _DSM method supports requested functions. * @handle: ACPI device handle - * @uuid: UUID of requested functions, should be 16 bytes at least + * @guid: GUID of requested functions, should be 16 bytes at least * @rev: revision number of requested functions * @funcs: bitmap of requested functions * @@ -674,7 +674,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm); * functions. Currently only support 64 functions at maximum, should be * enough for now. */ -bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) +bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs) { int i; u64 mask = 0; @@ -683,7 +683,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) if (funcs == 0) return false; - obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL); + obj = acpi_evaluate_dsm(handle, guid, rev, 0, NULL); if (!obj) return false; @@ -697,7 +697,7 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) /* * Bit 0 indicates whether there's support for any functions other than - * function 0 for the specified UUID and revision. + * function 0 for the specified GUID and revision. */ if ((mask & 0x1) && (mask & funcs) == funcs) return true; diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index b917b9d5f710..c378c7b15d49 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -27,10 +27,9 @@ #define ACPI_SIG_TPM2 "TPM2" -static const u8 CRB_ACPI_START_UUID[] = { - /* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47, - /* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4 -}; +static const guid_t crb_acpi_start_guid = + GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714, + 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4); enum crb_defaults { CRB_ACPI_START_REVISION_ID = 1, @@ -266,7 +265,7 @@ static int crb_do_acpi_start(struct tpm_chip *chip) int rc; obj = acpi_evaluate_dsm(chip->acpi_dev_handle, - CRB_ACPI_START_UUID, + &crb_acpi_start_guid, CRB_ACPI_START_REVISION_ID, CRB_ACPI_START_INDEX, NULL); diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c index 692a2c6ae036..86dd8521feef 100644 --- a/drivers/char/tpm/tpm_ppi.c +++ b/drivers/char/tpm/tpm_ppi.c @@ -32,20 +32,16 @@ #define PPI_VS_REQ_START 128 #define PPI_VS_REQ_END 255 -static const u8 tpm_ppi_uuid[] = { - 0xA6, 0xFA, 0xDD, 0x3D, - 0x1B, 0x36, - 0xB4, 0x4E, - 0xA4, 0x24, - 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53 -}; +static const guid_t tpm_ppi_guid = + GUID_INIT(0x3DDDFAA6, 0x361B, 0x4EB4, + 0xA4, 0x24, 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53); static inline union acpi_object * tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type, union acpi_object *argv4) { BUG_ON(!ppi_handle); - return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid, + return acpi_evaluate_dsm_typed(ppi_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID, func, argv4, type); } @@ -107,7 +103,7 @@ static ssize_t tpm_store_ppi_request(struct device *dev, * is updated with function index from SUBREQ to SUBREQ2 since PPI * version 1.1 */ - if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid, + if (acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2)) func = TPM_PPI_FN_SUBREQ2; @@ -268,7 +264,7 @@ static ssize_t show_ppi_operations(acpi_handle dev_handle, char *buf, u32 start, "User not required", }; - if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID, + if (!acpi_check_dsm(dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_GETOPR)) return -EPERM; @@ -341,12 +337,12 @@ void tpm_add_ppi(struct tpm_chip *chip) if (!chip->acpi_dev_handle) return; - if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid, + if (!acpi_check_dsm(chip->acpi_dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION)) return; /* Cache PPI version string. */ - obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid, + obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, &tpm_ppi_guid, TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION, NULL, ACPI_TYPE_STRING); if (obj) { diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index eb638a1e69d2..42fb436f6cdc 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -15,13 +15,9 @@ static struct intel_dsm_priv { acpi_handle dhandle; } intel_dsm_priv; -static const u8 intel_dsm_guid[] = { - 0xd3, 0x73, 0xd8, 0x7e, - 0xd0, 0xc2, - 0x4f, 0x4e, - 0xa8, 0x54, - 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c -}; +static const guid_t intel_dsm_guid = + GUID_INIT(0x7ed873d3, 0xc2d0, 0x4e4f, + 0xa8, 0x54, 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c); static char *intel_dsm_port_name(u8 id) { @@ -80,7 +76,7 @@ static void intel_dsm_platform_mux_info(void) int i; union acpi_object *pkg, *connector_count; - pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, intel_dsm_guid, + pkg = acpi_evaluate_dsm_typed(intel_dsm_priv.dhandle, &intel_dsm_guid, INTEL_DSM_REVISION_ID, INTEL_DSM_FN_PLATFORM_MUX_INFO, NULL, ACPI_TYPE_PACKAGE); if (!pkg) { @@ -118,7 +114,7 @@ static bool intel_dsm_pci_probe(struct pci_dev *pdev) if (!dhandle) return false; - if (!acpi_check_dsm(dhandle, intel_dsm_guid, INTEL_DSM_REVISION_ID, + if (!acpi_check_dsm(dhandle, &intel_dsm_guid, INTEL_DSM_REVISION_ID, 1 << INTEL_DSM_FN_PLATFORM_MUX_INFO)) { DRM_DEBUG_KMS("no _DSM method for intel device\n"); return false; diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index 39468c218027..7459ef9943ec 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -60,15 +60,13 @@ bool nouveau_is_v1_dsm(void) { } #ifdef CONFIG_VGA_SWITCHEROO -static const char nouveau_dsm_muid[] = { - 0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D, - 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4, -}; +static const guid_t nouveau_dsm_muid = + GUID_INIT(0x9D95A0A0, 0x0060, 0x4D48, + 0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4); -static const char nouveau_op_dsm_muid[] = { - 0xF8, 0xD8, 0x86, 0xA4, 0xDA, 0x0B, 0x1B, 0x47, - 0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0, -}; +static const guid_t nouveau_op_dsm_muid = + GUID_INIT(0xA486D8F8, 0x0BDA, 0x471B, + 0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0); static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result) { @@ -86,7 +84,7 @@ static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t * args_buff[i] = (arg >> i * 8) & 0xFF; *result = 0; - obj = acpi_evaluate_dsm_typed(handle, nouveau_op_dsm_muid, 0x00000100, + obj = acpi_evaluate_dsm_typed(handle, &nouveau_op_dsm_muid, 0x00000100, func, &argv4, ACPI_TYPE_BUFFER); if (!obj) { acpi_handle_info(handle, "failed to evaluate _DSM\n"); @@ -138,7 +136,7 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg) .integer.value = arg, }; - obj = acpi_evaluate_dsm_typed(handle, nouveau_dsm_muid, 0x00000102, + obj = acpi_evaluate_dsm_typed(handle, &nouveau_dsm_muid, 0x00000102, func, &argv4, ACPI_TYPE_INTEGER); if (!obj) { acpi_handle_info(handle, "failed to evaluate _DSM\n"); @@ -259,7 +257,7 @@ static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out if (!acpi_has_method(dhandle, "_DSM")) return; - supports_mux = acpi_check_dsm(dhandle, nouveau_dsm_muid, 0x00000102, + supports_mux = acpi_check_dsm(dhandle, &nouveau_dsm_muid, 0x00000102, 1 << NOUVEAU_DSM_POWER); optimus_funcs = nouveau_dsm_get_optimus_functions(dhandle); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c index e3e2f5e83815..f44682d62f75 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c @@ -81,10 +81,9 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version) { struct nvkm_subdev *subdev = &mxm->subdev; struct nvkm_device *device = subdev->device; - static char muid[] = { - 0x00, 0xA4, 0x04, 0x40, 0x7D, 0x91, 0xF2, 0x4C, - 0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65 - }; + static guid_t muid = + GUID_INIT(0x4004A400, 0x917D, 0x4CF2, + 0xB8, 0x9C, 0x79, 0xB6, 0x2F, 0xD5, 0x56, 0x65); u32 mxms_args[] = { 0x00000000 }; union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, @@ -105,7 +104,7 @@ mxm_shadow_dsm(struct nvkm_mxm *mxm, u8 version) * unless you pass in exactly the version it supports.. */ rev = (version & 0xf0) << 4 | (version & 0x0f); - obj = acpi_evaluate_dsm(handle, muid, rev, 0x00000010, &argv4); + obj = acpi_evaluate_dsm(handle, &muid, rev, 0x00000010, &argv4); if (!obj) { nvkm_debug(subdev, "DSM MXMS failed\n"); return false; diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index fb55fb4c39fc..04015032a35a 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -872,10 +872,9 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid) static int i2c_hid_acpi_pdata(struct i2c_client *client, struct i2c_hid_platform_data *pdata) { - static u8 i2c_hid_guid[] = { - 0xF7, 0xF6, 0xDF, 0x3C, 0x67, 0x42, 0x55, 0x45, - 0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE, - }; + static guid_t i2c_hid_guid = + GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555, + 0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE); union acpi_object *obj; struct acpi_device *adev; acpi_handle handle; @@ -884,7 +883,7 @@ static int i2c_hid_acpi_pdata(struct i2c_client *client, if (!handle || acpi_bus_get_device(handle, &adev)) return -ENODEV; - obj = acpi_evaluate_dsm_typed(handle, i2c_hid_guid, 1, 1, NULL, + obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL, ACPI_TYPE_INTEGER); if (!obj) { dev_err(&client->dev, "device _DSM execution failed\n"); diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index cbf7763d8091..c8b0329c85d2 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1808,10 +1808,9 @@ IOMMU_INIT_POST(detect_intel_iommu); * for Directed-IO Architecture Specifiction, Rev 2.2, Section 8.8 * "Remapping Hardware Unit Hot Plug". */ -static u8 dmar_hp_uuid[] = { - /* 0000 */ 0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C, - /* 0008 */ 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF -}; +static guid_t dmar_hp_guid = + GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B, + 0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF); /* * Currently there's only one revision and BIOS will not check the revision id, @@ -1824,7 +1823,7 @@ static u8 dmar_hp_uuid[] = { static inline bool dmar_detect_dsm(acpi_handle handle, int func) { - return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func); + return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func); } static int dmar_walk_dsm_resource(acpi_handle handle, int func, @@ -1843,7 +1842,7 @@ static int dmar_walk_dsm_resource(acpi_handle handle, int func, if (!dmar_detect_dsm(handle, func)) return 0; - obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, + obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, func, NULL, ACPI_TYPE_BUFFER); if (!obj) return -ENODEV; diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 92fc3f7c538d..9577beb278e7 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -404,10 +404,9 @@ struct intel_host { bool d3_retune; }; -const u8 intel_dsm_uuid[] = { - 0xA5, 0x3E, 0xC1, 0xF6, 0xCD, 0x65, 0x1F, 0x46, - 0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61, -}; +const guid_t intel_dsm_guid = + GUID_INIT(0xF6C13EA5, 0x65CD, 0x461F, + 0xAB, 0x7A, 0x29, 0xF7, 0xE8, 0xD5, 0xBD, 0x61); static int __intel_dsm(struct intel_host *intel_host, struct device *dev, unsigned int fn, u32 *result) @@ -416,7 +415,7 @@ static int __intel_dsm(struct intel_host *intel_host, struct device *dev, int err = 0; size_t len; - obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), intel_dsm_uuid, 0, fn, NULL); + obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_dsm_guid, 0, fn, NULL); if (!obj) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index e13aa064a8e9..6b15a507999c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -29,10 +29,9 @@ enum _dsm_rst_type { HNS_ROCE_RESET_FUNC = 0x7, }; -const u8 hns_dsaf_acpi_dsm_uuid[] = { - 0x1A, 0xAA, 0x85, 0x1A, 0x93, 0xE2, 0x5E, 0x41, - 0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A -}; +const guid_t hns_dsaf_acpi_dsm_guid = + GUID_INIT(0x1A85AA1A, 0xE293, 0x415E, + 0x8E, 0x28, 0x8D, 0x69, 0x0A, 0x0F, 0x82, 0x0A); static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val) { @@ -151,7 +150,7 @@ static void hns_dsaf_acpi_srst_by_port(struct dsaf_device *dsaf_dev, u8 op_type, argv4.package.elements = obj_args; obj = acpi_evaluate_dsm(ACPI_HANDLE(dsaf_dev->dev), - hns_dsaf_acpi_dsm_uuid, 0, op_type, &argv4); + &hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4); if (!obj) { dev_warn(dsaf_dev->dev, "reset port_type%d port%d fail!", port_type, port); @@ -434,7 +433,7 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb) argv4.package.elements = &obj_args, obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev), - hns_dsaf_acpi_dsm_uuid, 0, + &hns_dsaf_acpi_dsm_guid, 0, HNS_OP_GET_PORT_TYPE_FUNC, &argv4); if (!obj || obj->type != ACPI_TYPE_INTEGER) @@ -474,7 +473,7 @@ int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt) argv4.package.elements = &obj_args, obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev), - hns_dsaf_acpi_dsm_uuid, 0, + &hns_dsaf_acpi_dsm_guid, 0, HNS_OP_GET_SFP_STAT_FUNC, &argv4); if (!obj || obj->type != ACPI_TYPE_INTEGER) @@ -565,7 +564,7 @@ hns_mac_config_sds_loopback_acpi(struct hns_mac_cb *mac_cb, bool en) argv4.package.elements = obj_args; obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dsaf_dev->dev), - hns_dsaf_acpi_dsm_uuid, 0, + &hns_dsaf_acpi_dsm_guid, 0, HNS_OP_SERDES_LP_FUNC, &argv4); if (!obj) { dev_warn(mac_cb->dsaf_dev->dev, "set port%d serdes lp fail!", diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 001860361434..47070cff508c 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -21,13 +21,12 @@ #include "pci.h" /* - * The UUID is defined in the PCI Firmware Specification available here: + * The GUID is defined in the PCI Firmware Specification available here: * https://www.pcisig.com/members/downloads/pcifw_r3_1_13Dec10.pdf */ -const u8 pci_acpi_dsm_uuid[] = { - 0xd0, 0x37, 0xc9, 0xe5, 0x53, 0x35, 0x7a, 0x4d, - 0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d -}; +const guid_t pci_acpi_dsm_guid = + GUID_INIT(0xe5c937d0, 0x3553, 0x4d7a, + 0x91, 0x17, 0xea, 0x4d, 0x19, 0xc3, 0x43, 0x4d); #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64) static int acpi_get_rc_addr(struct acpi_device *adev, struct resource *res) @@ -680,7 +679,7 @@ void acpi_pci_add_bus(struct pci_bus *bus) if (!pci_is_root_bus(bus)) return; - obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), pci_acpi_dsm_uuid, 3, + obj = acpi_evaluate_dsm(ACPI_HANDLE(bus->bridge), &pci_acpi_dsm_guid, 3, RESET_DELAY_DSM, NULL); if (!obj) return; @@ -745,7 +744,7 @@ static void pci_acpi_optimize_delay(struct pci_dev *pdev, if (bridge->ignore_reset_delay) pdev->d3cold_delay = 0; - obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 3, + obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 3, FUNCTION_DELAY_DSM, NULL); if (!obj) return; diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c index 51357377efbc..2d8db3ead6e8 100644 --- a/drivers/pci/pci-label.c +++ b/drivers/pci/pci-label.c @@ -172,7 +172,7 @@ static int dsm_get_label(struct device *dev, char *buf, if (!handle) return -1; - obj = acpi_evaluate_dsm(handle, pci_acpi_dsm_uuid, 0x2, + obj = acpi_evaluate_dsm(handle, &pci_acpi_dsm_guid, 0x2, DEVICE_LABEL_DSM, NULL); if (!obj) return -1; @@ -212,7 +212,7 @@ static bool device_has_dsm(struct device *dev) if (!handle) return false; - return !!acpi_check_dsm(handle, pci_acpi_dsm_uuid, 0x2, + return !!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2, 1 << DEVICE_LABEL_DSM); } diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 84a2cebfc712..fe851544d7fb 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -42,7 +42,7 @@ #define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e -#define PCI_INTEL_BXT_DSM_UUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" +#define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 #define PCI_INTEL_BXT_STATE_D0 0 #define PCI_INTEL_BXT_STATE_D3 3 @@ -51,14 +51,14 @@ * struct dwc3_pci - Driver private structure * @dwc3: child dwc3 platform_device * @pci: our link to PCI bus - * @uuid: _DSM UUID + * @guid: _DSM GUID * @has_dsm_for_pm: true for devices which need to run _DSM on runtime PM */ struct dwc3_pci { struct platform_device *dwc3; struct pci_dev *pci; - u8 uuid[16]; + guid_t guid; unsigned int has_dsm_for_pm:1; }; @@ -120,7 +120,7 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc) if (pdev->device == PCI_DEVICE_ID_INTEL_BXT || pdev->device == PCI_DEVICE_ID_INTEL_BXT_M) { - acpi_str_to_uuid(PCI_INTEL_BXT_DSM_UUID, dwc->uuid); + guid_parse(PCI_INTEL_BXT_DSM_GUID, &dwc->guid); dwc->has_dsm_for_pm = true; } @@ -292,7 +292,7 @@ static int dwc3_pci_dsm(struct dwc3_pci *dwc, int param) tmp.type = ACPI_TYPE_INTEGER; tmp.integer.value = param; - obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), dwc->uuid, + obj = acpi_evaluate_dsm(ACPI_HANDLE(&dwc->pci->dev), &dwc->guid, 1, PCI_INTEL_BXT_FUNC_PMU_PWR, &argv4); if (!obj) { dev_err(&dwc->pci->dev, "failed to evaluate _DSM\n"); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index fcf1f3f63e7a..4842be5687a7 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -213,13 +213,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) #ifdef CONFIG_ACPI static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { - static const u8 intel_dsm_uuid[] = { - 0xb7, 0x0c, 0x34, 0xac, 0x01, 0xe9, 0xbf, 0x45, - 0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23, - }; + static const guid_t intel_dsm_guid = + GUID_INIT(0xac340cb7, 0xe901, 0x45bf, + 0xb7, 0xe6, 0x2b, 0x34, 0xec, 0x93, 0x1e, 0x23); union acpi_object *obj; - obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), intel_dsm_uuid, 3, 1, + obj = acpi_evaluate_dsm(ACPI_HANDLE(&dev->dev), &intel_dsm_guid, 3, 1, NULL); ACPI_FREE(obj); } diff --git a/drivers/usb/misc/ucsi.c b/drivers/usb/misc/ucsi.c index 07397bddefa3..81251aaa20f9 100644 --- a/drivers/usb/misc/ucsi.c +++ b/drivers/usb/misc/ucsi.c @@ -55,13 +55,13 @@ struct ucsi { static int ucsi_acpi_cmd(struct ucsi *ucsi, struct ucsi_control *ctrl) { - uuid_le uuid = UUID_LE(0x6f8398c2, 0x7ca4, 0x11e4, - 0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f); + guid_t guid = GUID_INIT(0x6f8398c2, 0x7ca4, 0x11e4, + 0xad, 0x36, 0x63, 0x10, 0x42, 0xb5, 0x00, 0x8f); union acpi_object *obj; ucsi->data->ctrl.raw_cmd = ctrl->raw_cmd; - obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), uuid.b, 1, 1, NULL); + obj = acpi_evaluate_dsm(ACPI_HANDLE(ucsi->dev), &guid, 1, 1, NULL); if (!obj) { dev_err(ucsi->dev, "%s: failed to evaluate _DSM\n", __func__); return -EIO; diff --git a/drivers/usb/typec/typec_wcove.c b/drivers/usb/typec/typec_wcove.c index d5a7b21fa3f1..c2ce25289027 100644 --- a/drivers/usb/typec/typec_wcove.c +++ b/drivers/usb/typec/typec_wcove.c @@ -105,8 +105,8 @@ enum wcove_typec_role { WCOVE_ROLE_DEVICE, }; -static uuid_le uuid = UUID_LE(0x482383f0, 0x2876, 0x4e49, - 0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37); +static guid_t guid = GUID_INIT(0x482383f0, 0x2876, 0x4e49, + 0x86, 0x85, 0xdb, 0x66, 0x21, 0x1a, 0xf0, 0x37); static int wcove_typec_func(struct wcove_typec *wcove, enum wcove_typec_func func, int param) @@ -118,7 +118,7 @@ static int wcove_typec_func(struct wcove_typec *wcove, tmp.type = ACPI_TYPE_INTEGER; tmp.integer.value = param; - obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), uuid.b, 1, func, + obj = acpi_evaluate_dsm(ACPI_HANDLE(wcove->dev), &guid, 1, func, &argv4); if (!obj) { dev_err(wcove->dev, "%s: failed to evaluate _DSM\n", __func__); @@ -314,7 +314,7 @@ static int wcove_typec_probe(struct platform_device *pdev) if (ret) return ret; - if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), uuid.b, 0, 0x1f)) { + if (!acpi_check_dsm(ACPI_HANDLE(&pdev->dev), &guid, 0, 0x1f)) { dev_err(&pdev->dev, "Missing _DSM functions\n"); return -ENODEV; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 197f3fffc9a7..ea7df16e71a7 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -61,17 +61,18 @@ bool acpi_ata_match(acpi_handle handle); bool acpi_bay_match(acpi_handle handle); bool acpi_dock_match(acpi_handle handle); -bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs); -union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, +bool acpi_check_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 funcs); +union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4); static inline union acpi_object * -acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func, - union acpi_object *argv4, acpi_object_type type) +acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, + u64 func, union acpi_object *argv4, + acpi_object_type type) { union acpi_object *obj; - obj = acpi_evaluate_dsm(handle, uuid, rev, func, argv4); + obj = acpi_evaluate_dsm(handle, guid, rev, func, argv4); if (obj && obj->type != type) { ACPI_FREE(obj); obj = NULL; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b0e1636ca5c3..ab19365c905f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -458,7 +458,6 @@ struct acpi_osc_context { struct acpi_buffer ret; /* free by caller if success */ }; -acpi_status acpi_str_to_uuid(char *str, u8 *uuid); acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ @@ -742,7 +741,7 @@ static inline bool acpi_driver_match_device(struct device *dev, } static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, - const u8 *uuid, + const guid_t *guid, int rev, int func, union acpi_object *argv4) { diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 7a4e83a8c89c..dd86c97f2454 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -105,7 +105,7 @@ static inline void acpiphp_remove_slots(struct pci_bus *bus) { } static inline void acpiphp_check_host_bridge(struct acpi_device *adev) { } #endif -extern const u8 pci_acpi_dsm_uuid[]; +extern const guid_t pci_acpi_dsm_guid; #define DEVICE_LABEL_DSM 0x07 #define RESET_DELAY_DSM 0x08 #define FUNCTION_DELAY_DSM 0x09 diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c index e3f06672fd6d..e7d766d56c8e 100644 --- a/sound/soc/intel/skylake/skl-nhlt.c +++ b/sound/soc/intel/skylake/skl-nhlt.c @@ -21,8 +21,9 @@ #include "skl.h" /* Unique identification for getting NHLT blobs */ -static u8 OSC_UUID[16] = {0x6E, 0x88, 0x9F, 0xA6, 0xEB, 0x6C, 0x94, 0x45, - 0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53}; +static guid_t osc_guid = + GUID_INIT(0xA69F886E, 0x6CEB, 0x4594, + 0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53); struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) { @@ -37,7 +38,7 @@ struct nhlt_acpi_table *skl_nhlt_init(struct device *dev) return NULL; } - obj = acpi_evaluate_dsm(handle, OSC_UUID, 1, 1, NULL); + obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL); if (obj && obj->type == ACPI_TYPE_BUFFER) { nhlt_ptr = (struct nhlt_resource_desc *)obj->buffer.pointer; nhlt_table = (struct nhlt_acpi_table *) diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 64cae1a5deff..e1f75a1914a1 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -370,7 +370,7 @@ acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path, } EXPORT_SYMBOL(__wrap_acpi_evaluate_object); -union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, +union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { union acpi_object *obj = ERR_PTR(-ENXIO); @@ -379,11 +379,11 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, rcu_read_lock(); ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list); if (ops) - obj = ops->evaluate_dsm(handle, uuid, rev, func, argv4); + obj = ops->evaluate_dsm(handle, guid, rev, func, argv4); rcu_read_unlock(); if (IS_ERR(obj)) - return acpi_evaluate_dsm(handle, uuid, rev, func, argv4); + return acpi_evaluate_dsm(handle, guid, rev, func, argv4); return obj; } EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index c2187178fb13..28859da78edf 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1559,7 +1559,7 @@ static unsigned long nfit_ctl_handle; union acpi_object *result; static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle, - const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4) + const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4) { if (handle != &nfit_ctl_handle) return ERR_PTR(-ENXIO); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index f54c0032c6ff..d3d63dd5ed38 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -13,6 +13,7 @@ #ifndef __NFIT_TEST_H__ #define __NFIT_TEST_H__ #include +#include #include #include @@ -36,7 +37,8 @@ typedef void *acpi_handle; typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, - const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4); + const guid_t *guid, u64 rev, u64 func, + union acpi_object *argv4); void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size); void __wrap_iounmap(volatile void __iomem *addr); From bcbc2265f269cc57924371e3bce8c3220d0270c5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jun 2017 09:02:20 +0200 Subject: [PATCH 235/249] acpi: always include uuid.h Without this the build will fail for !CONFIG_ACPI builds on x86. Fixes: 94116f81 ("ACPI: Switch to use generic guid_t in acpi_evaluate_dsm()") Signed-off-by: Christoph Hellwig --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ab19365c905f..cafdfb84ca28 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -26,6 +26,7 @@ #include #include #include +#include #ifndef _LINUX #define _LINUX @@ -39,7 +40,6 @@ #include #include #include -#include #include #include From f4c19ac9c24c5c5dcab4e961e4d7f8f5709c650e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 9 Jun 2017 11:33:06 +0300 Subject: [PATCH 236/249] thermal: int340x_thermal: Switch to use new generic UUID API There are new types and helpers that are supposed to be used in new code. As a preparation to get rid of legacy types and API functions do the conversion here. The conversion fixes a potential bug in int340x_thermal as well since we have to use memcmp() on binary data. Acked-by: Zhang Rui Signed-off-by: Andy Shevchenko Signed-off-by: Christoph Hellwig --- drivers/thermal/int340x_thermal/int3400_thermal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index 9413c4abf0b9..fb7284153878 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -23,7 +23,7 @@ enum int3400_thermal_uuid { INT3400_THERMAL_MAXIMUM_UUID, }; -static u8 *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { +static const char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { "42A441D6-AE6A-462b-A84B-4A8CE79027D3", "3A95C389-E4B8-4629-A526-C52C88626BAE", "97C68AE7-15FA-499c-B8C9-5DA81D606E0A", @@ -141,10 +141,10 @@ static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv) } for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) { - u8 uuid[16]; + guid_t guid; - acpi_str_to_uuid(int3400_thermal_uuids[j], uuid); - if (!strncmp(uuid, objb->buffer.pointer, 16)) { + guid_parse(int3400_thermal_uuids[j], &guid); + if (guid_equal(objb->buffer.pointer, &guid)) { priv->uuid_bitmap |= (1 << j); break; } From 87085ff2e90ecfa91f8bb0cb0ce19ea661bd6f83 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Jun 2017 16:36:50 +0200 Subject: [PATCH 237/249] thermal: int340x_thermal: fix compile after the UUID API switch Fix the compile after the switch to the UUID API in commit f4c19ac9 ("thermal: int340x_thermal: Switch to use new generic UUID API"). Signed-off-by: Christoph Hellwig --- drivers/thermal/int340x_thermal/int3400_thermal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index fb7284153878..a9ec94ed7a42 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -23,7 +23,7 @@ enum int3400_thermal_uuid { INT3400_THERMAL_MAXIMUM_UUID, }; -static const char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { +static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { "42A441D6-AE6A-462b-A84B-4A8CE79027D3", "3A95C389-E4B8-4629-A526-C52C88626BAE", "97C68AE7-15FA-499c-B8C9-5DA81D606E0A", @@ -144,7 +144,7 @@ static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv) guid_t guid; guid_parse(int3400_thermal_uuids[j], &guid); - if (guid_equal(objb->buffer.pointer, &guid)) { + if (guid_equal((guid_t *)objb->buffer.pointer, &guid)) { priv->uuid_bitmap |= (1 << j); break; } From 42aa560446622da6c33dce54fc8f4cd81516e906 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:04 -0600 Subject: [PATCH 238/249] acpi: apei: read ack upon ghes record consumption A RAS (Reliability, Availability, Serviceability) controller may be a separate processor running in parallel with OS execution, and may generate error records for consumption by the OS. If the RAS controller produces multiple error records, then they may be overwritten before the OS has consumed them. The Generic Hardware Error Source (GHES) v2 structure introduces the capability for the OS to acknowledge the consumption of the error record generated by the RAS controller. A RAS controller supporting GHESv2 shall wait for the acknowledgment before writing a new error record, thus eliminating the race condition. Add support for parsing of GHESv2 sub-tables as well. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 59 ++++++++++++++++++++++++++++++++++++++-- drivers/acpi/apei/hest.c | 7 +++-- include/acpi/ghes.h | 5 +++- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 980515e029fa..866d244c6311 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -80,6 +81,11 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +static inline bool is_hest_type_generic_v2(struct ghes *ghes) +{ + return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; +} + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -240,6 +246,16 @@ static int ghes_estatus_pool_expand(unsigned long len) return 0; } +static int map_gen_v2(struct ghes *ghes) +{ + return apei_map_generic_address(&ghes->generic_v2->read_ack_register); +} + +static void unmap_gen_v2(struct ghes *ghes) +{ + apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -249,10 +265,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); if (!ghes) return ERR_PTR(-ENOMEM); + ghes->generic = generic; + if (is_hest_type_generic_v2(ghes)) { + rc = map_gen_v2(ghes); + if (rc) + goto err_free; + } + rc = apei_map_generic_address(&generic->error_status_address); if (rc) - goto err_free; + goto err_unmap_read_ack_addr; error_block_length = generic->error_block_length; if (error_block_length > GHES_ESTATUS_MAX_SIZE) { pr_warning(FW_WARN GHES_PFX @@ -264,13 +287,16 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); if (!ghes->estatus) { rc = -ENOMEM; - goto err_unmap; + goto err_unmap_status_addr; } return ghes; -err_unmap: +err_unmap_status_addr: apei_unmap_generic_address(&generic->error_status_address); +err_unmap_read_ack_addr: + if (is_hest_type_generic_v2(ghes)) + unmap_gen_v2(ghes); err_free: kfree(ghes); return ERR_PTR(rc); @@ -280,6 +306,8 @@ static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); apei_unmap_generic_address(&ghes->generic->error_status_address); + if (is_hest_type_generic_v2(ghes)) + unmap_gen_v2(ghes); } static inline int ghes_severity(int severity) @@ -649,6 +677,21 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } +static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &gv2->read_ack_register); + if (rc) + return rc; + + val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; + val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; + + return apei_write(val, &gv2->read_ack_register); +} + static int ghes_proc(struct ghes *ghes) { int rc; @@ -661,6 +704,16 @@ static int ghes_proc(struct ghes *ghes) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } ghes_do_proc(ghes, ghes->estatus); + + /* + * GHESv2 type HEST entries introduce support for error acknowledgment, + * so only acknowledge the error if this support is present. + */ + if (is_hest_type_generic_v2(ghes)) { + rc = ghes_ack_error(ghes->generic_v2); + if (rc) + return rc; + } out: ghes_clear_estatus(ghes); return rc; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 8f2a98e23bba..456b488eb1df 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), + [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), }; static int hest_esrc_len(struct acpi_hest_header *hest_hdr) @@ -141,7 +142,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void { int *count = data; - if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR) + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR || + hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) (*count)++; return 0; } @@ -152,7 +154,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) struct ghes_arr *ghes_arr = data; int rc, i; - if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) return 0; if (!((struct acpi_hest_generic *)hest_hdr)->enabled) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 720446cb243e..68f088a92398 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -13,7 +13,10 @@ #define GHES_EXITING 0x0002 struct ghes { - struct acpi_hest_generic *generic; + union { + struct acpi_hest_generic *generic; + struct acpi_hest_generic_v2 *generic_v2; + }; struct acpi_hest_generic_status *estatus; u64 buffer_paddr; unsigned long flags; From bbcc2e7b642ed241651fee50ac6ed59643cb1736 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:05 -0600 Subject: [PATCH 239/249] ras: acpi/apei: cper: add support for generic data v3 structure The ACPI 6.1 spec adds a new revision of the generic error data entry structure. Add support to handle the new structure as well as properly verify and iterate through the generic data entries. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 11 +++++------ drivers/firmware/efi/cper.c | 37 ++++++++++++++++++++++--------------- include/acpi/ghes.h | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 866d244c6311..81e06e3cb08f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -428,8 +428,7 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int unsigned long pfn; int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); - struct cper_sec_mem_err *mem_err; - mem_err = (struct cper_sec_mem_err *)(gdata + 1); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) return; @@ -466,8 +465,8 @@ static void ghes_do_proc(struct ghes *ghes, sec_type = (guid_t *)gdata->section_type; sec_sev = ghes_severity(gdata->error_severity); if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { - struct cper_sec_mem_err *mem_err; - mem_err = (struct cper_sec_mem_err *)(gdata+1); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + ghes_edac_report_mem_error(ghes, sev, mem_err); arch_apei_report_mem_error(sev, mem_err); @@ -475,8 +474,8 @@ static void ghes_do_proc(struct ghes *ghes, } #ifdef CONFIG_ACPI_APEI_PCIEAER else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { - struct cper_sec_pcie *pcie_err; - pcie_err = (struct cper_sec_pcie *)(gdata+1); + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); + if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE && pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index d42537425438..902475704311 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -32,6 +32,7 @@ #include #include #include +#include #define INDENT_SP " " @@ -386,8 +387,9 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, pfx, pcie->bridge.secondary_status, pcie->bridge.control); } -static void cper_estatus_print_section( - const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no) +static void +cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, + int sec_no) { uuid_le *sec_type = (uuid_le *)gdata->section_type; __u16 severity; @@ -403,14 +405,16 @@ static void cper_estatus_print_section( snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) { - struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1); + struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata); + printk("%s""section_type: general processor error\n", newpfx); if (gdata->error_data_length >= sizeof(*proc_err)) cper_print_proc_generic(newpfx, proc_err); else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { - struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + printk("%s""section_type: memory error\n", newpfx); if (gdata->error_data_length >= sizeof(struct cper_sec_mem_err_old)) @@ -419,7 +423,8 @@ static void cper_estatus_print_section( else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { - struct cper_sec_pcie *pcie = (void *)(gdata + 1); + struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata); + printk("%s""section_type: PCIe error\n", newpfx); if (gdata->error_data_length >= sizeof(*pcie)) cper_print_pcie(newpfx, pcie, gdata); @@ -438,7 +443,7 @@ void cper_estatus_print(const char *pfx, const struct acpi_hest_generic_status *estatus) { struct acpi_hest_generic_data *gdata; - unsigned int data_len, gedata_len; + unsigned int data_len; int sec_no = 0; char newpfx[64]; __u16 severity; @@ -452,11 +457,11 @@ void cper_estatus_print(const char *pfx, data_len = estatus->data_length; gdata = (struct acpi_hest_generic_data *)(estatus + 1); snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); - while (data_len >= sizeof(*gdata)) { - gedata_len = gdata->error_data_length; + + while (data_len >= acpi_hest_get_size(gdata)) { cper_estatus_print_section(newpfx, gdata, sec_no); - data_len -= gedata_len + sizeof(*gdata); - gdata = (void *)(gdata + 1) + gedata_len; + data_len -= acpi_hest_get_record_size(gdata); + gdata = acpi_hest_get_next(gdata); sec_no++; } } @@ -486,12 +491,14 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus) return rc; data_len = estatus->data_length; gdata = (struct acpi_hest_generic_data *)(estatus + 1); - while (data_len >= sizeof(*gdata)) { - gedata_len = gdata->error_data_length; - if (gedata_len > data_len - sizeof(*gdata)) + + while (data_len >= acpi_hest_get_size(gdata)) { + gedata_len = acpi_hest_get_error_length(gdata); + if (gedata_len > data_len - acpi_hest_get_size(gdata)) return -EINVAL; - data_len -= gedata_len + sizeof(*gdata); - gdata = (void *)(gdata + 1) + gedata_len; + + data_len -= acpi_hest_get_record_size(gdata); + gdata = acpi_hest_get_next(gdata); } if (data_len) return -EINVAL; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 68f088a92398..8f8fea004df5 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -73,3 +73,39 @@ static inline void ghes_edac_unregister(struct ghes *ghes) { } #endif + +static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata) +{ + return gdata->revision >> 8; +} + +static inline void *acpi_hest_get_payload(struct acpi_hest_generic_data *gdata) +{ + if (acpi_hest_get_version(gdata) >= 3) + return (void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1); + + return gdata + 1; +} + +static inline int acpi_hest_get_error_length(struct acpi_hest_generic_data *gdata) +{ + return ((struct acpi_hest_generic_data *)(gdata))->error_data_length; +} + +static inline int acpi_hest_get_size(struct acpi_hest_generic_data *gdata) +{ + if (acpi_hest_get_version(gdata) >= 3) + return sizeof(struct acpi_hest_generic_data_v300); + + return sizeof(struct acpi_hest_generic_data); +} + +static inline int acpi_hest_get_record_size(struct acpi_hest_generic_data *gdata) +{ + return (acpi_hest_get_size(gdata) + acpi_hest_get_error_length(gdata)); +} + +static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) +{ + return (void *)(gdata) + acpi_hest_get_record_size(gdata); +} From 8a94471fb73fd53589746561a2dba29e073ed829 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:06 -0600 Subject: [PATCH 240/249] cper: add timestamp print to CPER status printing The ACPI 6.1 spec added a timestamp to the generic error data entry structure. Print the timestamp out when printing out the error information. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Signed-off-by: Will Deacon --- drivers/firmware/efi/cper.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 902475704311..229cf9277524 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #define INDENT_SP " " @@ -387,6 +389,27 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, pfx, pcie->bridge.secondary_status, pcie->bridge.control); } +static void cper_print_tstamp(const char *pfx, + struct acpi_hest_generic_data_v300 *gdata) +{ + __u8 hour, min, sec, day, mon, year, century, *timestamp; + + if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) { + timestamp = (__u8 *)&(gdata->time_stamp); + sec = bcd2bin(timestamp[0]); + min = bcd2bin(timestamp[1]); + hour = bcd2bin(timestamp[2]); + day = bcd2bin(timestamp[4]); + mon = bcd2bin(timestamp[5]); + year = bcd2bin(timestamp[6]); + century = bcd2bin(timestamp[7]); + + printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx, + (timestamp[3] & 0x1 ? "precise " : "imprecise "), + century, year, mon, day, hour, min, sec); + } +} + static void cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no) @@ -395,6 +418,9 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata __u16 severity; char newpfx[64]; + if (acpi_hest_get_version(gdata) >= 3) + cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata); + severity = gdata->error_severity; printk("%s""Error %d, type: %s\n", pfx, sec_no, cper_severity_str(severity)); From 2f74f09bce4f8d0236f20174a6daae63e10fe733 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:07 -0600 Subject: [PATCH 241/249] efi: parse ARM processor error Add support for ARM Common Platform Error Record (CPER). UEFI 2.6 specification adds support for ARM specific processor error information to be reported as part of the CPER records. This provides more detail on for processor error logs. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/firmware/efi/cper.c | 129 ++++++++++++++++++++++++++++++++++++ include/linux/cper.h | 54 +++++++++++++++ 2 files changed, 183 insertions(+) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 229cf9277524..eac08548d233 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -110,12 +110,15 @@ void cper_print_bits(const char *pfx, unsigned int bits, static const char * const proc_type_strs[] = { "IA32/X64", "IA64", + "ARM", }; static const char * const proc_isa_strs[] = { "IA32", "IA64", "X64", + "ARM A32/T32", + "ARM A64", }; static const char * const proc_error_type_strs[] = { @@ -184,6 +187,122 @@ static void cper_print_proc_generic(const char *pfx, printk("%s""IP: 0x%016llx\n", pfx, proc->ip); } +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) +static const char * const arm_reg_ctx_strs[] = { + "AArch32 general purpose registers", + "AArch32 EL1 context registers", + "AArch32 EL2 context registers", + "AArch32 secure context registers", + "AArch64 general purpose registers", + "AArch64 EL1 context registers", + "AArch64 EL2 context registers", + "AArch64 EL3 context registers", + "Misc. system register structure", +}; + +static void cper_print_proc_arm(const char *pfx, + const struct cper_sec_proc_arm *proc) +{ + int i, len, max_ctx_type; + struct cper_arm_err_info *err_info; + struct cper_arm_ctx_info *ctx_info; + char newpfx[64]; + + printk("%sMIDR: 0x%016llx\n", pfx, proc->midr); + + len = proc->section_length - (sizeof(*proc) + + proc->err_info_num * (sizeof(*err_info))); + if (len < 0) { + printk("%ssection length: %d\n", pfx, proc->section_length); + printk("%ssection length is too small\n", pfx); + printk("%sfirmware-generated error record is incorrect\n", pfx); + printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num); + return; + } + + if (proc->validation_bits & CPER_ARM_VALID_MPIDR) + printk("%sMultiprocessor Affinity Register (MPIDR): 0x%016llx\n", + pfx, proc->mpidr); + + if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL) + printk("%serror affinity level: %d\n", pfx, + proc->affinity_level); + + if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) { + printk("%srunning state: 0x%x\n", pfx, proc->running_state); + printk("%sPower State Coordination Interface state: %d\n", + pfx, proc->psci_state); + } + + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); + + err_info = (struct cper_arm_err_info *)(proc + 1); + for (i = 0; i < proc->err_info_num; i++) { + printk("%sError info structure %d:\n", pfx, i); + + printk("%snum errors: %d\n", pfx, err_info->multiple_error + 1); + + if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) { + if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST) + printk("%sfirst error captured\n", newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST) + printk("%slast error captured\n", newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED) + printk("%spropagated error captured\n", + newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW) + printk("%soverflow occurred, error info is incomplete\n", + newpfx); + } + + printk("%serror_type: %d, %s\n", newpfx, err_info->type, + err_info->type < ARRAY_SIZE(proc_error_type_strs) ? + proc_error_type_strs[err_info->type] : "unknown"); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) + printk("%serror_info: 0x%016llx\n", newpfx, + err_info->error_info); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR) + printk("%svirtual fault address: 0x%016llx\n", + newpfx, err_info->virt_fault_addr); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR) + printk("%sphysical fault address: 0x%016llx\n", + newpfx, err_info->physical_fault_addr); + err_info += 1; + } + + ctx_info = (struct cper_arm_ctx_info *)err_info; + max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1; + for (i = 0; i < proc->context_info_num; i++) { + int size = sizeof(*ctx_info) + ctx_info->size; + + printk("%sContext info structure %d:\n", pfx, i); + if (len < size) { + printk("%ssection length is too small\n", newpfx); + printk("%sfirmware-generated error record is incorrect\n", pfx); + return; + } + if (ctx_info->type > max_ctx_type) { + printk("%sInvalid context type: %d (max: %d)\n", + newpfx, ctx_info->type, max_ctx_type); + return; + } + printk("%sregister context type: %s\n", newpfx, + arm_reg_ctx_strs[ctx_info->type]); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, + (ctx_info + 1), ctx_info->size, 0); + len -= size; + ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size); + } + + if (len > 0) { + printk("%sVendor specific error info has %u bytes:\n", pfx, + len); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info, + len, true); + } +} +#endif + static const char * const mem_err_type_strs[] = { "unknown", "no error", @@ -456,6 +575,16 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata cper_print_pcie(newpfx, pcie, gdata); else goto err_section_too_small; +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) + } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) { + struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata); + + printk("%ssection_type: ARM processor error\n", newpfx); + if (gdata->error_data_length >= sizeof(*arm_err)) + cper_print_proc_arm(newpfx, arm_err); + else + goto err_section_too_small; +#endif } else printk("%s""section type: unknown, %pUl\n", newpfx, sec_type); diff --git a/include/linux/cper.h b/include/linux/cper.h index dcacb1a72e26..4c671fc2081e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -180,6 +180,10 @@ enum { #define CPER_SEC_PROC_IPF \ UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \ 0x80, 0xC7, 0x3C, 0x88, 0x81) +/* Processor Specific: ARM */ +#define CPER_SEC_PROC_ARM \ + UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05, \ + 0x1D, 0x5D, 0x46, 0xB0) /* Platform Memory */ #define CPER_SEC_PLATFORM_MEM \ UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ @@ -255,6 +259,22 @@ enum { #define CPER_PCIE_SLOT_SHIFT 3 +#define CPER_ARM_VALID_MPIDR BIT(0) +#define CPER_ARM_VALID_AFFINITY_LEVEL BIT(1) +#define CPER_ARM_VALID_RUNNING_STATE BIT(2) +#define CPER_ARM_VALID_VENDOR_INFO BIT(3) + +#define CPER_ARM_INFO_VALID_MULTI_ERR BIT(0) +#define CPER_ARM_INFO_VALID_FLAGS BIT(1) +#define CPER_ARM_INFO_VALID_ERR_INFO BIT(2) +#define CPER_ARM_INFO_VALID_VIRT_ADDR BIT(3) +#define CPER_ARM_INFO_VALID_PHYSICAL_ADDR BIT(4) + +#define CPER_ARM_INFO_FLAGS_FIRST BIT(0) +#define CPER_ARM_INFO_FLAGS_LAST BIT(1) +#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2) +#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3) + /* * All tables and structs must be byte-packed to match CPER * specification, since the tables are provided by the system BIOS @@ -340,6 +360,40 @@ struct cper_ia_proc_ctx { __u64 mm_reg_addr; }; +/* ARM Processor Error Section */ +struct cper_sec_proc_arm { + __u32 validation_bits; + __u16 err_info_num; /* Number of Processor Error Info */ + __u16 context_info_num; /* Number of Processor Context Info Records*/ + __u32 section_length; + __u8 affinity_level; + __u8 reserved[3]; /* must be zero */ + __u64 mpidr; + __u64 midr; + __u32 running_state; /* Bit 0 set - Processor running. PSCI = 0 */ + __u32 psci_state; +}; + +/* ARM Processor Error Information Structure */ +struct cper_arm_err_info { + __u8 version; + __u8 length; + __u16 validation_bits; + __u8 type; + __u16 multiple_error; + __u8 flags; + __u64 error_info; + __u64 virt_fault_addr; + __u64 physical_fault_addr; +}; + +/* ARM Processor Context Information Structure */ +struct cper_arm_ctx_info { + __u16 version; + __u16 type; + __u32 size; +}; + /* Old Memory Error Section UEFI 2.1, 2.2 */ struct cper_sec_mem_err_old { __u64 validation_bits; From 32015c235603a209697d1228667b1fb1cc8a8d06 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:08 -0600 Subject: [PATCH 242/249] arm64: exception: handle Synchronous External Abort SEA exceptions are often caused by an uncorrected hardware error, and are handled when data abort and instruction abort exception classes have specific values for their Fault Status Code. When SEA occurs, before killing the process, report the error in the kernel logs. Update fault_info[] with specific SEA faults so that the new SEA handler is used. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Acked-by: Catalin Marinas [will: use NULL instead of 0 when assigning si_addr] Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/mm/fault.c | 45 ++++++++++++++++++++++++++++-------- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 85997c0e5443..28bf02efce76 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -83,6 +83,7 @@ #define ESR_ELx_WNR (UL(1) << 6) /* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_FnV (UL(1) << 10) #define ESR_ELx_EA (UL(1) << 9) #define ESR_ELx_S1PTW (UL(1) << 7) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 37b95dff0b07..246e64f60610 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -522,6 +522,31 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } +/* + * This abort handler deals with Synchronous External Abort. + * It calls notifiers, and then returns "fault". + */ +static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + struct siginfo info; + const struct fault_info *inf; + + inf = esr_to_fault_info(esr); + pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, addr); + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = 0; + if (esr & ESR_ELx_FnV) + info.si_addr = NULL; + else + info.si_addr = (void __user *)addr; + arm64_notify_die("", regs, &info, esr); + + return 0; +} + static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "ttbr address size fault" }, { do_bad, SIGBUS, 0, "level 1 address size fault" }, @@ -539,22 +564,22 @@ static const struct fault_info fault_info[] = { { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, - { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_sea, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "unknown 17" }, { do_bad, SIGBUS, 0, "unknown 18" }, { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_sea, SIGBUS, 0, "level 0 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 1 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 2 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 3 (translation table walk)" }, + { do_sea, SIGBUS, 0, "synchronous parity or ECC error" }, { do_bad, SIGBUS, 0, "unknown 25" }, { do_bad, SIGBUS, 0, "unknown 26" }, { do_bad, SIGBUS, 0, "unknown 27" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" }, { do_bad, SIGBUS, 0, "unknown 32" }, { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, { do_bad, SIGBUS, 0, "unknown 34" }, From 7edda0886bc3d1e5418951558a2555af1bc73b0a Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:09 -0600 Subject: [PATCH 243/249] acpi: apei: handle SEA notification type for ARMv8 ARM APEI extension proposal added SEA (Synchronous External Abort) notification type for ARMv8. Add a new GHES error source handling function for SEA. If an error source's notification type is SEA, then this function can be registered into the SEA exception handler. That way GHES will parse and report SEA exceptions when they occur. An SEA can interrupt code that had interrupts masked and is treated as an NMI. To aid this the page of address space for mapping APEI buffers while in_nmi() is always reserved, and ghes_ioremap_pfn_nmi() is changed to use the helper methods to find the prot_t to map with in the same way as ghes_ioremap_pfn_irq(). Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 ++ arch/arm64/mm/fault.c | 17 ++++++++++ drivers/acpi/apei/Kconfig | 15 +++++++++ drivers/acpi/apei/ghes.c | 70 +++++++++++++++++++++++++++++++++++---- include/acpi/ghes.h | 7 ++++ 5 files changed, 105 insertions(+), 6 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..80559977a83e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -19,6 +19,7 @@ config ARM64 select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING @@ -92,6 +93,7 @@ config ARM64 select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP if NUMA + select HAVE_NMI if ACPI_APEI_SEA select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_REGS diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 246e64f60610..07481af15fd7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -42,6 +42,8 @@ #include #include +#include + struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -535,6 +537,21 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr); + /* + * Synchronous aborts may interrupt code which had interrupts masked. + * Before calling out into the wider kernel tell the interested + * subsystems. + */ + if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { + if (interrupts_enabled(regs)) + nmi_enter(); + + ghes_notify_sea(); + + if (interrupts_enabled(regs)) + nmi_exit(); + } + info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = 0; diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index b0140c8fc733..de14d49a5c90 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -39,6 +39,21 @@ config ACPI_APEI_PCIEAER PCIe AER errors may be reported via APEI firmware first mode. Turn on this option to enable the corresponding support. +config ACPI_APEI_SEA + bool "APEI Synchronous External Abort logging/recovering support" + depends on ARM64 && ACPI_APEI_GHES + default y + help + This option should be enabled if the system supports + firmware first handling of SEA (Synchronous External Abort). + SEA happens with certain faults of data abort or instruction + abort synchronous exceptions on ARMv8 systems. If a system + supports firmware first handling of SEA, the platform analyzes + and handles hardware error notifications from SEA, and it may then + form a HW error record for the OS to parse and handle. This + option allows the OS to look for such hardware error record, and + take appropriate action. + config ACPI_APEI_MEMORY_FAILURE bool "APEI memory error recovering support" depends on ACPI_APEI && MEMORY_FAILURE diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 81e06e3cb08f..42ddc0eff25b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -116,11 +116,7 @@ static DEFINE_MUTEX(ghes_list_mutex); * Two virtual pages are used, one for IRQ/PROCESS context, the other for * NMI context (optionally). */ -#ifdef CONFIG_HAVE_ACPI_APEI_NMI #define GHES_IOREMAP_PAGES 2 -#else -#define GHES_IOREMAP_PAGES 1 -#endif #define GHES_IOREMAP_IRQ_PAGE(base) (base) #define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE) @@ -159,10 +155,14 @@ static void ghes_ioremap_exit(void) static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) { unsigned long vaddr; + phys_addr_t paddr; + pgprot_t prot; vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, - pfn << PAGE_SHIFT, PAGE_KERNEL); + + paddr = pfn << PAGE_SHIFT; + prot = arch_apei_get_mem_attribute(paddr); + ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); return (void __iomem *)vaddr; } @@ -774,6 +774,50 @@ static struct notifier_block ghes_notifier_sci = { .notifier_call = ghes_notify_sci, }; +#ifdef CONFIG_ACPI_APEI_SEA +static LIST_HEAD(ghes_sea); + +void ghes_notify_sea(void) +{ + struct ghes *ghes; + + /* + * synchronize_rcu() will wait for nmi_exit(), so no need to + * rcu_read_lock(). + */ + list_for_each_entry_rcu(ghes, &ghes_sea, list) { + ghes_proc(ghes); + } +} + +static void ghes_sea_add(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_add_rcu(&ghes->list, &ghes_sea); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_sea_remove(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); +} +#else /* CONFIG_ACPI_APEI_SEA */ +static inline void ghes_sea_add(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", + ghes->generic->header.source_id); +} + +static inline void ghes_sea_remove(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", + ghes->generic->header.source_id); +} +#endif /* CONFIG_ACPI_APEI_SEA */ + #ifdef CONFIG_HAVE_ACPI_APEI_NMI /* * printk is not safe in NMI context. So in NMI handler, we allocate @@ -1019,6 +1063,14 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_EXTERNAL: case ACPI_HEST_NOTIFY_SCI: break; + case ACPI_HEST_NOTIFY_SEA: + if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", + generic->header.source_id); + rc = -ENOTSUPP; + goto err; + } + break; case ACPI_HEST_NOTIFY_NMI: if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", @@ -1083,6 +1135,9 @@ static int ghes_probe(struct platform_device *ghes_dev) list_add_rcu(&ghes->list, &ghes_sci); mutex_unlock(&ghes_list_mutex); break; + case ACPI_HEST_NOTIFY_SEA: + ghes_sea_add(ghes); + break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_add(ghes); break; @@ -1126,6 +1181,9 @@ static int ghes_remove(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); synchronize_rcu(); break; + case ACPI_HEST_NOTIFY_SEA: + ghes_sea_remove(ghes); + break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); break; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 8f8fea004df5..95305aeb13ff 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -1,3 +1,6 @@ +#ifndef GHES_H +#define GHES_H + #include #include @@ -109,3 +112,7 @@ static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) { return (void *)(gdata) + acpi_hest_get_record_size(gdata); } + +void ghes_notify_sea(void); + +#endif /* GHES_H */ From 2fb5853e4442334cb66fc2ab33a51c91d4434769 Mon Sep 17 00:00:00 2001 From: "Jonathan (Zhixiong) Zhang" Date: Wed, 21 Jun 2017 12:17:10 -0600 Subject: [PATCH 244/249] acpi: apei: panic OS with fatal error status block Even if an error status block's severity is fatal, the kernel does not honor the severity level and panic. With the firmware first model, the platform could inform the OS about a fatal hardware error through the non-NMI GHES notification type. The OS should panic when a hardware error record is received with this severity. Call panic() after CPER data in error status block is printed if severity is fatal, before each error section is handled. Signed-off-by: Jonathan (Zhixiong) Zhang Signed-off-by: Tyler Baicar Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 42ddc0eff25b..7a91ac7d6b75 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -135,6 +135,8 @@ static unsigned long ghes_estatus_pool_size_request; static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; +static int ghes_panic_timeout __read_mostly = 30; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -691,6 +693,16 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) return apei_write(val, &gv2->read_ack_register); } +static void __ghes_panic(struct ghes *ghes) +{ + __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); + + /* reboot to log the error! */ + if (!panic_timeout) + panic_timeout = ghes_panic_timeout; + panic("Fatal hardware error!"); +} + static int ghes_proc(struct ghes *ghes) { int rc; @@ -698,6 +710,11 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; + + if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { + __ghes_panic(ghes); + } + if (!ghes_estatus_cached(ghes->estatus)) { if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) ghes_estatus_cache_add(ghes->generic, ghes->estatus); @@ -838,8 +855,6 @@ static atomic_t ghes_in_nmi = ATOMIC_INIT(0); static LIST_HEAD(ghes_nmi); -static int ghes_panic_timeout __read_mostly = 30; - static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next; @@ -925,18 +940,6 @@ static void __process_error(struct ghes *ghes) #endif } -static void __ghes_panic(struct ghes *ghes) -{ - oops_begin(); - ghes_print_queued_estatus(); - __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); - - /* reboot to log the error! */ - if (panic_timeout == 0) - panic_timeout = ghes_panic_timeout; - panic("Fatal hardware error!"); -} - static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { struct ghes *ghes; @@ -954,8 +957,11 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) } sev = ghes_severity(ghes->estatus->error_severity); - if (sev >= GHES_SEV_PANIC) + if (sev >= GHES_SEV_PANIC) { + oops_begin(); + ghes_print_queued_estatus(); __ghes_panic(ghes); + } if (!(ghes->flags & GHES_TO_CLEAR)) continue; From 0fc300f414519b10c146fc3329a1b3094e4b6d52 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:11 -0600 Subject: [PATCH 245/249] efi: print unrecognized CPER section UEFI spec allows for non-standard section in Common Platform Error Record. This is defined in section N.2.3 of UEFI version 2.5. Currently if the CPER section's type (UUID) does not match with one of the section types that the kernel knows how to parse, the section is skipped. Therefore, user is not able to see such CPER data, for instance, error record of non-standard section. This change prints out the raw data in hex in the dmesg buffer so that non-standard sections are reported to the user. Non-standard section type errors should be reported to the user because these can include errors which are vendor specific. The data length is taken from Error Data length field of Generic Error Data Entry. The following is a sample output from dmesg: Hardware error from APEI Generic Hardware Error Source: 2 It has been corrected by h/w and requires no further action event severity: corrected time: precise 2017-03-15 20:37:35 Error 0, type: corrected section type: unknown, d2e2621c-f936-468d-0d84-15a4ed015c8b section length: 0x238 00000000: 4d415201 4d492031 453a4d45 435f4343 .RAM1 IMEM:ECC_C 00000010: 53515f45 44525f42 00000000 00000000 E_QSB_RD........ 00000020: 00000000 00000000 00000000 00000000 ................ 00000030: 00000000 00000000 01010000 01010000 ................ 00000040: 00000000 00000000 00000005 00000000 ................ 00000050: 01010000 00000000 00000001 00dddd00 ................ ... The raw data from the error can then be decoded using vendor specific tools. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/firmware/efi/cper.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index eac08548d233..d5a5855906d6 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -585,8 +585,15 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata else goto err_section_too_small; #endif - } else - printk("%s""section type: unknown, %pUl\n", newpfx, sec_type); + } else { + const void *err = acpi_hest_get_payload(gdata); + + printk("%ssection type: unknown, %pUl\n", newpfx, sec_type); + printk("%ssection length: %#x\n", newpfx, + gdata->error_data_length); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err, + gdata->error_data_length, true); + } return; From 297b64c74385fc7ea5dfff66105ab6465f2df49a Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:12 -0600 Subject: [PATCH 246/249] ras: acpi / apei: generate trace event for unrecognized CPER section The UEFI spec includes non-standard section type support in the Common Platform Error Record. This is defined in section N.2.3 of UEFI version 2.5. Currently if the CPER section's type (UUID) does not match any section type that the kernel knows how to parse, a trace event is not generated. Generate a trace event which contains the raw error data for non-standard section type error records. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Tested-by: Shiju Jose Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 18 ++++++++++++++++ drivers/ras/ras.c | 10 ++++++++- include/linux/ras.h | 12 +++++++++++ include/linux/uuid.h | 4 +++- include/ras/ras_event.h | 45 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7a91ac7d6b75..ab36ad628c68 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -45,11 +45,14 @@ #include #include #include +#include +#include #include #include #include #include +#include #include "apei-internal.h" @@ -461,11 +464,19 @@ static void ghes_do_proc(struct ghes *ghes, int sev, sec_sev; struct acpi_hest_generic_data *gdata; guid_t *sec_type; + guid_t *fru_id = &NULL_UUID_LE; + char *fru_text = ""; sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { sec_type = (guid_t *)gdata->section_type; sec_sev = ghes_severity(gdata->error_severity); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (guid_t *)gdata->fru_id; + + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); @@ -506,6 +517,13 @@ static void ghes_do_proc(struct ghes *ghes, } #endif + else { + void *err = acpi_hest_get_payload(gdata); + + log_non_standard_event(sec_type, fru_id, fru_text, + sec_sev, err, + gdata->error_data_length); + } } } diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 94f8038864b4..e87fd9e32ee2 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -7,11 +7,19 @@ #include #include +#include #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras #include +void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id, + const char *fru_text, const u8 sev, const u8 *err, + const u32 len) +{ + trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len); +} + static int __init ras_init(void) { int rc = 0; @@ -27,7 +35,7 @@ subsys_initcall(ras_init); EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); #endif EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); - +EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event); int __init parse_ras_param(char *str) { diff --git a/include/linux/ras.h b/include/linux/ras.h index ffb147185e8d..62fac3042dce 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -2,6 +2,7 @@ #define __RAS_H__ #include +#include #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); @@ -22,4 +23,15 @@ static inline void __init cec_init(void) { } static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif +#ifdef CONFIG_RAS +void log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, const u32 len); +#else +static void log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, + const u32 len) { return; } +#endif + #endif /* __RAS_H__ */ diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 75f7182d5360..61641faca38b 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -18,8 +18,10 @@ #include +#define UUID_SIZE 16 + typedef struct { - __u8 b[16]; + __u8 b[UUID_SIZE]; } uuid_t; #define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 1791a12cfa85..4f79ba94fa6b 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -161,6 +161,51 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * Non-Standard Section Report + * + * This event is generated when hardware detected a hardware + * error event, which may be of non-standard section as defined + * in UEFI spec appendix "Common Platform Error Record", or may + * be of sections for which TRACE_EVENT is not defined. + * + */ +TRACE_EVENT(non_standard_event, + + TP_PROTO(const uuid_le *sec_type, + const uuid_le *fru_id, + const char *fru_text, + const u8 sev, + const u8 *err, + const u32 len), + + TP_ARGS(sec_type, fru_id, fru_text, sev, err, len), + + TP_STRUCT__entry( + __array(char, sec_type, UUID_SIZE) + __array(char, fru_id, UUID_SIZE) + __string(fru_text, fru_text) + __field(u8, sev) + __field(u32, len) + __dynamic_array(u8, buf, len) + ), + + TP_fast_assign( + memcpy(__entry->sec_type, sec_type, UUID_SIZE); + memcpy(__entry->fru_id, fru_id, UUID_SIZE); + __assign_str(fru_text, fru_text); + __entry->sev = sev; + __entry->len = len; + memcpy(__get_dynamic_array(buf), err, len); + ), + + TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s", + __entry->sev, __entry->sec_type, + __entry->fru_id, __get_str(fru_text), + __entry->len, + __print_hex(__get_dynamic_array(buf), __entry->len)) +); + /* * PCIe AER Trace event * From e9279e83ad1f4b5af541a522a81888f828210b40 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:13 -0600 Subject: [PATCH 247/249] trace, ras: add ARM processor error trace event Currently there are trace events for the various RAS errors with the exception of ARM processor type errors. Add a new trace event for such errors so that the user will know when they occur. These trace events are consistent with the ARM processor error section type defined in UEFI 2.6 spec section N.2.4.4. Signed-off-by: Tyler Baicar Acked-by: Steven Rostedt Reviewed-by: Xie XiuQi Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 6 ++++- drivers/firmware/efi/cper.c | 1 + drivers/ras/ras.c | 6 +++++ include/linux/ras.h | 3 +++ include/ras/ras_event.h | 45 +++++++++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ab36ad628c68..5073d035bb6e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -517,7 +517,11 @@ static void ghes_do_proc(struct ghes *ghes, } #endif - else { + else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { + struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + + log_arm_hw_error(err); + } else { void *err = acpi_hest_get_payload(gdata); log_non_standard_event(sec_type, fru_id, fru_text, diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index d5a5855906d6..48a8f69da42a 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -35,6 +35,7 @@ #include #include #include +#include #define INDENT_SP " " diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index e87fd9e32ee2..39701a5c3f49 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -20,6 +20,11 @@ void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id, trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len); } +void log_arm_hw_error(struct cper_sec_proc_arm *err) +{ + trace_arm_event(err); +} + static int __init ras_init(void) { int rc = 0; @@ -36,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); #endif EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event); +EXPORT_TRACEPOINT_SYMBOL_GPL(arm_event); int __init parse_ras_param(char *str) { diff --git a/include/linux/ras.h b/include/linux/ras.h index 62fac3042dce..7d61863ff265 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -3,6 +3,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); @@ -27,11 +28,13 @@ static inline int cec_add_elem(u64 pfn) { return -ENODEV; } void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); +void log_arm_hw_error(struct cper_sec_proc_arm *err); #else static void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len) { return; } +static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif #endif /* __RAS_H__ */ diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 4f79ba94fa6b..429f46fb61e4 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -161,6 +161,51 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * ARM Processor Events Report + * + * This event is generated when hardware detects an ARM processor error + * has occurred. UEFI 2.6 spec section N.2.4.4. + */ +TRACE_EVENT(arm_event, + + TP_PROTO(const struct cper_sec_proc_arm *proc), + + TP_ARGS(proc), + + TP_STRUCT__entry( + __field(u64, mpidr) + __field(u64, midr) + __field(u32, running_state) + __field(u32, psci_state) + __field(u8, affinity) + ), + + TP_fast_assign( + if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL) + __entry->affinity = proc->affinity_level; + else + __entry->affinity = ~0; + if (proc->validation_bits & CPER_ARM_VALID_MPIDR) + __entry->mpidr = proc->mpidr; + else + __entry->mpidr = 0ULL; + __entry->midr = proc->midr; + if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) { + __entry->running_state = proc->running_state; + __entry->psci_state = proc->psci_state; + } else { + __entry->running_state = ~0; + __entry->psci_state = ~0; + } + ), + + TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " + "running state: %d; PSCI state: %d", + __entry->affinity, __entry->mpidr, __entry->midr, + __entry->running_state, __entry->psci_state) +); + /* * Non-Standard Section Report * From 621f48e40ee9b0100a802531069166d7d94796e0 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:14 -0600 Subject: [PATCH 248/249] arm/arm64: KVM: add guest SEA support Currently external aborts are unsupported by the guest abort handling. Add handling for SEAs so that the host kernel reports SEAs which occur in the guest kernel. When an SEA occurs in the guest kernel, the guest exits and is routed to kvm_handle_guest_abort(). Prior to this patch, a print message of an unsupported FSC would be printed and nothing else would happen. With this patch, the code gets routed to the APEI handling of SEAs in the host kernel to report the SEA information. Signed-off-by: Tyler Baicar Acked-by: Catalin Marinas Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm/include/asm/kvm_arm.h | 10 ++++++++ arch/arm/include/asm/system_misc.h | 5 ++++ arch/arm64/include/asm/kvm_arm.h | 10 ++++++++ arch/arm64/include/asm/system_misc.h | 2 ++ arch/arm64/mm/fault.c | 22 +++++++++++++++-- drivers/acpi/apei/ghes.c | 17 ++++++++----- include/acpi/ghes.h | 2 +- virt/kvm/arm/mmu.c | 36 +++++++++++++++++++++++++--- 8 files changed, 92 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index a3f0b3d50089..ebf020b02bc8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -187,6 +187,16 @@ #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) +#define FSC_SEA (0x10) +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~0xf) diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index a3d61ad984af..8c4a89f5ce7d 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -22,6 +22,11 @@ extern void (*arm_pm_idle)(void); extern unsigned int user_debug; +static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + return -1; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_ARM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6e99978e83bd..61d694c2eae5 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -204,6 +204,16 @@ #define FSC_FAULT ESR_ELx_FSC_FAULT #define FSC_ACCESS ESR_ELx_FSC_ACCESS #define FSC_PERM ESR_ELx_FSC_PERM +#define FSC_SEA ESR_ELx_FSC_EXTABT +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index bc812435bc76..95aa4423247d 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -56,6 +56,8 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); __show_ratelimited; \ }) +int handle_guest_sea(phys_addr_t addr, unsigned int esr); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 07481af15fd7..a8c9f2db20ba 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -532,6 +532,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; const struct fault_info *inf; + int ret = 0; inf = esr_to_fault_info(esr); pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", @@ -546,7 +547,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) if (interrupts_enabled(regs)) nmi_enter(); - ghes_notify_sea(); + ret = ghes_notify_sea(); if (interrupts_enabled(regs)) nmi_exit(); @@ -561,7 +562,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) info.si_addr = (void __user *)addr; arm64_notify_die("", regs, &info, esr); - return 0; + return ret; } static const struct fault_info fault_info[] = { @@ -631,6 +632,23 @@ static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "unknown 63" }, }; +/* + * Handle Synchronous External Aborts that occur in a guest kernel. + * + * The return value will be zero if the SEA was successfully handled + * and non-zero if there was an error processing the error or there was + * no error to process. + */ +int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + int ret = -ENOENT; + + if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) + ret = ghes_notify_sea(); + + return ret; +} + /* * Dispatch a data abort to the relevant handler. */ diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 5073d035bb6e..bc717bdf50f1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -816,17 +816,22 @@ static struct notifier_block ghes_notifier_sci = { #ifdef CONFIG_ACPI_APEI_SEA static LIST_HEAD(ghes_sea); -void ghes_notify_sea(void) +/* + * Return 0 only if one of the SEA error sources successfully reported an error + * record sent from the firmware. + */ +int ghes_notify_sea(void) { struct ghes *ghes; + int ret = -ENOENT; - /* - * synchronize_rcu() will wait for nmi_exit(), so no need to - * rcu_read_lock(). - */ + rcu_read_lock(); list_for_each_entry_rcu(ghes, &ghes_sea, list) { - ghes_proc(ghes); + if (!ghes_proc(ghes)) + ret = 0; } + rcu_read_unlock(); + return ret; } static void ghes_sea_add(struct ghes *ghes) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 95305aeb13ff..9f26e01186ae 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -113,6 +113,6 @@ static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) return (void *)(gdata) + acpi_hest_get_record_size(gdata); } -void ghes_notify_sea(void); +int ghes_notify_sea(void); #endif /* GHES_H */ diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index a2d63247d1bb..bde0cdd60997 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "trace.h" @@ -1427,6 +1428,25 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) kvm_set_pfn_accessed(pfn); } +static bool is_abort_sea(unsigned long fault_status) +{ + switch (fault_status) { + case FSC_SEA: + case FSC_SEA_TTW0: + case FSC_SEA_TTW1: + case FSC_SEA_TTW2: + case FSC_SEA_TTW3: + case FSC_SECC: + case FSC_SECC_TTW0: + case FSC_SECC_TTW1: + case FSC_SECC_TTW2: + case FSC_SECC_TTW3: + return true; + default: + return false; + } +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1449,19 +1469,29 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn_t gfn; int ret, idx; + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); + + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + + /* + * The host kernel will handle the synchronous external abort. There + * is no need to pass the error into the guest. + */ + if (is_abort_sea(fault_status)) { + if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) + return 1; + } + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { kvm_inject_vabt(vcpu); return 1; } - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM && fault_status != FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", From 77b246b32b2c4bc21e352dcb8b53a8aba81ee5a4 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:15 -0600 Subject: [PATCH 249/249] acpi: apei: check for pending errors when probing GHES entries Check for pending errors when probing GHES entries. It is possible that a fatal error is already pending at this point, so we should handle it as soon as the driver is probed. This also avoids a potential issue if there was an interrupt that was already cleared for an error since the GHES driver wasn't present. Signed-off-by: Tyler Baicar Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index bc717bdf50f1..bb830444df28 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1179,6 +1179,9 @@ static int ghes_probe(struct platform_device *ghes_dev) } platform_set_drvdata(ghes_dev, ghes); + /* Handle any pending errors right away */ + ghes_proc(ghes); + return 0; err_edac_unreg: ghes_edac_unregister(ghes);