From c282222a45cb9503cbfbebfdb60491f06ae84b49 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Feb 2017 11:52:29 +0100 Subject: [PATCH 001/384] xfrm: policy: init locks early Dmitry reports following splat: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 13059 Comm: syz-executor1 Not tainted 4.10.0-rc7-next-20170207 #1 [..] spin_lock_bh include/linux/spinlock.h:304 [inline] xfrm_policy_flush+0x32/0x470 net/xfrm/xfrm_policy.c:963 xfrm_policy_fini+0xbf/0x560 net/xfrm/xfrm_policy.c:3041 xfrm_net_init+0x79f/0x9e0 net/xfrm/xfrm_policy.c:3091 ops_init+0x10a/0x530 net/core/net_namespace.c:115 setup_net+0x2ed/0x690 net/core/net_namespace.c:291 copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396 create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] Problem is that when we get error during xfrm_net_init we will call xfrm_policy_fini which will acquire xfrm_policy_lock before it was initialized. Just move it around so locks get set up first. Reported-by: Dmitry Vyukov Fixes: 283bc9f35bbbcb0e9 ("xfrm: Namespacify xfrm state/policy locks") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 177e208e8ff5..3c8f5b70abf8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3062,6 +3062,11 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + spin_lock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -3078,11 +3083,6 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out; - /* Initialize the per-net locks here */ - spin_lock_init(&net->xfrm.xfrm_state_lock); - spin_lock_init(&net->xfrm.xfrm_policy_lock); - mutex_init(&net->xfrm.xfrm_cfg_mutex); - return 0; out: From 4c86d77743a54fb2d8a4d18a037a074c892bb3be Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 14 Feb 2017 07:43:56 +0100 Subject: [PATCH 002/384] xfrm: Don't use sk_family for socket policy lookups On IPv4-mapped IPv6 addresses sk_family is AF_INET6, but the flow informations are created based on AF_INET. So the routing set up 'struct flowi4' but we try to access 'struct flowi6' what leads to an out of bounds access. Fix this by using the family we get with the dst_entry, like we do it for the standard policy lookup. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3c8f5b70abf8..a9af17f0fce6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1248,7 +1248,7 @@ static inline int policy_to_flow_dir(int dir) } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, - const struct flowi *fl) + const struct flowi *fl, u16 family) { struct xfrm_policy *pol; @@ -1256,8 +1256,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); + bool match = xfrm_selector_match(&pol->selector, fl, family); int err = 0; if (match) { @@ -2253,7 +2252,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; - pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2532,7 +2531,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, family); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; From e3dc847a5f85b43ee2bfc8eae407a7e383483228 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 15 Feb 2017 11:38:58 +0100 Subject: [PATCH 003/384] vti6: Don't report path MTU below IPV6_MIN_MTU. In vti6_xmit(), the check for IPV6_MIN_MTU before we send a ICMPV6_PKT_TOOBIG message is missing. So we might report a PMTU below 1280. Fix this by adding the required check. Fixes: ccd740cbc6e ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d82042c8d8fd..9156d6b9eb24 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -484,11 +484,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!skb->ignore_df && skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); - if (skb->protocol == htons(ETH_P_IPV6)) + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - else + } else { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + } return -EMSGSIZE; } From 9fc5cf6e14fc1db40ea8a1c10061bb9586e78585 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:24 +0100 Subject: [PATCH 004/384] platform/x86: fujitsu-laptop: clearly denote backlight-related symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify naming for all backlight-related functions, structures, variables and constants by using a consistent "_bl"/"_BL" suffix/infix. Adjust indentation to make checkpatch happy. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 214 +++++++++++++------------- 1 file changed, 107 insertions(+), 107 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 2b218b1d13e5..e1737b9d9d95 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -78,10 +78,10 @@ #define FUJITSU_LCD_N_LEVELS 8 -#define ACPI_FUJITSU_CLASS "fujitsu" -#define ACPI_FUJITSU_HID "FUJ02B1" -#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" -#define ACPI_FUJITSU_DEVICE_NAME "Fujitsu FUJ02B1" +#define ACPI_FUJITSU_CLASS "fujitsu" +#define ACPI_FUJITSU_BL_HID "FUJ02B1" +#define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" +#define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1" #define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" #define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" #define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" @@ -136,7 +136,7 @@ #endif /* Device controlling the backlight and associated keys */ -struct fujitsu_t { +struct fujitsu_bl { acpi_handle acpi_handle; struct acpi_device *dev; struct input_dev *input; @@ -150,7 +150,7 @@ struct fujitsu_t { unsigned int brightness_level; }; -static struct fujitsu_t *fujitsu; +static struct fujitsu_bl *fujitsu_bl; static int use_alt_lcd_levels = -1; static int disable_brightness_adjust = -1; @@ -222,7 +222,7 @@ static struct led_classdev eco_led = { static u32 dbg_level = 0x03; #endif -static void acpi_fujitsu_notify(struct acpi_device *device, u32 event); +static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event); /* Fujitsu ACPI interface function */ @@ -373,10 +373,10 @@ static int set_lcd_level(int level) vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n", level); - if (level < 0 || level >= fujitsu->max_brightness) + if (level < 0 || level >= fujitsu_bl->max_brightness) return -EINVAL; - status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle); + status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n"); return -ENODEV; @@ -398,10 +398,10 @@ static int set_lcd_level_alt(int level) vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n", level); - if (level < 0 || level >= fujitsu->max_brightness) + if (level < 0 || level >= fujitsu_bl->max_brightness) return -EINVAL; - status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle); + status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n"); return -ENODEV; @@ -421,19 +421,19 @@ static int get_lcd_level(void) vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n"); - status = - acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state); + status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL, + &state); if (ACPI_FAILURE(status)) return 0; - fujitsu->brightness_level = state & 0x0fffffff; + fujitsu_bl->brightness_level = state & 0x0fffffff; if (state & 0x80000000) - fujitsu->brightness_changed = 1; + fujitsu_bl->brightness_changed = 1; else - fujitsu->brightness_changed = 0; + fujitsu_bl->brightness_changed = 0; - return fujitsu->brightness_level; + return fujitsu_bl->brightness_level; } static int get_max_brightness(void) @@ -443,14 +443,14 @@ static int get_max_brightness(void) vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n"); - status = - acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state); + status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL, + &state); if (ACPI_FAILURE(status)) return -1; - fujitsu->max_brightness = state; + fujitsu_bl->max_brightness = state; - return fujitsu->max_brightness; + return fujitsu_bl->max_brightness; } /* Backlight device stuff */ @@ -483,7 +483,7 @@ static int bl_update_status(struct backlight_device *b) return ret; } -static const struct backlight_ops fujitsubl_ops = { +static const struct backlight_ops fujitsu_bl_ops = { .get_brightness = bl_get_brightness, .update_status = bl_update_status, }; @@ -511,7 +511,7 @@ show_brightness_changed(struct device *dev, int ret; - ret = fujitsu->brightness_changed; + ret = fujitsu_bl->brightness_changed; if (ret < 0) return ret; @@ -539,7 +539,7 @@ static ssize_t store_lcd_level(struct device *dev, int level, ret; if (sscanf(buf, "%i", &level) != 1 - || (level < 0 || level >= fujitsu->max_brightness)) + || (level < 0 || level >= fujitsu_bl->max_brightness)) return -EINVAL; if (use_alt_lcd_levels) @@ -644,25 +644,25 @@ static void __init dmi_check_cb_common(const struct dmi_system_id *id) static int __init dmi_check_cb_s6410(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */ - fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */ + fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */ + fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */ return 1; } static int __init dmi_check_cb_s6420(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */ - fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */ + fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */ + fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */ return 1; } static int __init dmi_check_cb_p8010(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_HELP; /* "Support" */ - fujitsu->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */ - fujitsu->keycode4 = KEY_WWW; /* "Internet" */ + fujitsu_bl->keycode1 = KEY_HELP; /* "Support" */ + fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */ + fujitsu_bl->keycode4 = KEY_WWW; /* "Internet" */ return 1; } @@ -693,7 +693,7 @@ static const struct dmi_system_id fujitsu_dmi_table[] __initconst = { /* ACPI device for LCD brightness control */ -static int acpi_fujitsu_add(struct acpi_device *device) +static int acpi_fujitsu_bl_add(struct acpi_device *device) { int state = 0; struct input_dev *input; @@ -702,22 +702,22 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (!device) return -EINVAL; - fujitsu->acpi_handle = device->handle; - sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME); + fujitsu_bl->acpi_handle = device->handle; + sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); - device->driver_data = fujitsu; + device->driver_data = fujitsu_bl; - fujitsu->input = input = input_allocate_device(); + fujitsu_bl->input = input = input_allocate_device(); if (!input) { error = -ENOMEM; goto err_stop; } - snprintf(fujitsu->phys, sizeof(fujitsu->phys), + snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys), "%s/video/input0", acpi_device_hid(device)); input->name = acpi_device_name(device); - input->phys = fujitsu->phys; + input->phys = fujitsu_bl->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; input->dev.parent = &device->dev; @@ -730,7 +730,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (error) goto err_free_input_dev; - error = acpi_bus_update_power(fujitsu->acpi_handle, &state); + error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state); if (error) { pr_err("Error reading power state\n"); goto err_unregister_input_dev; @@ -740,7 +740,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); - fujitsu->dev = device; + fujitsu_bl->dev = device; if (acpi_has_method(device->handle, METHOD_NAME__INI)) { vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); @@ -758,7 +758,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) use_alt_lcd_levels, disable_brightness_adjust); if (get_max_brightness() <= 0) - fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS; + fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS; get_lcd_level(); return 0; @@ -772,38 +772,38 @@ static int acpi_fujitsu_add(struct acpi_device *device) return error; } -static int acpi_fujitsu_remove(struct acpi_device *device) +static int acpi_fujitsu_bl_remove(struct acpi_device *device) { - struct fujitsu_t *fujitsu = acpi_driver_data(device); - struct input_dev *input = fujitsu->input; + struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device); + struct input_dev *input = fujitsu_bl->input; input_unregister_device(input); - fujitsu->acpi_handle = NULL; + fujitsu_bl->acpi_handle = NULL; return 0; } /* Brightness notify */ -static void acpi_fujitsu_notify(struct acpi_device *device, u32 event) +static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event) { struct input_dev *input; int keycode; int oldb, newb; - input = fujitsu->input; + input = fujitsu_bl->input; switch (event) { case ACPI_FUJITSU_NOTIFY_CODE1: keycode = 0; - oldb = fujitsu->brightness_level; + oldb = fujitsu_bl->brightness_level; get_lcd_level(); - newb = fujitsu->brightness_level; + newb = fujitsu_bl->brightness_level; vdbg_printk(FUJLAPTOP_DBG_TRACE, "brightness button event [%i -> %i (%i)]\n", - oldb, newb, fujitsu->brightness_changed); + oldb, newb, fujitsu_bl->brightness_changed); if (oldb < newb) { if (disable_brightness_adjust != 1) { @@ -882,11 +882,11 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) input->dev.parent = &device->dev; set_bit(EV_KEY, input->evbit); - set_bit(fujitsu->keycode1, input->keybit); - set_bit(fujitsu->keycode2, input->keybit); - set_bit(fujitsu->keycode3, input->keybit); - set_bit(fujitsu->keycode4, input->keybit); - set_bit(fujitsu->keycode5, input->keybit); + set_bit(fujitsu_bl->keycode1, input->keybit); + set_bit(fujitsu_bl->keycode2, input->keybit); + set_bit(fujitsu_bl->keycode3, input->keybit); + set_bit(fujitsu_bl->keycode4, input->keybit); + set_bit(fujitsu_bl->keycode5, input->keybit); set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit); set_bit(KEY_UNKNOWN, input->keybit); @@ -937,7 +937,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) #if IS_ENABLED(CONFIG_LEDS_CLASS) if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &logolamp_led); if (result == 0) { fujitsu_hotkey->logolamp_registered = 1; @@ -949,7 +949,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) && (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &kblamps_led); if (result == 0) { fujitsu_hotkey->kblamps_registered = 1; @@ -966,7 +966,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) * that an RF LED is present. */ if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &radio_led); if (result == 0) { fujitsu_hotkey->radio_led_registered = 1; @@ -983,7 +983,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) */ if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) && (call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &eco_led); if (result == 0) { fujitsu_hotkey->eco_led_registered = 1; @@ -1103,19 +1103,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) { switch (irb & 0x4ff) { case KEY1_CODE: - keycode = fujitsu->keycode1; + keycode = fujitsu_bl->keycode1; break; case KEY2_CODE: - keycode = fujitsu->keycode2; + keycode = fujitsu_bl->keycode2; break; case KEY3_CODE: - keycode = fujitsu->keycode3; + keycode = fujitsu_bl->keycode3; break; case KEY4_CODE: - keycode = fujitsu->keycode4; + keycode = fujitsu_bl->keycode4; break; case KEY5_CODE: - keycode = fujitsu->keycode5; + keycode = fujitsu_bl->keycode5; break; case 0: keycode = 0; @@ -1150,19 +1150,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) /* Initialization */ -static const struct acpi_device_id fujitsu_device_ids[] = { - {ACPI_FUJITSU_HID, 0}, +static const struct acpi_device_id fujitsu_bl_device_ids[] = { + {ACPI_FUJITSU_BL_HID, 0}, {"", 0}, }; -static struct acpi_driver acpi_fujitsu_driver = { - .name = ACPI_FUJITSU_DRIVER_NAME, +static struct acpi_driver acpi_fujitsu_bl_driver = { + .name = ACPI_FUJITSU_BL_DRIVER_NAME, .class = ACPI_FUJITSU_CLASS, - .ids = fujitsu_device_ids, + .ids = fujitsu_bl_device_ids, .ops = { - .add = acpi_fujitsu_add, - .remove = acpi_fujitsu_remove, - .notify = acpi_fujitsu_notify, + .add = acpi_fujitsu_bl_add, + .remove = acpi_fujitsu_bl_remove, + .notify = acpi_fujitsu_bl_notify, }, }; @@ -1183,7 +1183,7 @@ static struct acpi_driver acpi_fujitsu_hotkey_driver = { }; static const struct acpi_device_id fujitsu_ids[] __used = { - {ACPI_FUJITSU_HID, 0}, + {ACPI_FUJITSU_BL_HID, 0}, {ACPI_FUJITSU_HOTKEY_HID, 0}, {"", 0} }; @@ -1196,17 +1196,17 @@ static int __init fujitsu_init(void) if (acpi_disabled) return -ENODEV; - fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL); - if (!fujitsu) + fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL); + if (!fujitsu_bl) return -ENOMEM; - fujitsu->keycode1 = KEY_PROG1; - fujitsu->keycode2 = KEY_PROG2; - fujitsu->keycode3 = KEY_PROG3; - fujitsu->keycode4 = KEY_PROG4; - fujitsu->keycode5 = KEY_RFKILL; + fujitsu_bl->keycode1 = KEY_PROG1; + fujitsu_bl->keycode2 = KEY_PROG2; + fujitsu_bl->keycode3 = KEY_PROG3; + fujitsu_bl->keycode4 = KEY_PROG4; + fujitsu_bl->keycode5 = KEY_RFKILL; dmi_check_system(fujitsu_dmi_table); - result = acpi_bus_register_driver(&acpi_fujitsu_driver); + result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); if (result < 0) { ret = -ENODEV; goto fail_acpi; @@ -1214,18 +1214,18 @@ static int __init fujitsu_init(void) /* Register platform stuff */ - fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1); - if (!fujitsu->pf_device) { + fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1); + if (!fujitsu_bl->pf_device) { ret = -ENOMEM; goto fail_platform_driver; } - ret = platform_device_add(fujitsu->pf_device); + ret = platform_device_add(fujitsu_bl->pf_device); if (ret) goto fail_platform_device1; ret = - sysfs_create_group(&fujitsu->pf_device->dev.kobj, + sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); if (ret) goto fail_platform_device2; @@ -1236,19 +1236,19 @@ static int __init fujitsu_init(void) struct backlight_properties props; memset(&props, 0, sizeof(struct backlight_properties)); - max_brightness = fujitsu->max_brightness; + max_brightness = fujitsu_bl->max_brightness; props.type = BACKLIGHT_PLATFORM; props.max_brightness = max_brightness - 1; - fujitsu->bl_device = backlight_device_register("fujitsu-laptop", - NULL, NULL, - &fujitsubl_ops, - &props); - if (IS_ERR(fujitsu->bl_device)) { - ret = PTR_ERR(fujitsu->bl_device); - fujitsu->bl_device = NULL; + fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop", + NULL, NULL, + &fujitsu_bl_ops, + &props); + if (IS_ERR(fujitsu_bl->bl_device)) { + ret = PTR_ERR(fujitsu_bl->bl_device); + fujitsu_bl->bl_device = NULL; goto fail_sysfs_group; } - fujitsu->bl_device->props.brightness = fujitsu->brightness_level; + fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level; } ret = platform_driver_register(&fujitsupf_driver); @@ -1272,9 +1272,9 @@ static int __init fujitsu_init(void) /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3) - fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN; + fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN; else - fujitsu->bl_device->props.power = FB_BLANK_UNBLANK; + fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK; } pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n"); @@ -1286,18 +1286,18 @@ static int __init fujitsu_init(void) fail_hotkey: platform_driver_unregister(&fujitsupf_driver); fail_backlight: - backlight_device_unregister(fujitsu->bl_device); + backlight_device_unregister(fujitsu_bl->bl_device); fail_sysfs_group: - sysfs_remove_group(&fujitsu->pf_device->dev.kobj, + sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); fail_platform_device2: - platform_device_del(fujitsu->pf_device); + platform_device_del(fujitsu_bl->pf_device); fail_platform_device1: - platform_device_put(fujitsu->pf_device); + platform_device_put(fujitsu_bl->pf_device); fail_platform_driver: - acpi_bus_unregister_driver(&acpi_fujitsu_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver); fail_acpi: - kfree(fujitsu); + kfree(fujitsu_bl); return ret; } @@ -1310,16 +1310,16 @@ static void __exit fujitsu_cleanup(void) platform_driver_unregister(&fujitsupf_driver); - backlight_device_unregister(fujitsu->bl_device); + backlight_device_unregister(fujitsu_bl->bl_device); - sysfs_remove_group(&fujitsu->pf_device->dev.kobj, + sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); - platform_device_unregister(fujitsu->pf_device); + platform_device_unregister(fujitsu_bl->pf_device); - acpi_bus_unregister_driver(&acpi_fujitsu_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver); - kfree(fujitsu); + kfree(fujitsu_bl); pr_info("driver unloaded\n"); } From 6942eabc140eac54d5c0db2695eed63768209c34 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:25 +0100 Subject: [PATCH 005/384] platform/x86: fujitsu-laptop: replace "hotkey" with "laptop" in symbol names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functions, structures, variables and constants whose names currently contain the "hotkey" keyword are not only responsible for handling hotkeys, but also other laptop-related features (rfkill, lid, dock, LEDs). Fix their naming by using a consistent "_laptop"/"_LAPTOP" suffix/infix. Update comments so that they reflect this change. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 158 +++++++++++++------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index e1737b9d9d95..70124004b346 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -82,9 +82,9 @@ #define ACPI_FUJITSU_BL_HID "FUJ02B1" #define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" #define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1" -#define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" -#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" -#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" +#define ACPI_FUJITSU_LAPTOP_HID "FUJ02E3" +#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" +#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME "Fujitsu FUJ02E3" #define ACPI_FUJITSU_NOTIFY_CODE1 0x80 @@ -154,8 +154,8 @@ static struct fujitsu_bl *fujitsu_bl; static int use_alt_lcd_levels = -1; static int disable_brightness_adjust = -1; -/* Device used to access other hotkeys on the laptop */ -struct fujitsu_hotkey_t { +/* Device used to access hotkeys and other features on the laptop */ +struct fujitsu_laptop { acpi_handle acpi_handle; struct acpi_device *dev; struct input_dev *input; @@ -171,9 +171,9 @@ struct fujitsu_hotkey_t { int eco_led_registered; }; -static struct fujitsu_hotkey_t *fujitsu_hotkey; +static struct fujitsu_laptop *fujitsu_laptop; -static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event); +static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event); #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); @@ -239,7 +239,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) unsigned long long value; acpi_handle handle = NULL; - status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle); + status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "FUNC interface is not present\n"); @@ -567,9 +567,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x100)) + if (!(fujitsu_laptop->rfkill_supported & 0x100)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x100) + if (fujitsu_laptop->rfkill_state & 0x100) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +579,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x200)) + if (!(fujitsu_laptop->rfkill_supported & 0x200)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x200) + if (fujitsu_laptop->rfkill_state & 0x200) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +591,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x20)) + if (!(fujitsu_laptop->rfkill_supported & 0x20)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x20) + if (fujitsu_laptop->rfkill_state & 0x20) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); @@ -840,7 +840,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event) /* ACPI device for hotkey handling */ -static int acpi_fujitsu_hotkey_add(struct acpi_device *device) +static int acpi_fujitsu_laptop_add(struct acpi_device *device) { int result = 0; int state = 0; @@ -851,32 +851,32 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (!device) return -EINVAL; - fujitsu_hotkey->acpi_handle = device->handle; + fujitsu_laptop->acpi_handle = device->handle; sprintf(acpi_device_name(device), "%s", - ACPI_FUJITSU_HOTKEY_DEVICE_NAME); + ACPI_FUJITSU_LAPTOP_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); - device->driver_data = fujitsu_hotkey; + device->driver_data = fujitsu_laptop; /* kfifo */ - spin_lock_init(&fujitsu_hotkey->fifo_lock); - error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), + spin_lock_init(&fujitsu_laptop->fifo_lock); + error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int), GFP_KERNEL); if (error) { pr_err("kfifo_alloc failed\n"); goto err_stop; } - fujitsu_hotkey->input = input = input_allocate_device(); + fujitsu_laptop->input = input = input_allocate_device(); if (!input) { error = -ENOMEM; goto err_free_fifo; } - snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys), + snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys), "%s/video/input0", acpi_device_hid(device)); input->name = acpi_device_name(device); - input->phys = fujitsu_hotkey->phys; + input->phys = fujitsu_laptop->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; input->dev.parent = &device->dev; @@ -894,7 +894,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (error) goto err_free_input_dev; - error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state); + error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state); if (error) { pr_err("Error reading power state\n"); goto err_unregister_input_dev; @@ -904,7 +904,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); - fujitsu_hotkey->dev = device; + fujitsu_laptop->dev = device; if (acpi_has_method(device->handle, METHOD_NAME__INI)) { vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); @@ -920,16 +920,16 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) ; /* No action, result is discarded */ vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); - fujitsu_hotkey->rfkill_supported = + fujitsu_laptop->rfkill_supported = call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0); /* Make sure our bitmask of supported functions is cleared if the RFKILL function block is not implemented, like on the S7020. */ - if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD) - fujitsu_hotkey->rfkill_supported = 0; + if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD) + fujitsu_laptop->rfkill_supported = 0; - if (fujitsu_hotkey->rfkill_supported) - fujitsu_hotkey->rfkill_state = + if (fujitsu_laptop->rfkill_supported) + fujitsu_laptop->rfkill_state = call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ @@ -940,7 +940,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &logolamp_led); if (result == 0) { - fujitsu_hotkey->logolamp_registered = 1; + fujitsu_laptop->logolamp_registered = 1; } else { pr_err("Could not register LED handler for logo lamp, error %i\n", result); @@ -952,7 +952,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &kblamps_led); if (result == 0) { - fujitsu_hotkey->kblamps_registered = 1; + fujitsu_laptop->kblamps_registered = 1; } else { pr_err("Could not register LED handler for keyboard lamps, error %i\n", result); @@ -969,7 +969,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &radio_led); if (result == 0) { - fujitsu_hotkey->radio_led_registered = 1; + fujitsu_laptop->radio_led_registered = 1; } else { pr_err("Could not register LED handler for radio LED, error %i\n", result); @@ -986,7 +986,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &eco_led); if (result == 0) { - fujitsu_hotkey->eco_led_registered = 1; + fujitsu_laptop->eco_led_registered = 1; } else { pr_err("Could not register LED handler for eco LED, error %i\n", result); @@ -1002,47 +1002,47 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) err_free_input_dev: input_free_device(input); err_free_fifo: - kfifo_free(&fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_laptop->fifo); err_stop: return error; } -static int acpi_fujitsu_hotkey_remove(struct acpi_device *device) +static int acpi_fujitsu_laptop_remove(struct acpi_device *device) { - struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device); - struct input_dev *input = fujitsu_hotkey->input; + struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device); + struct input_dev *input = fujitsu_laptop->input; #if IS_ENABLED(CONFIG_LEDS_CLASS) - if (fujitsu_hotkey->logolamp_registered) + if (fujitsu_laptop->logolamp_registered) led_classdev_unregister(&logolamp_led); - if (fujitsu_hotkey->kblamps_registered) + if (fujitsu_laptop->kblamps_registered) led_classdev_unregister(&kblamps_led); - if (fujitsu_hotkey->radio_led_registered) + if (fujitsu_laptop->radio_led_registered) led_classdev_unregister(&radio_led); - if (fujitsu_hotkey->eco_led_registered) + if (fujitsu_laptop->eco_led_registered) led_classdev_unregister(&eco_led); #endif input_unregister_device(input); - kfifo_free(&fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_laptop->fifo); - fujitsu_hotkey->acpi_handle = NULL; + fujitsu_laptop->acpi_handle = NULL; return 0; } -static void acpi_fujitsu_hotkey_press(int keycode) +static void acpi_fujitsu_laptop_press(int keycode) { - struct input_dev *input = fujitsu_hotkey->input; + struct input_dev *input = fujitsu_laptop->input; int status; - status = kfifo_in_locked(&fujitsu_hotkey->fifo, + status = kfifo_in_locked(&fujitsu_laptop->fifo, (unsigned char *)&keycode, sizeof(keycode), - &fujitsu_hotkey->fifo_lock); + &fujitsu_laptop->fifo_lock); if (status != sizeof(keycode)) { vdbg_printk(FUJLAPTOP_DBG_WARN, "Could not push keycode [0x%x]\n", keycode); @@ -1054,16 +1054,16 @@ static void acpi_fujitsu_hotkey_press(int keycode) "Push keycode into ringbuffer [%d]\n", keycode); } -static void acpi_fujitsu_hotkey_release(void) +static void acpi_fujitsu_laptop_release(void) { - struct input_dev *input = fujitsu_hotkey->input; + struct input_dev *input = fujitsu_laptop->input; int keycode, status; while (true) { - status = kfifo_out_locked(&fujitsu_hotkey->fifo, + status = kfifo_out_locked(&fujitsu_laptop->fifo, (unsigned char *)&keycode, sizeof(keycode), - &fujitsu_hotkey->fifo_lock); + &fujitsu_laptop->fifo_lock); if (status != sizeof(keycode)) return; input_report_key(input, keycode, 0); @@ -1073,14 +1073,14 @@ static void acpi_fujitsu_hotkey_release(void) } } -static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) +static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) { struct input_dev *input; int keycode; unsigned int irb = 1; int i; - input = fujitsu_hotkey->input; + input = fujitsu_laptop->input; if (event != ACPI_FUJITSU_NOTIFY_CODE1) { keycode = KEY_UNKNOWN; @@ -1093,8 +1093,8 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) return; } - if (fujitsu_hotkey->rfkill_supported) - fujitsu_hotkey->rfkill_state = + if (fujitsu_laptop->rfkill_supported) + fujitsu_laptop->rfkill_state = call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); i = 0; @@ -1128,16 +1128,16 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } if (keycode > 0) - acpi_fujitsu_hotkey_press(keycode); + acpi_fujitsu_laptop_press(keycode); else if (keycode == 0) - acpi_fujitsu_hotkey_release(); + acpi_fujitsu_laptop_release(); } /* On some models (first seen on the Skylake-based Lifebook * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is * handled in software; its state is queried using FUNC_RFKILL */ - if ((fujitsu_hotkey->rfkill_supported & BIT(26)) && + if ((fujitsu_laptop->rfkill_supported & BIT(26)) && (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) { keycode = KEY_TOUCHPAD_TOGGLE; input_report_key(input, keycode, 1); @@ -1166,25 +1166,25 @@ static struct acpi_driver acpi_fujitsu_bl_driver = { }, }; -static const struct acpi_device_id fujitsu_hotkey_device_ids[] = { - {ACPI_FUJITSU_HOTKEY_HID, 0}, +static const struct acpi_device_id fujitsu_laptop_device_ids[] = { + {ACPI_FUJITSU_LAPTOP_HID, 0}, {"", 0}, }; -static struct acpi_driver acpi_fujitsu_hotkey_driver = { - .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME, +static struct acpi_driver acpi_fujitsu_laptop_driver = { + .name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME, .class = ACPI_FUJITSU_CLASS, - .ids = fujitsu_hotkey_device_ids, + .ids = fujitsu_laptop_device_ids, .ops = { - .add = acpi_fujitsu_hotkey_add, - .remove = acpi_fujitsu_hotkey_remove, - .notify = acpi_fujitsu_hotkey_notify, + .add = acpi_fujitsu_laptop_add, + .remove = acpi_fujitsu_laptop_remove, + .notify = acpi_fujitsu_laptop_notify, }, }; static const struct acpi_device_id fujitsu_ids[] __used = { {ACPI_FUJITSU_BL_HID, 0}, - {ACPI_FUJITSU_HOTKEY_HID, 0}, + {ACPI_FUJITSU_LAPTOP_HID, 0}, {"", 0} }; MODULE_DEVICE_TABLE(acpi, fujitsu_ids); @@ -1255,18 +1255,18 @@ static int __init fujitsu_init(void) if (ret) goto fail_backlight; - /* Register hotkey driver */ + /* Register laptop driver */ - fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL); - if (!fujitsu_hotkey) { + fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL); + if (!fujitsu_laptop) { ret = -ENOMEM; - goto fail_hotkey; + goto fail_laptop; } - result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver); + result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); if (result < 0) { ret = -ENODEV; - goto fail_hotkey1; + goto fail_laptop1; } /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ @@ -1281,9 +1281,9 @@ static int __init fujitsu_init(void) return 0; -fail_hotkey1: - kfree(fujitsu_hotkey); -fail_hotkey: +fail_laptop1: + kfree(fujitsu_laptop); +fail_laptop: platform_driver_unregister(&fujitsupf_driver); fail_backlight: backlight_device_unregister(fujitsu_bl->bl_device); @@ -1304,9 +1304,9 @@ static int __init fujitsu_init(void) static void __exit fujitsu_cleanup(void) { - acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver); - kfree(fujitsu_hotkey); + kfree(fujitsu_laptop); platform_driver_unregister(&fujitsupf_driver); From 1650602691f4e8ea8f6e306452a72ac9a611932a Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:26 +0100 Subject: [PATCH 006/384] platform/x86: fujitsu-laptop: make platform-related variables match naming convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace "fujitsupf" with "fujitsu_pf" in all platform-related variable names to match the module-wide naming convention. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 70124004b346..6cdd1b334e8a 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -607,7 +607,7 @@ static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store); static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store); static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store); -static struct attribute *fujitsupf_attributes[] = { +static struct attribute *fujitsu_pf_attributes[] = { &dev_attr_brightness_changed.attr, &dev_attr_max_brightness.attr, &dev_attr_lcd_level.attr, @@ -617,11 +617,11 @@ static struct attribute *fujitsupf_attributes[] = { NULL }; -static struct attribute_group fujitsupf_attribute_group = { - .attrs = fujitsupf_attributes +static struct attribute_group fujitsu_pf_attribute_group = { + .attrs = fujitsu_pf_attributes }; -static struct platform_driver fujitsupf_driver = { +static struct platform_driver fujitsu_pf_driver = { .driver = { .name = "fujitsu-laptop", } @@ -1226,7 +1226,7 @@ static int __init fujitsu_init(void) ret = sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); if (ret) goto fail_platform_device2; @@ -1251,7 +1251,7 @@ static int __init fujitsu_init(void) fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level; } - ret = platform_driver_register(&fujitsupf_driver); + ret = platform_driver_register(&fujitsu_pf_driver); if (ret) goto fail_backlight; @@ -1284,12 +1284,12 @@ static int __init fujitsu_init(void) fail_laptop1: kfree(fujitsu_laptop); fail_laptop: - platform_driver_unregister(&fujitsupf_driver); + platform_driver_unregister(&fujitsu_pf_driver); fail_backlight: backlight_device_unregister(fujitsu_bl->bl_device); fail_sysfs_group: sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); fail_platform_device2: platform_device_del(fujitsu_bl->pf_device); fail_platform_device1: @@ -1308,12 +1308,12 @@ static void __exit fujitsu_cleanup(void) kfree(fujitsu_laptop); - platform_driver_unregister(&fujitsupf_driver); + platform_driver_unregister(&fujitsu_pf_driver); backlight_device_unregister(fujitsu_bl->bl_device); sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); platform_device_unregister(fujitsu_bl->pf_device); From 8ef27bd3410c4e5856bac2315ef51224926020e0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:27 +0100 Subject: [PATCH 007/384] platform/x86: fujitsu-laptop: rename FUNC_RFKILL to FUNC_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FUNC subfunction 0x1000 is currently referred to as FUNC_RFKILL, which is misleading, because it handles more than just radio devices (also lid, dock, LEDs). Rename the FUNC_RFKILL constant to FUNC_FLAGS. Replace "rfkill" with "flags" in the names of its associated fields inside struct fujitsu_laptop. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 50 +++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 6cdd1b334e8a..55d696262301 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -89,7 +89,7 @@ #define ACPI_FUJITSU_NOTIFY_CODE1 0x80 /* FUNC interface - command values */ -#define FUNC_RFKILL 0x1000 +#define FUNC_FLAGS 0x1000 #define FUNC_LEDS 0x1001 #define FUNC_BUTTONS 0x1002 #define FUNC_BACKLIGHT 0x1004 @@ -163,8 +163,8 @@ struct fujitsu_laptop { struct platform_device *pf_device; struct kfifo fifo; spinlock_t fifo_lock; - int rfkill_supported; - int rfkill_state; + int flags_supported; + int flags_state; int logolamp_registered; int kblamps_registered; int radio_led_registered; @@ -300,9 +300,9 @@ static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); + return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON); else - return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); + return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0); } static int eco_led_set(struct led_classdev *cdev, @@ -346,7 +346,7 @@ static enum led_brightness radio_led_get(struct led_classdev *cdev) { enum led_brightness brightness = LED_OFF; - if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON) + if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON) brightness = LED_FULL; return brightness; @@ -567,9 +567,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x100)) + if (!(fujitsu_laptop->flags_supported & 0x100)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x100) + if (fujitsu_laptop->flags_state & 0x100) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +579,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x200)) + if (!(fujitsu_laptop->flags_supported & 0x200)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x200) + if (fujitsu_laptop->flags_state & 0x200) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +591,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x20)) + if (!(fujitsu_laptop->flags_supported & 0x20)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x20) + if (fujitsu_laptop->flags_state & 0x20) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); @@ -920,17 +920,17 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device) ; /* No action, result is discarded */ vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); - fujitsu_laptop->rfkill_supported = - call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0); + fujitsu_laptop->flags_supported = + call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0); /* Make sure our bitmask of supported functions is cleared if the RFKILL function block is not implemented, like on the S7020. */ - if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD) - fujitsu_laptop->rfkill_supported = 0; + if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD) + fujitsu_laptop->flags_supported = 0; - if (fujitsu_laptop->rfkill_supported) - fujitsu_laptop->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + if (fujitsu_laptop->flags_supported) + fujitsu_laptop->flags_state = + call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0)); @@ -1093,9 +1093,9 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) return; } - if (fujitsu_laptop->rfkill_supported) - fujitsu_laptop->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + if (fujitsu_laptop->flags_supported) + fujitsu_laptop->flags_state = + call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0); i = 0; while ((irb = @@ -1135,10 +1135,10 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) /* On some models (first seen on the Skylake-based Lifebook * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is - * handled in software; its state is queried using FUNC_RFKILL + * handled in software; its state is queried using FUNC_FLAGS */ - if ((fujitsu_laptop->rfkill_supported & BIT(26)) && - (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) { + if ((fujitsu_laptop->flags_supported & BIT(26)) && + (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) { keycode = KEY_TOUCHPAD_TOGGLE; input_report_key(input, keycode, 1); input_sync(input); From d3dd4480f8a911198d85b3cdb97f22db6539d2d1 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:28 +0100 Subject: [PATCH 008/384] platform/x86: fujitsu-laptop: replace numeric values with constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace three repeating numeric values with constants to improve code clarity. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 55d696262301..deef40a03b6d 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -97,6 +97,11 @@ /* FUNC interface - responses */ #define UNSUPPORTED_CMD 0x80000000 +/* FUNC interface - status flags */ +#define FLAG_RFKILL 0x020 +#define FLAG_LID 0x100 +#define FLAG_DOCK 0x200 + #if IS_ENABLED(CONFIG_LEDS_CLASS) /* FUNC interface - LED control */ #define FUNC_LED_OFF 0x1 @@ -567,9 +572,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x100)) + if (!(fujitsu_laptop->flags_supported & FLAG_LID)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x100) + if (fujitsu_laptop->flags_state & FLAG_LID) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +584,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x200)) + if (!(fujitsu_laptop->flags_supported & FLAG_DOCK)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x200) + if (fujitsu_laptop->flags_state & FLAG_DOCK) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +596,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x20)) + if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x20) + if (fujitsu_laptop->flags_state & FLAG_RFKILL) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); From 8c590e339f37022dff209ef16cf699531df1a0ca Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:29 +0100 Subject: [PATCH 009/384] platform/x86: fujitsu-laptop: remove redundant forward declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both acpi_fujitsu_bl_notify() and acpi_fujitsu_laptop_notify() are defined before they are first used, so remove their forward declarations as they are redundant. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index deef40a03b6d..ba0c0c211251 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -178,8 +178,6 @@ struct fujitsu_laptop { static struct fujitsu_laptop *fujitsu_laptop; -static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event); - #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); static int logolamp_set(struct led_classdev *cdev, @@ -227,8 +225,6 @@ static struct led_classdev eco_led = { static u32 dbg_level = 0x03; #endif -static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event); - /* Fujitsu ACPI interface function */ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) From c1d1e8a051761fb808308dab32b9351e1d1fbb9b Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:30 +0100 Subject: [PATCH 010/384] platform/x86: fujitsu-laptop: simplify acpi_bus_register_driver() error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate variable is not needed to handle error codes returned by acpi_bus_register_driver(). If the latter fails, just use the value it returned as the value returned by fujitsu_init(). Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index ba0c0c211251..a5478a011b90 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1192,7 +1192,7 @@ MODULE_DEVICE_TABLE(acpi, fujitsu_ids); static int __init fujitsu_init(void) { - int ret, result, max_brightness; + int ret, max_brightness; if (acpi_disabled) return -ENODEV; @@ -1207,11 +1207,9 @@ static int __init fujitsu_init(void) fujitsu_bl->keycode5 = KEY_RFKILL; dmi_check_system(fujitsu_dmi_table); - result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); - if (result < 0) { - ret = -ENODEV; + ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); + if (ret) goto fail_acpi; - } /* Register platform stuff */ @@ -1264,11 +1262,9 @@ static int __init fujitsu_init(void) goto fail_laptop; } - result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); - if (result < 0) { - ret = -ENODEV; + ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); + if (ret) goto fail_laptop1; - } /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { From 5296a73613814a15e54ebddc2843ec0f84951d60 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:32 +0100 Subject: [PATCH 011/384] platform/x86: fujitsu-laptop: autodetect LCD interface on all models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Presence of ACPI method SBL2 should be checked on all models rather than just the ones with predefined hotkey keycode overrides. Move most of dmi_check_cb_common() to acpi_fujitsu_bl_add(). Adjust indentation to make checkpatch happy. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index a5478a011b90..56804c4db33f 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -631,15 +631,6 @@ static struct platform_driver fujitsu_pf_driver = { static void __init dmi_check_cb_common(const struct dmi_system_id *id) { pr_info("Identified laptop model '%s'\n", id->ident); - if (use_alt_lcd_levels == -1) { - if (acpi_has_method(NULL, - "\\_SB.PCI0.LPCB.FJEX.SBL2")) - use_alt_lcd_levels = 1; - else - use_alt_lcd_levels = 0; - vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as " - "%i\n", use_alt_lcd_levels); - } } static int __init dmi_check_cb_s6410(const struct dmi_system_id *id) @@ -751,6 +742,15 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device) pr_err("_INI Method failed\n"); } + if (use_alt_lcd_levels == -1) { + if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2")) + use_alt_lcd_levels = 1; + else + use_alt_lcd_levels = 0; + vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n", + use_alt_lcd_levels); + } + /* do config (detect defaults) */ use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0; disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0; From 5802d0bc3fe9f598f0ff3f5fd7fd8c5c935a1b5d Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:33 +0100 Subject: [PATCH 012/384] platform/x86: fujitsu-laptop: remove redundant MODULE_ALIAS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MODULE_DEVICE_TABLE is all that is needed for fujitsu-laptop to be properly autoloaded based on presence of its associated ACPI devices, so remove redundant MODULE_ALIAS entries. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 56804c4db33f..e12cc3504d48 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1338,7 +1338,3 @@ MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon"); MODULE_DESCRIPTION("Fujitsu laptop extras support"); MODULE_VERSION(FUJITSU_DRIVER_VERSION); MODULE_LICENSE("GPL"); - -MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*"); -MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*"); -MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*"); From 71050ae7bf83e4d71a859257d11adc5de517073e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Mon, 20 Feb 2017 14:50:22 -0500 Subject: [PATCH 013/384] platform/x86: asus-wmi: Detect quirk_no_rfkill from the DSDT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Asus laptops that have an airplane-mode indicator LED, also have the WMI WLAN user bit set, and the following bits in their DSDT: Scope (_SB) { (...) Device (ATKD) { (...) Method (WMNB, 3, Serialized) { (...) If (LEqual (IIA0, 0x00010002)) { OWGD (IIA1) Return (One) } } } } So when asus-wmi uses ASUS_WMI_DEVID_WLAN_LED (0x00010002) to store the wlan state, it drives the airplane-mode indicator LED (through the call to OWGD) in an inverted fashion: the LED is ON when airplane mode is OFF (since wlan is ON), and vice-versa. This commit skips registering RFKill switches at all for these laptops, to allow the asus-wireless driver to drive the airplane mode LED correctly through the ASHS ACPI device. Relying on the presence of ASHS and ASUS_WMI_DSTS_USER_BIT avoids adding DMI-based quirks for at least 21 different laptops. Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko --- drivers/platform/x86/asus-wmi.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 43cb680adbb4..8499d3ae4257 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -159,6 +159,8 @@ MODULE_LICENSE("GPL"); #define USB_INTEL_XUSB2PR 0xD0 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 +static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; + struct bios_args { u32 arg0; u32 arg1; @@ -2051,6 +2053,16 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) return 0; } +static bool ashs_present(void) +{ + int i = 0; + while (ashs_ids[i]) { + if (acpi_dev_found(ashs_ids[i++])) + return true; + } + return false; +} + /* * WMI Driver */ @@ -2095,6 +2107,13 @@ static int asus_wmi_add(struct platform_device *pdev) if (err) goto fail_leds; + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); + if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) + asus->driver->wlan_ctrl_by_user = 1; + + if (asus->driver->wlan_ctrl_by_user && ashs_present()) + asus->driver->quirks->no_rfkill = 1; + if (!asus->driver->quirks->no_rfkill) { err = asus_wmi_rfkill_init(asus); if (err) @@ -2134,10 +2153,6 @@ static int asus_wmi_add(struct platform_device *pdev) if (err) goto fail_debugfs; - asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); - if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) - asus->driver->wlan_ctrl_by_user = 1; - return 0; fail_debugfs: From d1a9ccc4b1374a5a7762031fd8e4e398c68549e6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Feb 2017 11:02:48 +0000 Subject: [PATCH 014/384] scsi: qedi: fix missing return error code check on call to qedi_setup_int The call to qedi_setup_int is not updating the return code rc yet rc is being checked for an error. Fix this by assigning rc to the return code from the call to qedi_setup_int. Signed-off-by: Colin Ian King Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 5eda21d903e9..8e3d92807cb8 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1805,7 +1805,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) */ qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params); - qedi_setup_int(qedi); + rc = qedi_setup_int(qedi); if (rc) goto stop_iscsi_func; From 6f8830f5bbab16e54f261de187f3df4644a5b977 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 27 Feb 2017 16:58:36 -0800 Subject: [PATCH 015/384] scsi: libiscsi: add lock around task lists to fix list corruption regression There's a rather long standing regression from the commit "libiscsi: Reduce locking contention in fast path" Depending on iSCSI target behavior, it's possible to hit the case in iscsi_complete_task where the task is still on a pending list (!list_empty(&task->running)). When that happens the task is removed from the list while holding the session back_lock, but other task list modification occur under the frwd_lock. That leads to linked list corruption and eventually a panicked system. Rather than back out the session lock split entirely, in order to try and keep some of the performance gains this patch adds another lock to maintain the task lists integrity. Major enterprise supported kernels have been backing out the lock split for while now, thanks to the efforts at IBM where a lab setup has the most reliable reproducer I've seen on this issue. This patch has been tested there successfully. Signed-off-by: Chris Leech Fixes: 659743b02c41 ("[SCSI] libiscsi: Reduce locking contention in fast path") Reported-by: Prashantha Subbarao Reviewed-by: Guilherme G. Piccoli Cc: # v3.15+ Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++- include/scsi/libiscsi.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d5..3fca34a675af 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); task->state = state; - if (!list_empty(&task->running)) + spin_lock_bh(&conn->taskqueuelock); + if (!list_empty(&task->running)) { + pr_debug_once("%s while task on list", __func__); list_del_init(&task->running); + } + spin_unlock_bh(&conn->taskqueuelock); if (conn->task == task) conn->task = NULL; @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (session->tt->xmit_task(task)) goto free_task; } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->mgmtqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task) * this may be on the requeue list already if the xmit_task callout * is handling the r2ts while we are adding new ones */ + spin_lock_bh(&conn->taskqueuelock); if (list_empty(&task->running)) list_add_tail(&task->running, &conn->requeue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * only have one nop-out as a ping from us and targets should not * overflow us with nop-ins */ + spin_lock_bh(&conn->taskqueuelock); check_mgmt: while (!list_empty(&conn->mgmtqueue)) { conn->task = list_entry(conn->mgmtqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (iscsi_prep_mgmt_task(conn, conn->task)) { /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); __iscsi_put_task(conn->task); spin_unlock_bh(&conn->session->back_lock); conn->task = NULL; + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); } /* process pending command queue */ @@ -1535,19 +1547,24 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { fail_scsi_task(conn->task, DID_IMM_RETRY); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { if (rc == -ENOMEM || rc == -EACCES) { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; + spin_unlock_bh(&conn->taskqueuelock); goto done; } else fail_scsi_task(conn->task, DID_ABORT); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); @@ -1558,6 +1575,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * we need to check the mgmt queue for nops that need to * be sent to aviod starvation */ + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } @@ -1577,12 +1595,15 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) conn->task = task; list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; + spin_unlock_bh(&conn->taskqueuelock); rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } + spin_unlock_bh(&conn->taskqueuelock); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_reject; } } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->cmdqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, INIT_LIST_HEAD(&conn->mgmtqueue); INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); + spin_lock_init(&conn->taskqueuelock); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); /* allocate login_task used for the login/text sequences */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b0e275de6dec..583875ea136a 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -196,6 +196,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ + spinlock_t taskqueuelock; /* protects the next three lists */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ From a4b0e8a4e92b1baa860e744847fbdb84a50a5071 Mon Sep 17 00:00:00 2001 From: "Potomski, MichalX" Date: Thu, 23 Feb 2017 09:05:30 +0000 Subject: [PATCH 016/384] scsi: ufs: Factor out ufshcd_read_desc_param Since in UFS 2.1 specification some of the descriptor lengths differs from 2.0 specification and some devices, which are reporting spec version 2.0 have different descriptor lengths we can not rely on hardcoded values taken from 2.0 specification. This patch introduces reading these lengths per each device from descriptor headers at probe time to ensure their correctness. Signed-off-by: Michal' Potomski Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 22 ++-- drivers/scsi/ufs/ufshcd.c | 231 ++++++++++++++++++++++++++++---------- drivers/scsi/ufs/ufshcd.h | 15 +++ 3 files changed, 196 insertions(+), 72 deletions(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 318e4a1f76c9..54deeb754db5 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -146,7 +146,7 @@ enum attr_idn { /* Descriptor idn for Query requests */ enum desc_idn { QUERY_DESC_IDN_DEVICE = 0x0, - QUERY_DESC_IDN_CONFIGURAION = 0x1, + QUERY_DESC_IDN_CONFIGURATION = 0x1, QUERY_DESC_IDN_UNIT = 0x2, QUERY_DESC_IDN_RFU_0 = 0x3, QUERY_DESC_IDN_INTERCONNECT = 0x4, @@ -162,19 +162,13 @@ enum desc_header_offset { QUERY_DESC_DESC_TYPE_OFFSET = 0x01, }; -enum ufs_desc_max_size { - QUERY_DESC_DEVICE_MAX_SIZE = 0x40, - QUERY_DESC_CONFIGURAION_MAX_SIZE = 0x90, - QUERY_DESC_UNIT_MAX_SIZE = 0x23, - QUERY_DESC_INTERCONNECT_MAX_SIZE = 0x06, - /* - * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes - * of descriptor header. - */ - QUERY_DESC_STRING_MAX_SIZE = 0xFE, - QUERY_DESC_GEOMETRY_MAX_SIZE = 0x44, - QUERY_DESC_POWER_MAX_SIZE = 0x62, - QUERY_DESC_RFU_MAX_SIZE = 0x00, +enum ufs_desc_def_size { + QUERY_DESC_DEVICE_DEF_SIZE = 0x40, + QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90, + QUERY_DESC_UNIT_DEF_SIZE = 0x23, + QUERY_DESC_INTERCONNECT_DEF_SIZE = 0x06, + QUERY_DESC_GEOMETRY_DEF_SIZE = 0x44, + QUERY_DESC_POWER_DEF_SIZE = 0x62, }; /* Unit descriptor parameters offsets in bytes*/ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index dc6efbd1be8e..1359913bf840 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -100,19 +100,6 @@ #define ufshcd_hex_dump(prefix_str, buf, len) \ print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false) -static u32 ufs_query_desc_max_size[] = { - QUERY_DESC_DEVICE_MAX_SIZE, - QUERY_DESC_CONFIGURAION_MAX_SIZE, - QUERY_DESC_UNIT_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_INTERCONNECT_MAX_SIZE, - QUERY_DESC_STRING_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_GEOMETRY_MAX_SIZE, - QUERY_DESC_POWER_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, -}; - enum { UFSHCD_MAX_CHANNEL = 0, UFSHCD_MAX_ID = 1, @@ -2857,7 +2844,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, goto out; } - if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { + if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n", __func__, *buf_len); err = -EINVAL; @@ -2937,6 +2924,92 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba, return err; } +/** + * ufshcd_read_desc_length - read the specified descriptor length from header + * @hba: Pointer to adapter instance + * @desc_id: descriptor idn value + * @desc_index: descriptor index + * @desc_length: pointer to variable to read the length of descriptor + * + * Return 0 in case of success, non-zero otherwise + */ +static int ufshcd_read_desc_length(struct ufs_hba *hba, + enum desc_idn desc_id, + int desc_index, + int *desc_length) +{ + int ret; + u8 header[QUERY_DESC_HDR_SIZE]; + int header_len = QUERY_DESC_HDR_SIZE; + + if (desc_id >= QUERY_DESC_IDN_MAX) + return -EINVAL; + + ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC, + desc_id, desc_index, 0, header, + &header_len); + + if (ret) { + dev_err(hba->dev, "%s: Failed to get descriptor header id %d", + __func__, desc_id); + return ret; + } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) { + dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch", + __func__, header[QUERY_DESC_DESC_TYPE_OFFSET], + desc_id); + ret = -EINVAL; + } + + *desc_length = header[QUERY_DESC_LENGTH_OFFSET]; + return ret; + +} + +/** + * ufshcd_map_desc_id_to_length - map descriptor IDN to its length + * @hba: Pointer to adapter instance + * @desc_id: descriptor idn value + * @desc_len: mapped desc length (out) + * + * Return 0 in case of success, non-zero otherwise + */ +int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, + enum desc_idn desc_id, int *desc_len) +{ + switch (desc_id) { + case QUERY_DESC_IDN_DEVICE: + *desc_len = hba->desc_size.dev_desc; + break; + case QUERY_DESC_IDN_POWER: + *desc_len = hba->desc_size.pwr_desc; + break; + case QUERY_DESC_IDN_GEOMETRY: + *desc_len = hba->desc_size.geom_desc; + break; + case QUERY_DESC_IDN_CONFIGURATION: + *desc_len = hba->desc_size.conf_desc; + break; + case QUERY_DESC_IDN_UNIT: + *desc_len = hba->desc_size.unit_desc; + break; + case QUERY_DESC_IDN_INTERCONNECT: + *desc_len = hba->desc_size.interc_desc; + break; + case QUERY_DESC_IDN_STRING: + *desc_len = QUERY_DESC_MAX_SIZE; + break; + case QUERY_DESC_IDN_RFU_0: + case QUERY_DESC_IDN_RFU_1: + *desc_len = 0; + break; + default: + *desc_len = 0; + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(ufshcd_map_desc_id_to_length); + /** * ufshcd_read_desc_param - read the specified descriptor parameter * @hba: Pointer to adapter instance @@ -2951,42 +3024,49 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba, static int ufshcd_read_desc_param(struct ufs_hba *hba, enum desc_idn desc_id, int desc_index, - u32 param_offset, + u8 param_offset, u8 *param_read_buf, - u32 param_size) + u8 param_size) { int ret; u8 *desc_buf; - u32 buff_len; + int buff_len; bool is_kmalloc = true; - /* safety checks */ - if (desc_id >= QUERY_DESC_IDN_MAX) + /* Safety check */ + if (desc_id >= QUERY_DESC_IDN_MAX || !param_size) return -EINVAL; - buff_len = ufs_query_desc_max_size[desc_id]; - if ((param_offset + param_size) > buff_len) - return -EINVAL; + /* Get the max length of descriptor from structure filled up at probe + * time. + */ + ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len); - if (!param_offset && (param_size == buff_len)) { - /* memory space already available to hold full descriptor */ - desc_buf = param_read_buf; - is_kmalloc = false; - } else { - /* allocate memory to hold full descriptor */ + /* Sanity checks */ + if (ret || !buff_len) { + dev_err(hba->dev, "%s: Failed to get full descriptor length", + __func__); + return ret; + } + + /* Check whether we need temp memory */ + if (param_offset != 0 || param_size < buff_len) { desc_buf = kmalloc(buff_len, GFP_KERNEL); if (!desc_buf) return -ENOMEM; + } else { + desc_buf = param_read_buf; + is_kmalloc = false; } + /* Request for full descriptor */ ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC, - desc_id, desc_index, 0, desc_buf, - &buff_len); + desc_id, desc_index, 0, + desc_buf, &buff_len); if (ret) { dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d", __func__, desc_id, desc_index, param_offset, ret); - goto out; } @@ -2998,25 +3078,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba, goto out; } - /* - * While reading variable size descriptors (like string descriptor), - * some UFS devices may report the "LENGTH" (field in "Transaction - * Specific fields" of Query Response UPIU) same as what was requested - * in Query Request UPIU instead of reporting the actual size of the - * variable size descriptor. - * Although it's safe to ignore the "LENGTH" field for variable size - * descriptors as we can always derive the length of the descriptor from - * the descriptor header fields. Hence this change impose the length - * match check only for fixed size descriptors (for which we always - * request the correct size as part of Query Request UPIU). - */ - if ((desc_id != QUERY_DESC_IDN_STRING) && - (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) { - dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d", - __func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]); - ret = -EINVAL; - goto out; - } + /* Check wherher we will not copy more data, than available */ + if (is_kmalloc && param_size > buff_len) + param_size = buff_len; if (is_kmalloc) memcpy(param_read_buf, &desc_buf[param_offset], param_size); @@ -5919,8 +5983,8 @@ static int ufshcd_set_icc_levels_attr(struct ufs_hba *hba, u32 icc_level) static void ufshcd_init_icc_levels(struct ufs_hba *hba) { int ret; - int buff_len = QUERY_DESC_POWER_MAX_SIZE; - u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE]; + int buff_len = hba->desc_size.pwr_desc; + u8 desc_buf[hba->desc_size.pwr_desc]; ret = ufshcd_read_power_desc(hba, desc_buf, buff_len); if (ret) { @@ -6017,11 +6081,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba, { int err; u8 model_index; - u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0}; - u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE]; + u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0}; + u8 desc_buf[hba->desc_size.dev_desc]; - err = ufshcd_read_device_desc(hba, desc_buf, - QUERY_DESC_DEVICE_MAX_SIZE); + err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc); if (err) { dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n", __func__, err); @@ -6038,14 +6101,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba, model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; err = ufshcd_read_string_desc(hba, model_index, str_desc_buf, - QUERY_DESC_STRING_MAX_SIZE, ASCII_STD); + QUERY_DESC_MAX_SIZE, ASCII_STD); if (err) { dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n", __func__, err); goto out; } - str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0'; + str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0'; strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE), min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET], MAX_MODEL_LEN)); @@ -6251,6 +6314,51 @@ static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) hba->req_abort_count = 0; } +static void ufshcd_init_desc_sizes(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0, + &hba->desc_size.dev_desc); + if (err) + hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0, + &hba->desc_size.pwr_desc); + if (err) + hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0, + &hba->desc_size.interc_desc); + if (err) + hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0, + &hba->desc_size.conf_desc); + if (err) + hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0, + &hba->desc_size.unit_desc); + if (err) + hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0, + &hba->desc_size.geom_desc); + if (err) + hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE; +} + +static void ufshcd_def_desc_sizes(struct ufs_hba *hba) +{ + hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE; + hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE; + hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE; + hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE; + hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE; + hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE; +} + /** * ufshcd_probe_hba - probe hba to detect device and initialize * @hba: per-adapter instance @@ -6285,6 +6393,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) if (ret) goto out; + /* Init check for device descriptor sizes */ + ufshcd_init_desc_sizes(hba); + ret = ufs_get_device_desc(hba, &card); if (ret) { dev_err(hba->dev, "%s: Failed getting device info. err = %d\n", @@ -6320,6 +6431,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) /* set the state as operational after switching to desired gear */ hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; + /* * If we are in error handling context or in power management callbacks * context, no need to scan the host @@ -7774,6 +7886,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->mmio_base = mmio_base; hba->irq = irq; + /* Set descriptor lengths to specification defaults */ + ufshcd_def_desc_sizes(hba); + err = ufshcd_hba_init(hba); if (err) goto out_error; diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 7630600217a2..cdc8bd05f7df 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -220,6 +220,15 @@ struct ufs_dev_cmd { struct ufs_query query; }; +struct ufs_desc_size { + int dev_desc; + int pwr_desc; + int geom_desc; + int interc_desc; + int unit_desc; + int conf_desc; +}; + /** * struct ufs_clk_info - UFS clock related info * @list: list headed by hba->clk_list_head @@ -483,6 +492,7 @@ struct ufs_stats { * @clk_list_head: UFS host controller clocks list node head * @pwr_info: holds current power mode * @max_pwr_info: keeps the device max valid pwm + * @desc_size: descriptor sizes reported by device * @urgent_bkops_lvl: keeps track of urgent bkops level for device * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for * device is known or not. @@ -666,6 +676,7 @@ struct ufs_hba { bool is_urgent_bkops_lvl_checked; struct rw_semaphore clk_scaling_lock; + struct ufs_desc_size desc_size; }; /* Returns true if clocks can be gated. Otherwise false */ @@ -832,6 +843,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, bool *flag_res); int ufshcd_hold(struct ufs_hba *hba, bool async); void ufshcd_release(struct ufs_hba *hba); + +int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id, + int *desc_length); + u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba); /* Wrapper functions for safely calling variant operations */ From c46f09175dabd5dd6a1507f36250bfa734a0156e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 1 Mar 2017 17:27:00 +0900 Subject: [PATCH 017/384] scsi: sd: Check for unaligned partial completion Commit ("mpt3sas: Force request partial completion alignment") was not considering the case of commands not operating on logical block size units (e.g. REQ_OP_ZONE_REPORT and its 64B aligned partial replies). In this case, forcing alignment of resid to the device logical block size can break the command result, e.g. in the case of REQ_OP_ZONE_REPORT, the exact number of zone reported by the device. Move the partial completion alignement check of mpt3sas to a generic implementation in sd_done(). The check is added within the default section of the initial req_op() switch case so that the report and reset zone commands are ignored. In addition, as sd_done() is not called for passthrough requests, resid corrections are not done as intended by the initial mpt3sas patch. Fixes: f2e767bb5d6e ("mpt3sas: Force request partial completion alignment") Cc: # v4.10 Signed-off-by: Damien Le Moal Acked-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 --------------- drivers/scsi/sd.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 46e866c36c8a..69c29c560575 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4677,7 +4677,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) struct MPT3SAS_DEVICE *sas_device_priv_data; u32 response_code = 0; unsigned long flags; - unsigned int sector_sz; mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply); @@ -4742,20 +4741,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } xfer_cnt = le32_to_cpu(mpi_reply->TransferCount); - - /* In case of bogus fw or device, we could end up having - * unaligned partial completion. We can force alignment here, - * then scsi-ml does not need to handle this misbehavior. - */ - sector_sz = scmd->device->sector_size; - if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz && - xfer_cnt % sector_sz)) { - sdev_printk(KERN_INFO, scmd->device, - "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", - xfer_cnt, sector_sz); - xfer_cnt = round_down(xfer_cnt, sector_sz); - } - scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt); if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) log_info = le32_to_cpu(mpi_reply->IOCLogInfo); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c7839f6c35cc..fb9b4d29af0b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1783,6 +1783,8 @@ static int sd_done(struct scsi_cmnd *SCpnt) { int result = SCpnt->result; unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt); + unsigned int sector_size = SCpnt->device->sector_size; + unsigned int resid; struct scsi_sense_hdr sshdr; struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); struct request *req = SCpnt->request; @@ -1813,6 +1815,21 @@ static int sd_done(struct scsi_cmnd *SCpnt) scsi_set_resid(SCpnt, blk_rq_bytes(req)); } break; + default: + /* + * In case of bogus fw or device, we could end up having + * an unaligned partial completion. Check this here and force + * alignment. + */ + resid = scsi_get_resid(SCpnt); + if (resid & (sector_size - 1)) { + sd_printk(KERN_INFO, sdkp, + "Unaligned partial completion (resid=%u, sector_sz=%u)\n", + resid, sector_size); + resid = min(scsi_bufflen(SCpnt), + round_up(resid, sector_size)); + scsi_set_resid(SCpnt, resid); + } } if (result) { From 8893cf6cb1cf56334c05120e23092dbfc9423ebb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 1 Mar 2017 09:00:36 -0800 Subject: [PATCH 018/384] scsi: mpt3sas: Avoid sleeping in interrupt context Commit 669f044170d8 ("scsi: srp_transport: Move queuecommand() wait code to SCSI core") can make scsi_internal_device_block() sleep. However, the mpt3sas driver can call this function from an interrupt handler. Hence add a second argument to scsi_internal_device_block() that restores the old behavior of this function for the mpt3sas handler. The call chain that triggered an "IRQ handler enabled interrupts" complaint is as follows: _base_interrupt() -> _base_async_event() -> mpt3sas_scsih_event_callback() -> _scsih_check_topo_delete_events() -> _scsih_block_io_to_children_attached_directly() -> _scsih_block_io_device() -> _scsih_internal_device_block() -> scsi_internal_device_block() Reported-by: Omar Sandoval Signed-off-by: Bart Van Assche Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: Sreekanth Reddy Cc: # v4.10+ Tested-by: Omar Sandoval Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 3 --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- drivers/scsi/scsi_lib.c | 14 ++++++++++---- drivers/scsi/scsi_priv.h | 3 --- include/scsi/scsi_device.h | 4 ++++ 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 7fe7e6ed595b..8981806fb13f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, u16 handle, u8 phy_number, u8 link_rate); extern struct sas_function_template mpt3sas_transport_functions; extern struct scsi_transport_template *mpt3sas_transport_template; -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); /* trigger data externs */ void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc, struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 69c29c560575..919ba2bb15f1 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r == -EINVAL) sdev_printk(KERN_WARNING, sdev, "device_block failed with return(%d) for handle(0x%04x)\n", @@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev, "performing a block followed by an unblock\n", r, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r) sdev_printk(KERN_WARNING, sdev, "retried device_block " "failed with return(%d) for handle(0x%04x)\n", diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f5e45a252485..f41e6b84a1bd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume); /** * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state * @sdev: device to block + * @wait: Whether or not to wait until ongoing .queuecommand() / + * .queue_rq() calls have finished. * * Block request made by scsi lld's to temporarily stop all * scsi commands on the specified device. May sleep. @@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume); * remove the rport mutex lock and unlock calls from srp_queuecommand(). */ int -scsi_internal_device_block(struct scsi_device *sdev) +scsi_internal_device_block(struct scsi_device *sdev, bool wait) { struct request_queue *q = sdev->request_queue; unsigned long flags; @@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev) * request queue. */ if (q->mq_ops) { - blk_mq_quiesce_queue(q); + if (wait) + blk_mq_quiesce_queue(q); + else + blk_mq_stop_hw_queues(q); } else { spin_lock_irqsave(q->queue_lock, flags); blk_stop_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_wait_for_queuecommand(sdev); + if (wait) + scsi_wait_for_queuecommand(sdev); } return 0; @@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock); static void device_block(struct scsi_device *sdev, void *data) { - scsi_internal_device_block(sdev); + scsi_internal_device_block(sdev, true); } static int diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 99bfc985e190..f11bd102d6d5 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { } */ #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT 600 /* units in seconds */ -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); #endif /* _SCSI_PRIV_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6f22b39f1b0c..080c7ce9bae8 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev) sdev->sdev_state == SDEV_CREATED_BLOCK; } +int scsi_internal_device_block(struct scsi_device *sdev, bool wait); +int scsi_internal_device_unblock(struct scsi_device *sdev, + enum scsi_device_state new_state); + /* accessor functions for the SCSI parameters */ static inline int scsi_device_sync(struct scsi_device *sdev) { From f38837b08d23e66de17d46d030e0d9ac5172ad1f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 5 Mar 2017 09:41:08 -0800 Subject: [PATCH 019/384] bpf: add get_next_key callback to LPM map map_get_next_key callback is mandatory. Supply dummy handler. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Dmitry Vyukov Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/lpm_trie.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 8bfe0afaee10..b37bd9ab7f57 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -500,9 +500,15 @@ static void trie_free(struct bpf_map *map) raw_spin_unlock(&trie->lock); } +static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + static const struct bpf_map_ops trie_ops = { .map_alloc = trie_alloc, .map_free = trie_free, + .map_get_next_key = trie_get_next_key, .map_lookup_elem = trie_lookup_elem, .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, From 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Mar 2017 12:34:49 -0800 Subject: [PATCH 020/384] net: phy: Do not perform software reset for Generic PHY The Generic PHY driver is a catch-all PHY driver and it should preserve whatever prior initialization has been done by boot loader or firmware agents. For specific PHY device configuration it is expected that a specialized PHY driver would take over that role. Resetting the generic PHY was a bad idea that has lead to several complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()"). Reported-by: Felix Fietkau Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index daec6555f3b1..5198ccfa347f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", - .soft_reset = genphy_soft_reset, + .soft_reset = genphy_no_soft_reset, .config_init = genphy_config_init, .features = PHY_GBIT_FEATURES | SUPPORTED_MII | SUPPORTED_AUI | SUPPORTED_FIBRE | diff --git a/include/linux/phy.h b/include/linux/phy.h index 772476028a65..43a774873aa9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); +static inline int genphy_no_soft_reset(struct phy_device *phydev) +{ + return 0; +} void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); From 9ce9f7999741f342eeffd036ab09213a2fa93040 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 13 Feb 2017 13:49:58 -0600 Subject: [PATCH 021/384] gpio: altera-a10sr: Set gpio_chip parent property Set the gpio_chip parent property since some recent functions such as devprop_gpiochip_set_names() can use it. Signed-off-by: Thor Thayer Signed-off-by: Linus Walleij --- drivers/gpio/gpio-altera-a10sr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-altera-a10sr.c b/drivers/gpio/gpio-altera-a10sr.c index 9e1a138fed53..16a8951b2bed 100644 --- a/drivers/gpio/gpio-altera-a10sr.c +++ b/drivers/gpio/gpio-altera-a10sr.c @@ -96,7 +96,7 @@ static int altr_a10sr_gpio_probe(struct platform_device *pdev) gpio->regmap = a10sr->regmap; gpio->gp = altr_a10sr_gc; - + gpio->gp.parent = pdev->dev.parent; gpio->gp.of_node = pdev->dev.of_node; ret = devm_gpiochip_add_data(&pdev->dev, &gpio->gp, gpio); From fa6256db033067b57318decdc1c583512a1f8f68 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 15 Feb 2017 02:02:06 +0300 Subject: [PATCH 022/384] gpio: mockup: return -EFAULT if copy_from_user() fails copy_from_user() returns the number of bytes remaining to be copied but we want to return negative error codes on failue. Fixes: 9202ba2397d1 ("gpio: mockup: implement event injecting over debugfs") Signed-off-by: Dan Carpenter Acked-by: Bartosz Golaszewski Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mockup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 06dac72cb69c..d99338689213 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -197,7 +197,7 @@ static ssize_t gpio_mockup_event_write(struct file *file, struct seq_file *sfile; struct gpio_desc *desc; struct gpio_chip *gc; - int status, val; + int val; char buf; sfile = file->private_data; @@ -206,9 +206,8 @@ static ssize_t gpio_mockup_event_write(struct file *file, chip = priv->chip; gc = &chip->gc; - status = copy_from_user(&buf, usr_buf, 1); - if (status) - return status; + if (copy_from_user(&buf, usr_buf, 1)) + return -EFAULT; if (buf == '0') val = 0; From b115bebc07f282067eccc06fd5aa3060ab1426da Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Feb 2017 16:13:44 +0100 Subject: [PATCH 023/384] gpio: xgene: mark PM functions as __maybe_unused When CONFIG_PM_SLEEP is disabled, we get a warning about unused functions: drivers/gpio/gpio-xgene.c:155:12: warning: 'xgene_gpio_resume' defined but not used [-Wunused-function] static int xgene_gpio_resume(struct device *dev) ^~~~~~~~~~~~~~~~~ drivers/gpio/gpio-xgene.c:142:12: warning: 'xgene_gpio_suspend' defined but not used [-Wunused-function] static int xgene_gpio_suspend(struct device *dev) The warnings are harmless and can be avoided by simplifying the code and marking the functions as __maybe_unused. Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij --- drivers/gpio/gpio-xgene.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-xgene.c b/drivers/gpio/gpio-xgene.c index 40a8881c2ce8..f1c6ec17b90a 100644 --- a/drivers/gpio/gpio-xgene.c +++ b/drivers/gpio/gpio-xgene.c @@ -42,9 +42,7 @@ struct xgene_gpio { struct gpio_chip chip; void __iomem *base; spinlock_t lock; -#ifdef CONFIG_PM u32 set_dr_val[XGENE_MAX_GPIO_BANKS]; -#endif }; static int xgene_gpio_get(struct gpio_chip *gc, unsigned int offset) @@ -138,8 +136,7 @@ static int xgene_gpio_dir_out(struct gpio_chip *gc, return 0; } -#ifdef CONFIG_PM -static int xgene_gpio_suspend(struct device *dev) +static __maybe_unused int xgene_gpio_suspend(struct device *dev) { struct xgene_gpio *gpio = dev_get_drvdata(dev); unsigned long bank_offset; @@ -152,7 +149,7 @@ static int xgene_gpio_suspend(struct device *dev) return 0; } -static int xgene_gpio_resume(struct device *dev) +static __maybe_unused int xgene_gpio_resume(struct device *dev) { struct xgene_gpio *gpio = dev_get_drvdata(dev); unsigned long bank_offset; @@ -166,10 +163,6 @@ static int xgene_gpio_resume(struct device *dev) } static SIMPLE_DEV_PM_OPS(xgene_gpio_pm, xgene_gpio_suspend, xgene_gpio_resume); -#define XGENE_GPIO_PM_OPS (&xgene_gpio_pm) -#else -#define XGENE_GPIO_PM_OPS NULL -#endif static int xgene_gpio_probe(struct platform_device *pdev) { @@ -241,7 +234,7 @@ static struct platform_driver xgene_gpio_driver = { .name = "xgene-gpio", .of_match_table = xgene_gpio_of_match, .acpi_match_table = ACPI_PTR(xgene_gpio_acpi_match), - .pm = XGENE_GPIO_PM_OPS, + .pm = &xgene_gpio_pm, }, .probe = xgene_gpio_probe, }; From f759921cfbf4847319d197a6ed7c9534d593f8bc Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Mon, 20 Feb 2017 09:41:45 +0800 Subject: [PATCH 024/384] gpio: altera: Use handle_level_irq when configured as a level_high When a threaded irq handler is chained attached to one of the gpio pins when configure for level irq the altera_gpio_irq_leveL_high_handler does not mask the interrupt while being handled by the chained irq. This resulting in the threaded irq not getting enough cycles to complete quickly enough before the irq was disabled as faulty. handle_level_irq should be used in this situation instead of handle_simple_irq. In gpiochip_irqchip_add set default handler to handle_bad_irq as per Documentation/gpio/driver.txt. Then set the correct handler in the set_type callback. Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-altera.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 5bddbd507ca9..3fe6a21e05a5 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -90,21 +90,18 @@ static int altera_gpio_irq_set_type(struct irq_data *d, altera_gc = gpiochip_get_data(irq_data_get_irq_chip_data(d)); - if (type == IRQ_TYPE_NONE) + if (type == IRQ_TYPE_NONE) { + irq_set_handler_locked(d, handle_bad_irq); return 0; - if (type == IRQ_TYPE_LEVEL_HIGH && - altera_gc->interrupt_trigger == IRQ_TYPE_LEVEL_HIGH) + } + if (type == altera_gc->interrupt_trigger) { + if (type == IRQ_TYPE_LEVEL_HIGH) + irq_set_handler_locked(d, handle_level_irq); + else + irq_set_handler_locked(d, handle_simple_irq); return 0; - if (type == IRQ_TYPE_EDGE_RISING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_RISING) - return 0; - if (type == IRQ_TYPE_EDGE_FALLING && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_FALLING) - return 0; - if (type == IRQ_TYPE_EDGE_BOTH && - altera_gc->interrupt_trigger == IRQ_TYPE_EDGE_BOTH) - return 0; - + } + irq_set_handler_locked(d, handle_bad_irq); return -EINVAL; } @@ -230,7 +227,6 @@ static void altera_gpio_irq_edge_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); } - static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; @@ -310,7 +306,7 @@ static int altera_gpio_probe(struct platform_device *pdev) altera_gc->interrupt_trigger = reg; ret = gpiochip_irqchip_add(&altera_gc->mmchip.gc, &altera_irq_chip, 0, - handle_simple_irq, IRQ_TYPE_NONE); + handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(&pdev->dev, "could not add irqchip\n"); From f2f10b7e722a75c6d75a7f7cd06b0eee3ae20f7c Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Fri, 17 Feb 2017 07:40:52 -0600 Subject: [PATCH 025/384] HID: chicony: Add support for another ASUS Zen AiO keyboard Add support for media keys on the keyboard that comes with the Asus V221ID and ZN241IC All In One computers. The keys to support here are WLAN, BRIGHTNESSDOWN and BRIGHTNESSUP. This device is not visibly branded as Chicony, and the USB Vendor ID suggests that it is a JESS device. However this seems like the right place to put it: the usage codes are identical to the currently supported devices, and this driver already supports the ASUS AIO keyboard AK1D. Signed-off-by: Daniel Drake Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 4 ++-- drivers/hid/hid-chicony.c | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-ids.h | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 1aeb80e52424..8eab3200ac9a 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -175,11 +175,11 @@ config HID_CHERRY Support for Cherry Cymotion keyboard. config HID_CHICONY - tristate "Chicony Tactical pad" + tristate "Chicony devices" depends on HID default !EXPERT ---help--- - Support for Chicony Tactical pad. + Support for Chicony Tactical pad and special keys on Chicony keyboards. config HID_CORSAIR tristate "Corsair devices" diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index bc3cec199fee..f04ed9aabc3f 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -86,6 +86,7 @@ static const struct hid_device_id ch_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { } }; MODULE_DEVICE_TABLE(hid, ch_devices); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e9e87d337446..ae01ae601d74 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1910,6 +1910,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A081) }, { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A0C2) }, { HID_USB_DEVICE(USB_VENDOR_ID_HUION, USB_DEVICE_ID_HUION_TABLET) }, + { HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_JESS_ZEN_AIO_KBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_JESS2, USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ION, USB_DEVICE_ID_ICADE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KENSINGTON, USB_DEVICE_ID_KS_SLIMBLADE) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 86c95d30ac80..b3df60da0297 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -557,6 +557,7 @@ #define USB_VENDOR_ID_JESS 0x0c45 #define USB_DEVICE_ID_JESS_YUREX 0x1010 +#define USB_DEVICE_ID_JESS_ZEN_AIO_KBD 0x5112 #define USB_VENDOR_ID_JESS2 0x0f30 #define USB_DEVICE_ID_JESS2_COLOR_RUMBLE_PAD 0x0111 From a687c5765b5ae19fe559e14615ddc87ebb46d409 Mon Sep 17 00:00:00 2001 From: Roderick Colenbrander Date: Fri, 24 Feb 2017 16:14:15 -0800 Subject: [PATCH 026/384] HID: sony: Fix input device leak when connecting a DS4 twice using USB/BT When a user connects a DS4 twice using USB and BT, we reject the second device connection after the setup work. We then perform a cleanup, but during cleanup we are not removing the touchpad device. This leads to leakage of an input device, which we would never remove. It can likely result into a kernel oops as well when the touchpad evdev node is accessed and the underlaying HID device has been removed from the system. [jkosina@suse.cz: added stable annotation] Fixes: ac797b95f532 ("HID: sony: Make the DS4 touchpad a separate device") Cc: stable@vger.kernel.org Signed-off-by: Roderick Colenbrander Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-sony.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index f405b07d0381..740996f9bdd4 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2632,6 +2632,8 @@ static int sony_input_configured(struct hid_device *hdev, sony_leds_remove(sc); if (sc->quirks & SONY_BATTERY_SUPPORT) sony_battery_remove(sc); + if (sc->touchpad) + sony_unregister_touchpad(sc); sony_cancel_work_sync(sc); kfree(sc->output_report_dmabuf); sony_remove_dev_list(sc); From 312eb712e15868236dd03c67971ab2c1d79b4ce6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 17 Feb 2017 18:44:11 +0100 Subject: [PATCH 027/384] cgroup: Fix indenting in PID controller documentation Follow the common documentation style in the file and indent the interface file description by a tab instead of just a space. Signed-off-by: Tobias Klauser Signed-off-by: Tejun Heo --- Documentation/cgroup-v2.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 3b8449f8ac7e..49d7c997fa1e 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -1142,16 +1142,17 @@ used by the kernel. pids.max - A read-write single value file which exists on non-root cgroups. The - default is "max". + A read-write single value file which exists on non-root + cgroups. The default is "max". - Hard limit of number of processes. + Hard limit of number of processes. pids.current - A read-only single value file which exists on all cgroups. + A read-only single value file which exists on all cgroups. - The number of processes currently in the cgroup and its descendants. + The number of processes currently in the cgroup and its + descendants. Organisational operations are not blocked by cgroup policies, so it is possible to have pids.current > pids.max. This can be done by either From 1d18c2747f937f1b5ec65ce6bf4ccb9ca1aea9e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Mar 2017 15:39:07 -0500 Subject: [PATCH 028/384] cgroup/pids: remove spurious suspicious RCU usage warning pids_can_fork() is special in that the css association is guaranteed to be stable throughout the function and thus doesn't need RCU protection around task_css access. When determining the css to charge the pid, task_css_check() is used to override the RCU sanity check. While adding a warning message on fork rejection from pids limit, 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") incorrectly added a task_css access which is neither RCU protected or explicitly annotated. This triggers the following suspicious RCU usage warning when RCU debugging is enabled. cgroup: fork rejected by pids controller in =============================== [ ERR: suspicious RCU usage. ] 4.10.0-work+ #1 Not tainted ------------------------------- ./include/linux/cgroup.h:435 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 1 lock held by bash/1748: #0: (&cgroup_threadgroup_rwsem){+++++.}, at: [] _do_fork+0xe6/0x6e0 stack backtrace: CPU: 3 PID: 1748 Comm: bash Not tainted 4.10.0-work+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 Call Trace: dump_stack+0x68/0x93 lockdep_rcu_suspicious+0xd7/0x110 pids_can_fork+0x1c7/0x1d0 cgroup_can_fork+0x67/0xc0 copy_process.part.58+0x1709/0x1e90 _do_fork+0xe6/0x6e0 SyS_clone+0x19/0x20 do_syscall_64+0x5c/0x140 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f7853fab93a RSP: 002b:00007ffc12d05c90 EFLAGS: 00000246 ORIG_RAX: 0000000000000038 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7853fab93a RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011 RBP: 00007ffc12d05cc0 R08: 0000000000000000 R09: 00007f78548db700 R10: 00007f78548db9d0 R11: 0000000000000246 R12: 00000000000006d4 R13: 0000000000000001 R14: 0000000000000000 R15: 000055e3ebe2c04d /asdf There's no reason to dereference task_css again here when the associated css is already available. Fix it by replacing the task_cgroup() call with css->cgroup. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Fixes: 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") Cc: Kenny Yu Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Tejun Heo --- kernel/cgroup/pids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index e756dae49300..2237201d66d5 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task) /* Only log the first time events_limit is incremented. */ if (atomic64_inc_return(&pids->events_limit) == 1) { pr_info("cgroup: fork rejected by pids controller in "); - pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id)); + pr_cont_cgroup_path(css->cgroup); pr_cont("\n"); } cgroup_file_notify(&pids->events_file); From b6a6759daf55dade2b65089957832759d502acfb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 25 Feb 2017 01:56:48 -0800 Subject: [PATCH 029/384] cgroups: censor kernel pointer in debug files As found in grsecurity, this avoids exposing a kernel pointer through the cgroup debug entries. Signed-off-by: Kees Cook Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 56eba9caa632..1dc22f6b49f5 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) struct task_struct *task; int count = 0; - seq_printf(seq, "css_set %p\n", cset); + seq_printf(seq, "css_set %pK\n", cset); list_for_each_entry(task, &cset->tasks, cg_list) { if (count++ > MAX_TASKS_SHOWN_PER_CSS) From d85fc67dd11e9a32966140677d4d6429ca540b25 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 3 Mar 2017 09:00:09 -0800 Subject: [PATCH 030/384] libata: transport: Remove circular dependency at free time Without this patch, failed probe would not free resources like irq. ata port tdev object currently hold a reference to the ata port object. Therefore the ata port object release function will not get called until the ata_tport_release is called. But that would never happen, releasing the last reference of ata port dev is done by scsi_host_release, which is called by ata_host_release when the ata port object is released. The ata device objects actually do not need to explicitly hold a reference to their real counterpart, given the transport objects are the children of these objects and device_add() is call for each child. We know the parent will not be deleted until we call the child's device_del(). Reported-by: Matthew Whitehead Tested-by: Matthew Whitehead Suggested-by: Tejun Heo Signed-off-by: Gwendal Grignou Signed-off-by: Tejun Heo --- drivers/ata/libata-transport.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 46698232e6bf..19e6e539a061 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class, static void ata_tport_release(struct device *dev) { - put_device(dev->parent); } /** @@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent, device_initialize(dev); dev->type = &ata_port_type; - dev->parent = get_device(parent); + dev->parent = parent; dev->release = ata_tport_release; dev_set_name(dev, "ata%d", ap->print_id); transport_setup_device(dev); @@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class, static void ata_tlink_release(struct device *dev) { - put_device(dev->parent); } /** @@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link) int error; device_initialize(dev); - dev->parent = get_device(&ap->tdev); + dev->parent = &ap->tdev; dev->release = ata_tlink_release; if (ata_is_host_link(link)) dev_set_name(dev, "link%d", ap->print_id); @@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class, static void ata_tdev_release(struct device *dev) { - put_device(dev->parent); } /** @@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev) int error; device_initialize(dev); - dev->parent = get_device(&link->tdev); + dev->parent = &link->tdev; dev->release = ata_tdev_release; if (ata_is_host_link(link)) dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); From 0580b762a4d6b70817476b90042813f8573283fa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:26:54 -0500 Subject: [PATCH 031/384] libata: drop WARN from protocol error in ata_sff_qc_issue() ata_sff_qc_issue() expects upper layers to never issue commands on a command protocol that it doesn't implement. While the assumption holds fine with the usual IO path, nothing filters based on the command protocol in the passthrough path (which was added later), allowing the warning to be tripped with a passthrough command with the right (well, wrong) protocol. Failing with AC_ERR_SYSTEM is the right thing to do anyway. Remove the unnecessary WARN. Reported-by: Dmitry Vyukov Link: http://lkml.kernel.org/r/CACT4Y+bXkvevNZU8uP6X0QVqsj6wNoUA_1exfTSOzc+SmUtMOA@mail.gmail.com Signed-off-by: Tejun Heo --- drivers/ata/libata-sff.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 2bd92dca3e62..274d6d7193d7 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1482,7 +1482,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) break; default: - WARN_ON_ONCE(1); return AC_ERR_SYSTEM; } From 637fdbae60d6cb9f6e963c1079d7e0445c86ff7d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:33:42 -0500 Subject: [PATCH 032/384] workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq If queue_delayed_work() gets called with NULL @wq, the kernel will oops asynchronuosly on timer expiration which isn't too helpful in tracking down the offender. This actually happened with smc. __queue_delayed_work() already does several input sanity checks synchronously. Add NULL @wq check. Reported-by: Dave Jones Link: http://lkml.kernel.org/r/20170227171439.jshx3qplflyrgcv7@codemonkey.org.uk Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 072cbc9b175d..c0168b7da1ea 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); From 320661b08dd6f1746d5c7ab4eb435ec64b97cd45 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 25 Feb 2017 13:00:19 -0800 Subject: [PATCH 033/384] percpu: acquire pcpu_lock when updating pcpu_nr_empty_pop_pages Update to pcpu_nr_empty_pop_pages in pcpu_alloc() is currently done without holding pcpu_lock. This can lead to bad updates to the variable. Add missing lock calls. Fixes: b539b87fed37 ("percpu: implmeent pcpu_nr_empty_pop_pages and chunk->nr_populated") Signed-off-by: Tahsin Erdogan Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v3.18+ --- mm/percpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index 5696039b5c07..60a6488e9e6d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1011,8 +1011,11 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, mutex_unlock(&pcpu_alloc_mutex); } - if (chunk != pcpu_reserved_chunk) + if (chunk != pcpu_reserved_chunk) { + spin_lock_irqsave(&pcpu_lock, flags); pcpu_nr_empty_pop_pages -= occ_pages; + spin_unlock_irqrestore(&pcpu_lock, flags); + } if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); From 8a1df543de8ad879d3c80bdda4c67ac4f82e7ee0 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 25 Feb 2017 12:59:26 -0800 Subject: [PATCH 034/384] percpu: remove unused chunk_alloc parameter from pcpu_get_pages() pcpu_get_pages() doesn't use chunk_alloc parameter, remove it. Fixes: fbbb7f4e149f ("percpu: remove the usage of separate populated bitmap in percpu-vm") Signed-off-by: Tahsin Erdogan Signed-off-by: Tejun Heo --- mm/percpu-vm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 538998a137d2..9ac639499bd1 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, /** * pcpu_get_pages - get temp pages array - * @chunk: chunk of interest * * Returns pointer to array of pointers to struct page which can be indexed * with pcpu_page_idx(). Note that there is only one array and accesses @@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, * RETURNS: * Pointer to temp pages array on success. */ -static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc) +static struct page **pcpu_get_pages(void) { static struct page **pages; size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); @@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, { struct page **pages; - pages = pcpu_get_pages(chunk); + pages = pcpu_get_pages(); if (!pages) return -ENOMEM; @@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, * successful population attempt so the temp pages array must * be available now. */ - pages = pcpu_get_pages(chunk); + pages = pcpu_get_pages(); BUG_ON(!pages); /* unmap and free */ From bd571195c9535c0b074fc7cd1b541b93817ed647 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Mar 2017 15:58:03 +0100 Subject: [PATCH 035/384] scsi: qedi: fix build error without DEBUG_FS Without CONFIG_DEBUG_FS, we run into a link error: drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_poll': qedi_iscsi.c:(.text.qedi_ep_poll+0x134): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_disconnect': qedi_iscsi.c:(.text.qedi_ep_disconnect+0x36c): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_connect': qedi_iscsi.c:(.text.qedi_ep_connect+0x350): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_fw.o: In function `qedi_tmf_work': qedi_fw.c:(.text.qedi_tmf_work+0x3b4): undefined reference to `do_not_recover' This defines the symbol as a constant in this case, as there is no way to set it to anything other than zero without DEBUG_FS. In addition, I'm renaming it to qedi_do_not_recover in order to put it into a driver specific namespace, as "do_not_recover" is a really bad name for a kernel-wide global identifier when it is used only in one driver. Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") Reviewed-by: Johannes Thumshirn Signed-off-by: Arnd Bergmann Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_debugfs.c | 16 ++++++++-------- drivers/scsi/qedi/qedi_fw.c | 4 ++-- drivers/scsi/qedi/qedi_gbl.h | 8 +++++++- drivers/scsi/qedi/qedi_iscsi.c | 8 ++++---- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/qedi/qedi_debugfs.c b/drivers/scsi/qedi/qedi_debugfs.c index 955936274241..59417199bf36 100644 --- a/drivers/scsi/qedi/qedi_debugfs.c +++ b/drivers/scsi/qedi/qedi_debugfs.c @@ -14,7 +14,7 @@ #include #include -int do_not_recover; +int qedi_do_not_recover; static struct dentry *qedi_dbg_root; void @@ -74,22 +74,22 @@ qedi_dbg_exit(void) static ssize_t qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg) { - if (!do_not_recover) - do_not_recover = 1; + if (!qedi_do_not_recover) + qedi_do_not_recover = 1; QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n", - do_not_recover); + qedi_do_not_recover); return 0; } static ssize_t qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg) { - if (do_not_recover) - do_not_recover = 0; + if (qedi_do_not_recover) + qedi_do_not_recover = 0; QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n", - do_not_recover); + qedi_do_not_recover); return 0; } @@ -141,7 +141,7 @@ qedi_dbg_do_not_recover_cmd_read(struct file *filp, char __user *buffer, if (*ppos) return 0; - cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover); + cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover); cnt = min_t(int, count, cnt - *ppos); *ppos += cnt; return cnt; diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index c9f0ef4e11b3..2bce3efc66a4 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -1461,9 +1461,9 @@ static void qedi_tmf_work(struct work_struct *work) get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id, qedi_conn->iscsi_conn_id); - if (do_not_recover) { + if (qedi_do_not_recover) { QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n", - do_not_recover); + qedi_do_not_recover); goto abort_ret; } diff --git a/drivers/scsi/qedi/qedi_gbl.h b/drivers/scsi/qedi/qedi_gbl.h index 8e488de88ece..63d793f46064 100644 --- a/drivers/scsi/qedi/qedi_gbl.h +++ b/drivers/scsi/qedi/qedi_gbl.h @@ -12,8 +12,14 @@ #include "qedi_iscsi.h" +#ifdef CONFIG_DEBUG_FS +extern int qedi_do_not_recover; +#else +#define qedi_do_not_recover (0) +#endif + extern uint qedi_io_tracing; -extern int do_not_recover; + extern struct scsi_host_template qedi_host_template; extern struct iscsi_transport qedi_iscsi_transport; extern const struct qed_iscsi_ops *qedi_ops; diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index b9f79d36142d..4cc474364c50 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -833,7 +833,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, return ERR_PTR(ret); } - if (do_not_recover) { + if (qedi_do_not_recover) { ret = -ENOMEM; return ERR_PTR(ret); } @@ -957,7 +957,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) struct qedi_endpoint *qedi_ep; int ret = 0; - if (do_not_recover) + if (qedi_do_not_recover) return 1; qedi_ep = ep->dd_data; @@ -1025,7 +1025,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) } if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) { - if (do_not_recover) { + if (qedi_do_not_recover) { QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Do not recover cid=0x%x\n", qedi_ep->iscsi_cid); @@ -1039,7 +1039,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) } } - if (do_not_recover) + if (qedi_do_not_recover) goto ep_exit_recover; switch (qedi_ep->state) { From 934767c56b0d9dbb95a40e9e6e4d9dcdc3a165ad Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 2 Mar 2017 09:21:33 -0800 Subject: [PATCH 036/384] scsi: aacraid: Fix typo in blink status The return status of the adapter check on KERNEL_PANIC is supposed to be the upper 16 bits of the OMR status register. Fixes: c421530bf848604e (scsi: aacraid: Reorder Adpater status check) Reported-by: Dan Carpenter Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/src.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 2e5338dec621..7b0410e0f569 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -468,7 +468,7 @@ static int aac_src_check_health(struct aac_dev *dev) return -1; err_blink: - return (status > 16) & 0xFF; + return (status >> 16) & 0xFF; } static inline u32 aac_get_vector(struct aac_dev *dev) From 23456565acf6d452e0368f7380aecd584c019c67 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 2 Mar 2017 17:14:47 -0800 Subject: [PATCH 037/384] scsi: qla2xxx: Fix ql_dump_buffer Recent printk changes for KERN_CONT cause this logging to be defectively emitted on multiple lines. Fix it. Also reduces object size a trivial amount. $ size drivers/scsi/qla2xxx/qla_dbg.o* text data bss dec hex filename 39125 0 0 39125 98d5 drivers/scsi/qla2xxx/qla_dbg.o.new 39164 0 0 39164 98fc drivers/scsi/qla2xxx/qla_dbg.o.old Signed-off-by: Joe Perches Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 21d9fb7fc887..51b4179469d1 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id, "%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size); ql_dbg(level, vha, id, "----- -----------------------------------------------\n"); - for (cnt = 0; cnt < size; cnt++, buf++) { - if (cnt % 16 == 0) - ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU); - printk(" %02x", *buf); - if (cnt % 16 == 15) - printk("\n"); + for (cnt = 0; cnt < size; cnt += 16) { + ql_dbg(level, vha, id, "%04x: ", cnt); + print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, + buf + cnt, min(16U, size - cnt), false); } - if (cnt % 16 != 0) - printk("\n"); } From c527de41aea24c2cdb6638818008d810013b4d39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Mar 2017 07:57:16 -0700 Subject: [PATCH 038/384] scsi: vmw_pvscsi: handle the return value from pci_alloc_irq_vectors correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It returns the number of vectors allocated when successful, so check for a negative error only. Fixes: 2e48e349 ("scsi: vmw_pvscsi: switch to pci_alloc_irq_vectors") Signed-off-by: Christoph Hellwig Reported-by: Loïc Yhuel Tested-by: Loïc Yhuel Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index ef474a748744..c374e3b5c678 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -1487,7 +1487,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id) irq_flag &= ~PCI_IRQ_MSI; error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag); - if (error) + if (error < 0) goto out_reset_adapter; adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true); From db6b2060bcae1ce1f9bde73a423e710b625ae1ef Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 4 Mar 2017 00:07:04 -0800 Subject: [PATCH 039/384] scsi: qedf: Fix defective logging format and argument mismatches Add __printf compiler verification of format and arguments. Fix fallout. Signed-off-by: Joe Perches Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_dbg.h | 13 ++++++++----- drivers/scsi/qedf/qedf_fip.c | 2 +- drivers/scsi/qedf/qedf_io.c | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h index 23bd70628a2f..7d173f48a81e 100644 --- a/drivers/scsi/qedf/qedf_dbg.h +++ b/drivers/scsi/qedf/qedf_dbg.h @@ -81,14 +81,17 @@ struct qedf_dbg_ctx { #define QEDF_INFO(pdev, level, fmt, ...) \ qedf_dbg_info(pdev, __func__, __LINE__, level, fmt, \ ## __VA_ARGS__) - -extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(4, 5) +void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *fmt, ...); -extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(4, 5) +void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *, ...); -extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, +__printf(4, 5) +void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *, ...); -extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(5, 6) +void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, u32 info, const char *fmt, ...); /* GRC Dump related defines */ diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c index 868d423380d1..ed58b9104f58 100644 --- a/drivers/scsi/qedf/qedf_fip.c +++ b/drivers/scsi/qedf/qedf_fip.c @@ -203,7 +203,7 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb) case FIP_DT_MAC: mp = (struct fip_mac_desc *)desc; QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, - "fd_mac=%pM.\n", __func__, mp->fd_mac); + "fd_mac=%pM\n", mp->fd_mac); ether_addr_copy(cvl_mac, mp->fd_mac); break; case FIP_DT_NAME: diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index ee0dcf9d3aba..46debe5034af 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1342,7 +1342,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, } else { refcount = kref_read(&io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, - "%d:0:%d:%d xid=0x%0x op=0x%02x " + "%d:0:%d:%lld xid=0x%0x op=0x%02x " "lba=%02x%02x%02x%02x cdb_status=%d " "fcp_resid=0x%x refcount=%d.\n", qedf->lport->host->host_no, sc_cmd->device->id, @@ -1426,7 +1426,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, sc_cmd->result = result << 16; refcount = kref_read(&io_req->refcount); - QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing " + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing " "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, " "allowed=%d retries=%d refcount=%d.\n", qedf->lport->host->host_no, sc_cmd->device->id, From fd2b18b4a7bf01453324733a18297ae9a197a4ae Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 6 Mar 2017 10:32:27 -0800 Subject: [PATCH 040/384] scsi: qedf: Use vsprintf extension %pad Using %llx for a dma_addr_t can lead to format/argument mismatches. Use %pad and the address of the dma_addr_t instead. Signed-off-by: Joe Perches Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index d9d7a86b5f8b..8e2a160490e6 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2456,8 +2456,8 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf) } QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, - "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl, - qedf->bdq_pbl_dma); + "BDQ PBL addr=0x%p dma=%pad\n", + qedf->bdq_pbl, &qedf->bdq_pbl_dma); /* * Populate BDQ PBL with physical and virtual address of individual From 33cc559a81397bc813c392617d40ddfdfa3cffbd Mon Sep 17 00:00:00 2001 From: Tomas Jasek Date: Fri, 3 Mar 2017 13:45:48 +0100 Subject: [PATCH 041/384] scsi: lpfc: replace init_timer by setup_timer This patch shortens every init_timer in lpfc module followed by function and data assignment using setup_timer. This is purely cleanup patch, it does not add new functionality nor remove any existing functionality. An init_timer call in this form: init_timer(&vport->fc_disctmo); vport->fc_disctmo.function = lpfc_disc_timeout; vport->fc_disctmo.data = vport; is shortened to: setup_timer(&vport->fc_disctmo, lpfc_disc_timeout, vport); It increases readability and reduces chances of mistakes done by developers. Signed-off-by: Tomas Jasek Signed-off-by: Jiri Slaby Cc: James Smart Cc: Dick Kennedy Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Acked-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 5 ++-- drivers/scsi/lpfc/lpfc_init.c | 47 ++++++++++++-------------------- 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 194a14d5f8a9..2612dac75186 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4344,9 +4344,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp); - init_timer(&ndlp->nlp_delayfunc); - ndlp->nlp_delayfunc.function = lpfc_els_retry_delay; - ndlp->nlp_delayfunc.data = (unsigned long)ndlp; + setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay, + (unsigned long)ndlp); ndlp->nlp_DID = did; ndlp->vport = vport; ndlp->phba = vport->phba; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0ee429d773f3..f395f2e4aa97 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3734,17 +3734,14 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) INIT_LIST_HEAD(&vport->rcv_buffer_list); spin_lock_init(&vport->work_port_lock); - init_timer(&vport->fc_disctmo); - vport->fc_disctmo.function = lpfc_disc_timeout; - vport->fc_disctmo.data = (unsigned long)vport; + setup_timer(&vport->fc_disctmo, lpfc_disc_timeout, + (unsigned long)vport); - init_timer(&vport->els_tmofunc); - vport->els_tmofunc.function = lpfc_els_timeout; - vport->els_tmofunc.data = (unsigned long)vport; + setup_timer(&vport->els_tmofunc, lpfc_els_timeout, + (unsigned long)vport); - init_timer(&vport->delayed_disc_tmo); - vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo; - vport->delayed_disc_tmo.data = (unsigned long)vport; + setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo, + (unsigned long)vport); error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) @@ -5406,21 +5403,15 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->luns); /* MBOX heartbeat timer */ - init_timer(&psli->mbox_tmo); - psli->mbox_tmo.function = lpfc_mbox_timeout; - psli->mbox_tmo.data = (unsigned long) phba; + setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba); /* Fabric block timer */ - init_timer(&phba->fabric_block_timer); - phba->fabric_block_timer.function = lpfc_fabric_block_timeout; - phba->fabric_block_timer.data = (unsigned long) phba; + setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout, + (unsigned long)phba); /* EA polling mode timer */ - init_timer(&phba->eratt_poll); - phba->eratt_poll.function = lpfc_poll_eratt; - phba->eratt_poll.data = (unsigned long) phba; + setup_timer(&phba->eratt_poll, lpfc_poll_eratt, + (unsigned long)phba); /* Heartbeat timer */ - init_timer(&phba->hb_tmofunc); - phba->hb_tmofunc.function = lpfc_hb_timeout; - phba->hb_tmofunc.data = (unsigned long)phba; + setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba); return 0; } @@ -5446,9 +5437,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) */ /* FCP polling mode timer */ - init_timer(&phba->fcp_poll_timer); - phba->fcp_poll_timer.function = lpfc_poll_timeout; - phba->fcp_poll_timer.data = (unsigned long) phba; + setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout, + (unsigned long)phba); /* Host attention work mask setup */ phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT); @@ -5617,14 +5607,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) * Initialize timers used by driver */ - init_timer(&phba->rrq_tmr); - phba->rrq_tmr.function = lpfc_rrq_timeout; - phba->rrq_tmr.data = (unsigned long)phba; + setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba); /* FCF rediscover timer */ - init_timer(&phba->fcf.redisc_wait); - phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo; - phba->fcf.redisc_wait.data = (unsigned long)phba; + setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo, + (unsigned long)phba); /* * Control structure for handling external multi-buffer mailbox From 70e5afd57d6d97d69bf5fd0187c2bb5a79990504 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:21 -0800 Subject: [PATCH 042/384] scsi: lpfc: remove redundant assignment of sgel From: Colin Ian King In the NVMET_FCOP_RSP case, sgel is assigned but never used and hence is redundant and can be removed. Detected by CoverityScan, CID#1411658 ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvmet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index c421e1738ee9..e59a0a89d357 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1445,7 +1445,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, case NVMET_FCOP_RSP: /* Words 0 - 2 */ - sgel = &rsp->sg[0]; physaddr = rsp->rspdma; wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen; From 7aabe84b8a1bb7c3f75fb6a23dc96f8999bdcc8f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:22 -0800 Subject: [PATCH 043/384] scsi: lpfc: sanity check hrq is null before dereferencing it From: Colin Ian King The sanity check for hrq should be moved to before the deference of hrq to ensure we don't perform a null pointer deference. Detected by CoverityScan, CID#1411650 ("Dereference before null check") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d8d8693040ac..562a5286bc47 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -15185,17 +15185,17 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, drq = drqp[idx]; cq = cqp[idx]; - if (hrq->entry_count != drq->entry_count) { - status = -EINVAL; - goto out; - } - /* sanity check on queue memory */ if (!hrq || !drq || !cq) { status = -ENODEV; goto out; } + if (hrq->entry_count != drq->entry_count) { + status = -EINVAL; + goto out; + } + if (idx == 0) { bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request, From 332ba3b5d6d27a60d445704ed7c88c7e9f958a30 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:23 -0800 Subject: [PATCH 044/384] scsi: lpfc: don't dereference dma_buf->iocbq before null check From: Colin Ian King dma_buf->iocbq is being dereferenced immediately before it is being null checked, so we have a potential null pointer dereference bug. Fix this by only dereferencing it only once we have passed a null check on the pointer. Detected by CoverityScan, CID#1411652 ("Dereference before null check") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index c61d8d692ede..5986c7957199 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -646,7 +646,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) } dma_buf->iocbq = lpfc_sli_get_iocbq(phba); - dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; if (!dma_buf->iocbq) { kfree(dma_buf->context); pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, @@ -658,6 +657,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) "2621 Ran out of nvmet iocb/WQEs\n"); return NULL; } + dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; nvmewqe = dma_buf->iocbq; wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; /* Initialize WQE */ From d11f54b7d1d40463024be3b40ab7be2c8a84fa43 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:24 -0800 Subject: [PATCH 045/384] scsi: lpfc: fix missing spin_unlock on sql_list_lock From: Colin Ian King In the case where sglq is null, the current code just returns without unlocking the spinlock sql_list_lock. Fix this by breaking out of the while loop and the exit path will then unlock and return NULL as was the original intention. Detected by CoverityScan, CID#1411635 ("Missing unlock") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 562a5286bc47..7389c130ffcc 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -952,7 +952,7 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) start_sglq = sglq; while (!found) { if (!sglq) - return NULL; + break; if (ndlp && ndlp->active_rrqs_xri_bitmap && test_bit(sglq->sli4_lxritag, ndlp->active_rrqs_xri_bitmap)) { From 5d181531bc6169e19a02a27d202cf0e982db9d0e Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:25 -0800 Subject: [PATCH 046/384] scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters if REG_VPI fails, the driver was incorrectly issuing INIT_VFI (a SLI4 command) on a SLI3 adapter. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 2d26440e6f2f..d260a1391baf 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8371,11 +8371,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; spin_unlock_irq(shost->host_lock); - if (vport->port_type == LPFC_PHYSICAL_PORT - && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) - lpfc_issue_init_vfi(vport); - else + if (mb->mbxStatus == MBX_NOT_FINISHED) + break; + if ((vport->port_type == LPFC_PHYSICAL_PORT) && + !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) { + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_issue_init_vfi(vport); + else + lpfc_initial_flogi(vport); + } else { lpfc_initial_fdisc(vport); + } break; } } else { From 8b3616392d32d2b04ce649caf6684da1b7050d8c Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:26 -0800 Subject: [PATCH 047/384] scsi: lpfc: Fix RCTL value on NVME LS request and response NVME LS requests and responses had wrong R_CTL values. Use the FC4 ELS Request and Response defines (defines badly named, they are FC4 LS's) instead of the base ELS values. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 609a908ea9db..6346d4dee9ff 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -316,7 +316,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0); bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1); bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1); - bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL); + bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ); bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME); /* Word 6 */ diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index e59a0a89d357..6c2221aac394 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1114,7 +1114,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0); bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1); bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0); - bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL); + bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP); bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME); /* Word 6 */ From b06a622ffe8dddcc09dad23dc768f7d1540fb4d6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:27 -0800 Subject: [PATCH 048/384] scsi: lpfc: Fix NVME CMD IU byte swapped word 1 problem Word 1 in NVME CMD IU appears byte swapped from value placed in WQE Should be Big Endian value in WQE word 16 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 6346d4dee9ff..bf3ccc5d59ac 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -620,15 +620,15 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, * Embed the payload in the last half of the WQE * WQE words 16-30 get the NVME CMD IU payload * - * WQE Word 16 is already setup with flags - * WQE words 17-19 get payload Words 2-4 + * WQE words 16-19 get payload Words 1-4 * WQE words 20-21 get payload Words 6-7 * WQE words 22-29 get payload Words 16-23 */ - wptr = &wqe->words[17]; /* WQE ptr */ + wptr = &wqe->words[16]; /* WQE ptr */ dptr = (uint32_t *)nCmd->cmdaddr; /* payload ptr */ - dptr += 2; /* Skip Words 0-1 in payload */ + dptr++; /* Skip Word 0 in payload */ + *wptr++ = *dptr++; /* Word 1 */ *wptr++ = *dptr++; /* Word 2 */ *wptr++ = *dptr++; /* Word 3 */ *wptr++ = *dptr++; /* Word 4 */ @@ -978,9 +978,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_WRITE_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_WRITE; - phba->fc4NvmeOutputRequests++; } else { /* Word 7 */ @@ -1002,9 +999,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_READ; - phba->fc4NvmeInputRequests++; } } else { @@ -1026,9 +1020,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, /* Word 11 */ bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_CMD; - phba->fc4NvmeControlRequests++; } /* From a5068b46958870633ea340692f301507ef78d00b Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:28 -0800 Subject: [PATCH 049/384] scsi: lpfc: Fix IO submission if WQ is full For both initiator and target: if WQ is full, return -EBUSY. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index bf3ccc5d59ac..e1bf31eca845 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1310,7 +1310,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, "sid: x%x did: x%x oxid: x%x\n", ret, vport->fc_myDID, ndlp->nlp_DID, lpfc_ncmd->cur_iocbq.sli4_xritag); - ret = -EINVAL; + ret = -EBUSY; goto out_free_nvme_buf; } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 6c2221aac394..735e2ba70198 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -571,6 +571,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6102 Bad state IO x%x aborted\n", ctxp->oxid); + rc = -ENXIO; goto aerr; } @@ -580,6 +581,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6152 FCP Drop IO x%x: Prep\n", ctxp->oxid); + rc = -ENXIO; goto aerr; } @@ -618,8 +620,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, ctxp->wqeq->hba_wqidx = 0; nvmewqeq->context2 = NULL; nvmewqeq->context3 = NULL; + rc = -EBUSY; aerr: - return -ENXIO; + return rc; } static void From 3ebd9b4701ef77358030a0ef9cb6586db2149712 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:29 -0800 Subject: [PATCH 050/384] scsi: lpfc: Fix nvme allocation bug on failed nvme_fc_register_localport nvme bufs get allocated even when the registration fails. Move allocation into the rsgistration success path. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index e1bf31eca845..26cde7c040ba 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2164,10 +2164,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) lport->vport = vport; INIT_LIST_HEAD(&lport->rport_list); vport->nvmei_support = 1; + len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); + vport->phba->total_nvme_bufs += len; } - len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); - vport->phba->total_nvme_bufs += len; return ret; } From 318083ad9230ff13cdac34ae4c4135e0c4e2d9ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:30 -0800 Subject: [PATCH 051/384] scsi: lpfc: add NVME exchange aborts previous code did little more than log a message. This patch adds abort path support, modeled after the SCSI code paths. Currently addresses only the initiator path. Target path under development, but stubbed out. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_hbadisc.c | 2 + drivers/scsi/lpfc/lpfc_init.c | 7 ++++ drivers/scsi/lpfc/lpfc_nvme.c | 64 +++++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_nvme.h | 1 + drivers/scsi/lpfc/lpfc_nvmet.c | 21 +++++++++-- drivers/scsi/lpfc/lpfc_sli.c | 51 ++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_sli4.h | 6 +++ 8 files changed, 143 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 0bba2e30b4f0..de6cd57dc7f6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -692,6 +692,7 @@ struct lpfc_hba { * capability */ #define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */ +#define NVME_XRI_ABORT_EVENT 0x100000 uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 2612dac75186..bd8635d303d2 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -641,6 +641,8 @@ lpfc_work_done(struct lpfc_hba *phba) lpfc_handle_rrq_active(phba); if (phba->hba_flag & FCP_XRI_ABORT_EVENT) lpfc_sli4_fcp_xri_abort_event_proc(phba); + if (phba->hba_flag & NVME_XRI_ABORT_EVENT) + lpfc_sli4_nvme_xri_abort_event_proc(phba); if (phba->hba_flag & ELS_XRI_ABORT_EVENT) lpfc_sli4_els_xri_abort_event_proc(phba); if (phba->hba_flag & ASYNC_EVENT) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f395f2e4aa97..a0cba631869e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5723,6 +5723,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the Abort nvme buffer list used by driver */ spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + /* Fast-path XRI aborted CQ Event work queue list */ + INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } /* This abort list used by worker thread */ @@ -8960,6 +8962,11 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba) /* Pending ELS XRI abort events */ list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue, &cqelist); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + /* Pending NVME XRI abort events */ + list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue, + &cqelist); + } /* Pending asynnc events */ list_splice_init(&phba->sli4_hba.sp_asynce_work_queue, &cqelist); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 26cde7c040ba..b9012fee1632 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1277,6 +1277,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, pnvme_fcreq->private = (void *)lpfc_ncmd; lpfc_ncmd->nvmeCmd = pnvme_fcreq; lpfc_ncmd->nrport = rport; + lpfc_ncmd->ndlp = ndlp; lpfc_ncmd->start_time = jiffies; lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp); @@ -1812,10 +1813,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, pdma_phys_sgl1, cur_xritag); if (status) { /* failure, put on abort nvme list */ - lpfc_ncmd->exch_busy = 1; + lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; } else { /* success, put on NVME buffer list */ - lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; num_posted++; } @@ -1845,10 +1846,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, struct lpfc_nvme_buf, list); if (status) { /* failure, put on abort nvme list */ - lpfc_ncmd->exch_busy = 1; + lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; } else { /* success, put on NVME buffer list */ - lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; num_posted++; } @@ -2090,7 +2091,7 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) unsigned long iflag = 0; lpfc_ncmd->nonsg_phys = 0; - if (lpfc_ncmd->exch_busy) { + if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) { spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); lpfc_ncmd->nvmeCmd = NULL; @@ -2453,3 +2454,56 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "6168: State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); } + +/** + * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort + * @phba: pointer to lpfc hba data structure. + * @axri: pointer to the fcp xri abort wcqe structure. + * + * This routine is invoked by the worker thread to process a SLI4 fast-path + * FCP aborted xri. + **/ +void +lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri) +{ + uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); + uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); + struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd; + struct lpfc_nodelist *ndlp; + unsigned long iflag = 0; + int rrq_empty = 0; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd, + &phba->sli4_hba.lpfc_abts_nvme_buf_list, + list) { + if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) { + list_del(&lpfc_ncmd->list); + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; + lpfc_ncmd->status = IOSTAT_SUCCESS; + spin_unlock( + &phba->sli4_hba.abts_nvme_buf_list_lock); + + rrq_empty = list_empty(&phba->active_rrq_list); + spin_unlock_irqrestore(&phba->hbalock, iflag); + ndlp = lpfc_ncmd->ndlp; + if (ndlp) { + lpfc_set_rrq_active( + phba, ndlp, + lpfc_ncmd->cur_iocbq.sli4_lxritag, + rxid, 1); + lpfc_sli4_abts_err_handler(phba, ndlp, axri); + } + lpfc_release_nvme_buf(phba, lpfc_ncmd); + if (rrq_empty) + lpfc_worker_wake_up(phba); + return; + } + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); +} diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index b2fae5e813f8..1347deb8dd6c 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -57,6 +57,7 @@ struct lpfc_nvme_buf { struct list_head list; struct nvmefc_fcp_req *nvmeCmd; struct lpfc_nvme_rport *nrport; + struct lpfc_nodelist *ndlp; uint32_t timeout; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 735e2ba70198..48ce9858bc11 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -734,6 +734,21 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba) return 0; } +/** + * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort + * @phba: pointer to lpfc hba data structure. + * @axri: pointer to the nvmet xri abort wcqe structure. + * + * This routine is invoked by the worker thread to process a SLI4 fast-path + * NVMET aborted xri. + **/ +void +lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri) +{ + /* TODO: work in progress */ +} + void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { @@ -958,7 +973,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n", + "6159 FCP Drop IO x%x: err x%x\n", ctxp->oxid, rc); dropit: lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", @@ -1683,8 +1698,8 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6067 %s: Entrypoint: sid %x xri %x\n", __func__, - sid, xri); + "6067 Abort: sid %x xri x%x/x%x\n", + sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 7389c130ffcc..8a606d0d2c79 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -12212,6 +12212,41 @@ void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba) } } +/** + * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event + * @phba: pointer to lpfc hba data structure. + * + * This routine is invoked by the worker thread to process all the pending + * SLI4 NVME abort XRI events. + **/ +void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba) +{ + struct lpfc_cq_event *cq_event; + + /* First, declare the fcp xri abort event has been handled */ + spin_lock_irq(&phba->hbalock); + phba->hba_flag &= ~NVME_XRI_ABORT_EVENT; + spin_unlock_irq(&phba->hbalock); + /* Now, handle all the fcp xri abort events */ + while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) { + /* Get the first event from the head of the event queue */ + spin_lock_irq(&phba->hbalock); + list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue, + cq_event, struct lpfc_cq_event, list); + spin_unlock_irq(&phba->hbalock); + /* Notify aborted XRI for NVME work queue */ + if (phba->nvmet_support) { + lpfc_sli4_nvmet_xri_aborted(phba, + &cq_event->cqe.wcqe_axri); + } else { + lpfc_sli4_nvme_xri_aborted(phba, + &cq_event->cqe.wcqe_axri); + } + /* Free the event processed back to the free pool */ + lpfc_sli4_cq_event_release(phba, cq_event); + } +} + /** * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event * @phba: pointer to lpfc hba data structure. @@ -12709,10 +12744,22 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba, spin_unlock_irqrestore(&phba->hbalock, iflags); workposted = true; break; + case LPFC_NVME: + spin_lock_irqsave(&phba->hbalock, iflags); + list_add_tail(&cq_event->list, + &phba->sli4_hba.sp_nvme_xri_aborted_work_queue); + /* Set the nvme xri abort event flag */ + phba->hba_flag |= NVME_XRI_ABORT_EVENT; + spin_unlock_irqrestore(&phba->hbalock, iflags); + workposted = true; + break; default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0603 Invalid work queue CQE subtype (x%x)\n", - cq->subtype); + "0603 Invalid CQ subtype %d: " + "%08x %08x %08x %08x\n", + cq->subtype, wcqe->word0, wcqe->parameter, + wcqe->word2, wcqe->word3); + lpfc_sli4_cq_event_release(phba, cq_event); workposted = false; break; } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 91153c9f6d18..710458cf11d6 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -642,6 +642,7 @@ struct lpfc_sli4_hba { struct list_head sp_asynce_work_queue; struct list_head sp_fcp_xri_aborted_work_queue; struct list_head sp_els_xri_aborted_work_queue; + struct list_head sp_nvme_xri_aborted_work_queue; struct list_head sp_unsol_work_queue; struct lpfc_sli4_link link_state; struct lpfc_sli4_lnk_info lnk_info; @@ -794,9 +795,14 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *); int lpfc_sli4_resume_rpi(struct lpfc_nodelist *, void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *); void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *); +void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba); void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *); void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *, struct sli4_wcqe_xri_aborted *); +void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri); +void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri); void lpfc_sli4_els_xri_aborted(struct lpfc_hba *, struct sli4_wcqe_xri_aborted *); void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *); From 96418b5e2c8867da3279d877f5d1ffabfe460c3d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:31 -0800 Subject: [PATCH 052/384] scsi: lpfc: Fix eh_deadline setting for sli3 adapters. A previous change unilaterally removed the hba reset entry point from the sli3 host template. This was done to allow tape devices being used for back up from being removed. Why was this done ? When there was non-responding device on the fabric, the error escalation policy would escalate to the reset handler. When the reset handler was called, it would reset the adapter, dropping link, thus logging out and terminating all i/o's - on any target. If there was a tape device on the same adapter that wasn't in error, it would kill the tape i/o's, effectively killing the tape device state. With the reset point removed, the adapter reset avoided the fabric logout, allowing the other devices to continue to operate unaffected. A hack - yes. Hint: we really need a transport I_T nexus reset callback added to the eh process (in between the SCSI target reset and hba reset points), so a fc logout could occur to the one bad target only and stop the error escalation process. This patch commonizes the approach so it can be used for sli3 and sli4 adapters, but mandates the admin, via module parameter, specifically identify which adapters the resets are to be removed for. Additionally, bus_reset, which sends Target Reset TMFs to all targets, is also removed from the template as it too has the same effect as the adapter reset. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 6 ++++ drivers/scsi/lpfc/lpfc_crtn.h | 4 ++- drivers/scsi/lpfc/lpfc_init.c | 61 +++++++++++++++++++++++++++++++++-- drivers/scsi/lpfc/lpfc_scsi.c | 3 +- 5 files changed, 69 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index de6cd57dc7f6..763f32dd2d23 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -99,6 +99,7 @@ struct lpfc_sli2_slim; #define FC_MAX_ADPTMSG 64 #define MAX_HBAEVT 32 +#define MAX_HBAS_NO_RESET 16 /* Number of MSI-X vectors the driver uses */ #define LPFC_MSIX_VECTORS 2 diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4114cf4308d0..b741dcb8ee64 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3010,6 +3010,12 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR, lpfc_poll_show, lpfc_poll_store); +int lpfc_no_hba_reset_cnt; +unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444); +MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset"); + LPFC_ATTR(sli_mode, 0, 0, 3, "SLI mode selector:" " 0 - auto (SLI-3 if supported)," diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 843dd73004da..54e6ac42fbcd 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -384,7 +384,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *); extern struct device_attribute *lpfc_hba_attrs[]; extern struct device_attribute *lpfc_vport_attrs[]; extern struct scsi_host_template lpfc_template; -extern struct scsi_host_template lpfc_template_s3; +extern struct scsi_host_template lpfc_template_no_hr; extern struct scsi_host_template lpfc_template_nvme; extern struct scsi_host_template lpfc_vport_template; extern struct fc_function_template lpfc_transport_functions; @@ -554,3 +554,5 @@ void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_wcqe_complete *abts_cmpl); extern int lpfc_enable_nvmet_cnt; extern unsigned long long lpfc_enable_nvmet[]; +extern int lpfc_no_hba_reset_cnt; +extern unsigned long lpfc_no_hba_reset[]; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a0cba631869e..4fa21a9fd883 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3555,6 +3555,44 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) return rc; } +static uint64_t +lpfc_get_wwpn(struct lpfc_hba *phba) +{ + uint64_t wwn; + int rc; + LPFC_MBOXQ_t *mboxq; + MAILBOX_t *mb; + + + mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL); + if (!mboxq) + return (uint64_t)-1; + + /* First get WWN of HBA instance */ + lpfc_read_nv(phba, mboxq); + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6019 Mailbox failed , mbxCmd x%x " + "READ_NV, mbxStatus x%x\n", + bf_get(lpfc_mqe_command, &mboxq->u.mqe), + bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + mempool_free(mboxq, phba->mbox_mem_pool); + return (uint64_t) -1; + } + mb = &mboxq->u.mb; + memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t)); + /* wwn is WWPN of HBA instance */ + mempool_free(mboxq, phba->mbox_mem_pool); + if (phba->sli_rev == LPFC_SLI_REV4) + return be64_to_cpu(wwn); + else + return (((wwn & 0xffffffff00000000) >> 32) | + ((wwn & 0x00000000ffffffff) << 32)); + +} + /** * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping * @phba: pointer to lpfc hba data structure. @@ -3676,17 +3714,32 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) struct lpfc_vport *vport; struct Scsi_Host *shost = NULL; int error = 0; + int i; + uint64_t wwn; + bool use_no_reset_hba = false; + + wwn = lpfc_get_wwpn(phba); + + for (i = 0; i < lpfc_no_hba_reset_cnt; i++) { + if (wwn == lpfc_no_hba_reset[i]) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6020 Setting use_no_reset port=%llx\n", + wwn); + use_no_reset_hba = true; + break; + } + } if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { if (dev != &phba->pcidev->dev) { shost = scsi_host_alloc(&lpfc_vport_template, sizeof(struct lpfc_vport)); } else { - if (phba->sli_rev == LPFC_SLI_REV4) + if (!use_no_reset_hba) shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport)); else - shost = scsi_host_alloc(&lpfc_template_s3, + shost = scsi_host_alloc(&lpfc_template_no_hr, sizeof(struct lpfc_vport)); } } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { @@ -5472,7 +5525,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the host templates the configured values. */ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; - lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; /* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */ if (phba->cfg_enable_bg) { @@ -5693,6 +5747,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the host templates with the updated values. */ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ) phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 9d6384af9fce..bcfd6d6ab16d 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5953,7 +5953,7 @@ struct scsi_host_template lpfc_template_nvme = { .track_queue_depth = 0, }; -struct scsi_host_template lpfc_template_s3 = { +struct scsi_host_template lpfc_template_no_hr = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, .proc_name = LPFC_DRIVER_NAME, @@ -6015,7 +6015,6 @@ struct scsi_host_template lpfc_vport_template = { .eh_abort_handler = lpfc_abort_handler, .eh_device_reset_handler = lpfc_device_reset_handler, .eh_target_reset_handler = lpfc_target_reset_handler, - .eh_bus_reset_handler = lpfc_bus_reset_handler, .slave_alloc = lpfc_slave_alloc, .slave_configure = lpfc_slave_configure, .slave_destroy = lpfc_slave_destroy, From 856984b71cefab1580e9439e5382db1247d689b0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:32 -0800 Subject: [PATCH 053/384] scsi: lpfc: add transport eh_timed_out reference Christoph's prior patch missed the template for the sli3 adapters, which is now the "no host reset" template. Add the transport eh_timed_out handler to the no host reset template Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index bcfd6d6ab16d..54fd0c81ceaf 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5959,6 +5959,7 @@ struct scsi_host_template lpfc_template_no_hr = { .proc_name = LPFC_DRIVER_NAME, .info = lpfc_info, .queuecommand = lpfc_queuecommand, + .eh_timed_out = fc_eh_timed_out, .eh_abort_handler = lpfc_abort_handler, .eh_device_reset_handler = lpfc_device_reset_handler, .eh_target_reset_handler = lpfc_target_reset_handler, From 166d721120c1cf768af11706c3e0411324bf138f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:33 -0800 Subject: [PATCH 054/384] scsi: lpfc: Rework lpfc Kconfig for NVME options Reworked Kconfig so that lfpc only requires the scsi stack. NVME Initiator and NVME Target support can be enabled if the other NVMe subsystems have been enabled. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 19 ++++++++++++++++--- drivers/scsi/lpfc/lpfc_nvme.c | 18 +++++++++++++++--- drivers/scsi/lpfc/lpfc_nvmet.c | 10 ++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 230043c1c90f..4bf55b5d78be 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1241,19 +1241,32 @@ config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" depends on PCI && SCSI depends on SCSI_FC_ATTRS - depends on NVME_FC && NVME_TARGET_FC select CRC_T10DIF - help + ---help--- This lpfc driver supports the Emulex LightPulse Family of Fibre Channel PCI host adapters. config SCSI_LPFC_DEBUG_FS bool "Emulex LightPulse Fibre Channel debugfs Support" depends on SCSI_LPFC && DEBUG_FS - help + ---help--- This makes debugging information from the lpfc driver available via the debugfs filesystem. +config LPFC_NVME_INITIATOR + bool "Emulex LightPulse Fibre Channel NVME Initiator Support" + depends on SCSI_LPFC && NVME_FC + ---help--- + This enables NVME Initiator support in the Emulex lpfc driver. + +config LPFC_NVME_TARGET + bool "Emulex LightPulse Fibre Channel NVME Initiator Support" + depends on SCSI_LPFC && NVME_TARGET_FC + ---help--- + This enables NVME Target support in the Emulex lpfc driver. + Target enablement must still be enabled on a per adapter + basis by module parameters. + config SCSI_SIM710 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" depends on (EISA || MCA) && SCSI diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index b9012fee1632..0a4c19081409 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2127,11 +2127,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) int lpfc_nvme_create_localport(struct lpfc_vport *vport) { + int ret = 0; struct lpfc_hba *phba = vport->phba; struct nvme_fc_port_info nfcp_info; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; - int len, ret = 0; + int len; /* Initialize this localport instance. The vport wwn usage ensures * that NPIV is accounted for. @@ -2148,8 +2149,12 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) /* localport is allocated from the stack, but the registration * call allocates heap memory as well as the private area. */ +#ifdef CONFIG_LPFC_NVME_INITIATOR ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, &vport->phba->pcidev->dev, &localport); +#else + ret = -ENOMEM; +#endif if (!ret) { lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC, "6005 Successfully registered local " @@ -2185,6 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) void lpfc_nvme_destroy_localport(struct lpfc_vport *vport) { +#ifdef CONFIG_LPFC_NVME_INITIATOR struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; @@ -2200,7 +2206,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, "6011 Destroying NVME localport %p\n", localport); - list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) { /* The last node ref has to get released now before the rport * private memory area is released by the transport. @@ -2214,6 +2219,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) "6008 rport fail destroy %x\n", ret); wait_for_completion_timeout(&rport->rport_unreg_done, 5); } + /* lport's rport list is clear. Unregister * lport and release resources. */ @@ -2237,6 +2243,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) "Failed, status x%x\n", ret); } +#endif } void @@ -2267,6 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) int lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { +#ifdef CONFIG_LPFC_NVME_INITIATOR int ret = 0; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2340,7 +2348,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR; rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); - ret = nvme_fc_register_remoteport(localport, &rpinfo, &remote_port); if (!ret) { @@ -2376,6 +2383,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_type, ndlp->nlp_DID, ndlp); } return ret; +#else + return 0; +#endif } /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport. @@ -2393,6 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { +#ifdef CONFIG_LPFC_NVME_INITIATOR int ret; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2450,6 +2461,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) return; input_err: +#endif lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, "6168: State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 48ce9858bc11..a69ca6ea1d6c 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -671,9 +671,13 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; +#ifdef CONFIG_LPFC_NVME_TARGET error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, &phba->pcidev->dev, &phba->targetport); +#else + error = -ENOMEM; +#endif if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, "6025 Cannot register NVME targetport " @@ -752,6 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_tgtport *tgtp; if (phba->nvmet_support == 0) @@ -763,6 +768,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); } phba->targetport = NULL; +#endif } /** @@ -782,6 +788,7 @@ static void lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct hbq_dmabuf *nvmebuf) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_rcv_ctx *ctxp; @@ -862,6 +869,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, atomic_inc(&tgtp->xmt_ls_abort); lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); +#endif } /** @@ -883,6 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct rqb_dmabuf *nvmebuf, uint64_t isr_timestamp) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; @@ -988,6 +997,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); } +#endif } /** From 43140ca68d1a071ddbe92f10a3256e01701ae390 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:34 -0800 Subject: [PATCH 055/384] scsi: lpfc: Rename LPFC_MAX_EQ_DELAY to LPFC_MAX_EQ_DELAY_EQID_CNT Without apriori understanding of what the define is, the name gives a very different impression of what it is (a max delay value for an EQ). Rename the define so it reflects what it is: the number of EQ IDs that can be set in one instance of the MODIFY_EQ_DELAY mbx command. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 3 ++- drivers/scsi/lpfc/lpfc_hw4.h | 4 ++-- drivers/scsi/lpfc/lpfc_init.c | 7 ++----- drivers/scsi/lpfc/lpfc_sli.c | 5 ++++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index b741dcb8ee64..fbd3a563be53 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4457,7 +4457,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, return -EINVAL; phba->cfg_fcp_imax = (uint32_t)val; - for (i = 0; i < phba->io_channel_irqs; i++) + + for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, i); return strlen(buf); diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index cfdb068a3bfc..15277705cb6b 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1001,7 +1001,7 @@ struct eq_delay_info { uint32_t phase; uint32_t delay_multi; }; -#define LPFC_MAX_EQ_DELAY 8 +#define LPFC_MAX_EQ_DELAY_EQID_CNT 8 struct sgl_page_pairs { uint32_t sgl_pg0_addr_lo; @@ -1070,7 +1070,7 @@ struct lpfc_mbx_modify_eq_delay { union { struct { uint32_t num_eq; - struct eq_delay_info eq[LPFC_MAX_EQ_DELAY]; + struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT]; } request; struct { uint32_t word0; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4fa21a9fd883..c883110178ea 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -8756,12 +8756,9 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) } } - /* - * Configure EQ delay multipier for interrupt coalescing using - * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time. - */ - for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY) + for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, qidx); + return 0; out_destroy: diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 8a606d0d2c79..618cdacf13dd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1,3 +1,4 @@ + /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * @@ -13874,6 +13875,8 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) * @startq: The starting FCP EQ to modify * * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA. + * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be + * updated in one mailbox command. * * The @phba struct is used to send mailbox command to HBA. The @startq * is used to get the starting FCP EQ to change. @@ -13926,7 +13929,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq) eq_delay->u.request.eq[cnt].phase = 0; eq_delay->u.request.eq[cnt].delay_multi = dmult; cnt++; - if (cnt >= LPFC_MAX_EQ_DELAY) + if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT) break; } eq_delay->u.request.num_eq = cnt; From da6b044a28fe603fe2c3fd908cda8150aa0abe74 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:35 -0800 Subject: [PATCH 056/384] scsi: lpfc: correct double print Correct a merge error that had debug data printed twice for the same element Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 9f4798e9d938..913eed822cb8 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -3653,17 +3653,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_cq; goto pass_check; } - /* NVME LS complete queue */ - if (phba->sli4_hba.nvmels_cq && - phba->sli4_hba.nvmels_cq->queue_id == queid) { - /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.nvmels_cq, index, count); - if (rc) - goto error_out; - idiag.ptr_private = phba->sli4_hba.nvmels_cq; - goto pass_check; - } /* FCP complete queue */ if (phba->sli4_hba.fcp_cq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; @@ -3738,17 +3727,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_wq; goto pass_check; } - /* NVME LS work queue */ - if (phba->sli4_hba.nvmels_wq && - phba->sli4_hba.nvmels_wq->queue_id == queid) { - /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.nvmels_wq, index, count); - if (rc) - goto error_out; - idiag.ptr_private = phba->sli4_hba.nvmels_wq; - goto pass_check; - } /* FCP work queue */ if (phba->sli4_hba.fcp_wq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; From ba3bd6e2a9a2753439a1fd1fe39e8d5162fb3aa9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:36 -0800 Subject: [PATCH 057/384] scsi: lpfc: remove dead sli3 nvme code Remove nvme teardown calls that should not be there on sli3 devices Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index c883110178ea..661bd25a404a 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10446,12 +10446,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev) fc_remove_host(shost); scsi_remove_host(shost); - /* Perform ndlp cleanup on the physical port. The nvme and nvmet - * localports are destroyed after to cleanup all transport memory. - */ lpfc_cleanup(vport); - lpfc_nvmet_destroy_targetport(phba); - lpfc_nvme_destroy_localport(vport); /* * Bring down the SLI Layer. This step disable all interrupts, From cd46cdedb3238f1f878c42650ec05c6344c7083d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:37 -0800 Subject: [PATCH 058/384] scsi: lpfc: correct rdp diag portnames NVME merge reverted diag port names to the physical port. They should be the vport. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d260a1391baf..d9c61d030034 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5177,15 +5177,15 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba) static uint32_t lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc, - struct lpfc_hba *phba) + struct lpfc_vport *vport) { desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG); - memcpy(desc->port_names.wwnn, phba->wwnn, + memcpy(desc->port_names.wwnn, &vport->fc_nodename, sizeof(desc->port_names.wwnn)); - memcpy(desc->port_names.wwpn, phba->wwpn, + memcpy(desc->port_names.wwpn, &vport->fc_portname, sizeof(desc->port_names.wwpn)); desc->length = cpu_to_be32(sizeof(desc->port_names)); @@ -5279,7 +5279,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context, len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *) (len + pcmd), &rdp_context->link_stat); len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *) - (len + pcmd), phba); + (len + pcmd), vport); len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *) (len + pcmd), vport, ndlp); len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd), From 2ade92ae6d6572858acb2bde6d3664af3ad592e2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:38 -0800 Subject: [PATCH 059/384] scsi: lpfc: code cleanups in NVME initiator base This patch addresses the smatch issues identified by Dan Carpenter in http://www.spinics.net/lists/linux-scsi/msg105663.html The issues are: drivers/scsi/lpfc/lpfc_hbadisc.c:316 lpfc_dev_loss_tmo_handler() warn: we tested 'vport->load_flag & 2' before and it was 'false' Action: removed item from test drivers/scsi/lpfc/lpfc_hbadisc.c:701 lpfc_work_done() warn: test_bit() takes a bit number Action: changed definition so bit number drivers/scsi/lpfc/lpfc_hbadisc.c:2206 lpfc_mbx_cmpl_fcf_scan_read_fcf_rec() error: uninitialized symbol 'vlan_id'. drivers/scsi/lpfc/lpfc_hbadisc.c:2582 lpfc_mbx_cmpl_fcf_rr_read_fcf_rec() error: uninitialized symbol 'vlan_id'. drivers/scsi/lpfc/lpfc_hbadisc.c:2683 lpfc_mbx_cmpl_read_fcf_rec() error: uninitialized symbol 'vlan_id'. Action: initilized value drivers/scsi/lpfc/lpfc_hbadisc.c:4025 lpfc_register_remote_port() error: we previously assumed 'rdata' could be null (see line 4023) Action: refactored check block drivers/scsi/lpfc/lpfc_hbadisc.c:4613 lpfc_sli4_dequeue_nport_iocbs() error: double unlock 'irq:' Action: removed inner irq reference Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 2 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 763f32dd2d23..257bbdd0f0b8 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -105,7 +105,7 @@ struct lpfc_sli2_slim; #define LPFC_MSIX_VECTORS 2 /* lpfc wait event data ready flag */ -#define LPFC_DATA_READY (1<<0) +#define LPFC_DATA_READY 0 /* bit 0 */ /* queue dump line buffer size */ #define LPFC_LBUF_SZ 128 diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index bd8635d303d2..180b072beef6 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -313,8 +313,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) ndlp->nlp_state, ndlp->nlp_rpi); } - if (!(vport->load_flag & FC_UNLOADING) && - !(ndlp->nlp_flag & NLP_DELAY_TMO) && + if (!(ndlp->nlp_flag & NLP_DELAY_TMO) && !(ndlp->nlp_flag & NLP_NPR_2B_DISC) && (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && (ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) && @@ -2175,7 +2174,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) uint32_t boot_flag, addr_mode; uint16_t fcf_index, next_fcf_index; struct lpfc_fcf_rec *fcf_rec = NULL; - uint16_t vlan_id; + uint16_t vlan_id = LPFC_FCOE_NULL_VID; bool select_new_fcf; int rc; @@ -4022,9 +4021,11 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) rdata = rport->dd_data; /* break the link before dropping the ref */ ndlp->rport = NULL; - if (rdata && rdata->pnode == ndlp) - lpfc_nlp_put(ndlp); - rdata->pnode = NULL; + if (rdata) { + if (rdata->pnode == ndlp) + lpfc_nlp_put(ndlp); + rdata->pnode = NULL; + } /* drop reference for earlier registeration */ put_device(&rport->dev); } @@ -4607,9 +4608,9 @@ lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba, pring = qp->pring; if (!pring) continue; - spin_lock_irq(&pring->ring_lock); + spin_lock(&pring->ring_lock); __lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list); - spin_unlock_irq(&pring->ring_lock); + spin_unlock(&pring->ring_lock); } spin_unlock_irq(&phba->hbalock); } From b5ccc7d61c76006f839b41c6f15876342b46cb02 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:39 -0800 Subject: [PATCH 060/384] scsi: lpfc: code cleanups in NVME initiator discovery This patch addresses the smatch issues identified by Dan Carpenter in http://www.spinics.net/lists/linux-scsi/msg105665.html The issues are: drivers/scsi/lpfc/lpfc_ct.c:943 lpfc_cmpl_ct_cmd_gft_id() error: we previously assumed 'ndlp' could be null (see line 928) Action: moved under if check drivers/scsi/lpfc/lpfc_nvmet.c:1694 lpfc_nvmet_unsol_issue_abort() error: we previously assumed 'ndlp' could be null (see line 1690) Action: conditionalized arg in printf stmt drivers/scsi/lpfc/lpfc_nvmet.c:1792 lpfc_nvmet_sol_fcp_issue_abort() error: we previously assumed 'ndlp' could be null (see line 1788) Action: conditionalized arg in printf stmt Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index c22bb3f887e1..d3e9af983015 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -939,8 +939,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "FC4 x%08x, Data: x%08x x%08x\n", ndlp, did, ndlp->nlp_fc4_type, FC_TYPE_FCP, FC_TYPE_NVME); + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; } - ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, 0); } else diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index a69ca6ea1d6c..b7739a554fe0 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1720,7 +1720,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6134 Drop ABTS - wrong NDLP state x%x.\n", - ndlp->nlp_state); + (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ return 0; @@ -1818,7 +1818,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6160 Drop ABTS - wrong NDLP state x%x.\n", - ndlp->nlp_state); + (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ return 0; From eeb810849a20e32aa1b60335e46ea5446ba07e1f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:40 -0800 Subject: [PATCH 061/384] scsi: lpfc: revise version number to 11.2.0.10 Revise lpfc version number to 11.2.0.10 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 86c6c9b26b82..d4e95e28f4e3 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.7" +#define LPFC_DRIVER_VERSION "11.2.0.10" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From a3a4a816b4b194c45d0217e8b9e08b2639802cda Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 27 Feb 2017 21:54:50 +0100 Subject: [PATCH 062/384] dt: emac: document device-tree based phy discovery and setup This patch adds documentation for a new "phy-handle" property, "fixed-link" and "mdio" sub-node. These allows the enumeration of PHYs which are supported by the phy library under drivers/net/phy. The EMAC ethernet controller in IBM and AMCC 4xx chips is currently stuck with a few privately defined phy implementations. It has no support for PHYs which are supported by the generic phylib. Acked-by: Rob Herring Reviewed-by: Florian Fainelli Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- .../devicetree/bindings/powerpc/4xx/emac.txt | 62 ++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt index 712baf6c3e24..44b842b6ca15 100644 --- a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt +++ b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt @@ -71,6 +71,9 @@ For Axon it can be absent, though my current driver doesn't handle phy-address yet so for now, keep 0x00ffffff in it. + - phy-handle : Used to describe configurations where a external PHY + is used. Please refer to: + Documentation/devicetree/bindings/net/ethernet.txt - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec operations (if absent the value is the same as rx-fifo-size). For Axon, either absent or 2048. @@ -81,8 +84,22 @@ offload, phandle of the TAH device node. - tah-channel : 1 cell, optional. If appropriate, channel used on the TAH engine. + - fixed-link : Fixed-link subnode describing a link to a non-MDIO + managed entity. See + Documentation/devicetree/bindings/net/fixed-link.txt + for details. + - mdio subnode : When the EMAC has a phy connected to its local + mdio, which us supported by the kernel's network + PHY library in drivers/net/phy, there must be device + tree subnode with the following required properties: + - #address-cells: Must be <1>. + - #size-cells: Must be <0>. - Example: + For PHY definitions: Please refer to + Documentation/devicetree/bindings/net/phy.txt and + Documentation/devicetree/bindings/net/ethernet.txt + + Examples: EMAC0: ethernet@40000800 { device_type = "network"; @@ -104,6 +121,48 @@ zmii-channel = <0>; }; + EMAC1: ethernet@ef600c00 { + device_type = "network"; + compatible = "ibm,emac-apm821xx", "ibm,emac4sync"; + interrupt-parent = <&EMAC1>; + interrupts = <0 1>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = <0 &UIC2 0x10 IRQ_TYPE_LEVEL_HIGH /* Status */ + 1 &UIC2 0x14 IRQ_TYPE_LEVEL_HIGH /* Wake */>; + reg = <0xef600c00 0x000000c4>; + local-mac-address = [000000000000]; /* Filled in by U-Boot */ + mal-device = <&MAL0>; + mal-tx-channel = <0>; + mal-rx-channel = <0>; + cell-index = <0>; + max-frame-size = <9000>; + rx-fifo-size = <16384>; + tx-fifo-size = <2048>; + fifo-entry-size = <10>; + phy-mode = "rgmii"; + phy-handle = <&phy0>; + phy-map = <0x00000000>; + rgmii-device = <&RGMII0>; + rgmii-channel = <0>; + tah-device = <&TAH0>; + tah-channel = <0>; + has-inverted-stacr-oc; + has-new-stacr-staopc; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy0: ethernet-phy@0 { + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <0>; + }; + }; + }; + + ii) McMAL node Required properties: @@ -145,4 +204,3 @@ - revision : as provided by the RGMII new version register if available. For Axon: 0x0000012a - From 0253f2681f4445e9003fb27dc743ef7d25e8f3b9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 Feb 2017 22:12:04 +0100 Subject: [PATCH 063/384] net/mlx5e: add IPV6 dependency The ethernet support now calls directly into the ipv6 core code, which fails if IPV6 is a loadable module but mlx5 is built-in: drivers/net/ethernet/mellanox/mlx5/core/en_tc.o: In function `mlx5e_create_encap_header_ipv6': en_tc.c:(.text.mlx5e_create_encap_header_ipv6+0x110): undefined reference to `ip6_route_output_flags' This adds a dependency to ensure that MLX5_CORE_EN can only be built if we are able link the kernel successfully. The downside is that the ethernet option can be hidden. Alternatively we could make MLX5_CORE depend on "IPV6 || !IPV6", which would force MLX5_CORE to be a module when IPV6 is, including in configurations where we don't use the ethernet support at all. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index ddb4ca4ff930..117170014e88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -14,6 +14,7 @@ config MLX5_CORE config MLX5_CORE_EN bool "Mellanox Technologies ConnectX-4 Ethernet support" depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE + depends on IPV6=y || IPV6=n || MLX5_CORE=m imply PTP_1588_CLOCK default n ---help--- From 4342696df764ec65dcdfbd0c10d90ea52505f8ba Mon Sep 17 00:00:00 2001 From: "Blomme, Maarten" Date: Thu, 2 Mar 2017 13:08:36 +0100 Subject: [PATCH 064/384] spi_ks8995: fix "BUG: key accdaa28 not in .data!" Signed-off-by: Maarten Blomme Signed-off-by: David S. Miller --- drivers/net/phy/spi_ks8995.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 93ffedfa2994..aa7209d794f3 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -498,6 +498,7 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; + sysfs_attr_init(&ks->regs_attr.attr); err = sysfs_create_bin_file(&spi->dev.kobj, &ks->regs_attr); if (err) { dev_err(&spi->dev, "unable to create sysfs file, err=%d\n", From 239870f2a0ebf75cc8f6d987dc528c5243f93d69 Mon Sep 17 00:00:00 2001 From: "Blomme, Maarten" Date: Thu, 2 Mar 2017 13:08:49 +0100 Subject: [PATCH 065/384] spi_ks8995: regs_size incorrect for some devices Signed-off-by: Maarten Blomme Signed-off-by: David S. Miller --- drivers/net/phy/spi_ks8995.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index aa7209d794f3..1e2d4f1179da 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -491,8 +491,8 @@ static int ks8995_probe(struct spi_device *spi) if (err) return err; - ks->regs_attr.size = ks->chip->regs_size; memcpy(&ks->regs_attr, &ks8995_registers_attr, sizeof(ks->regs_attr)); + ks->regs_attr.size = ks->chip->regs_size; err = ks8995_reset(ks); if (err) From 466e8bf10ac104d96e1ea813e8126e11cb72ea20 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:28 +0100 Subject: [PATCH 066/384] bnx2x: prevent crash when accessing PTP with interface down It is possible to crash the kernel by accessing a PTP device while its associated bnx2x interface is down. Before the interface is brought up, the timecounter is not initialized, so accessing it results in NULL dereference. Fix it by checking if the interface is up. Use -ENETDOWN as the error code when the interface is down. -EFAULT in bnx2x_ptp_adjfreq() did not seem right. Tested using phc_ctl get/set/adj/freq commands. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index d8d06fdfc42b..d57290b9ea75 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13738,7 +13738,7 @@ static int bnx2x_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) if (!netif_running(bp->dev)) { DP(BNX2X_MSG_PTP, "PTP adjfreq called while the interface is down\n"); - return -EFAULT; + return -ENETDOWN; } if (ppb < 0) { @@ -13797,6 +13797,12 @@ static int bnx2x_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP adjtime called while the interface is down\n"); + return -ENETDOWN; + } + DP(BNX2X_MSG_PTP, "PTP adjtime called, delta = %llx\n", delta); timecounter_adjtime(&bp->timecounter, delta); @@ -13809,6 +13815,12 @@ static int bnx2x_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP gettime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timecounter_read(&bp->timecounter); DP(BNX2X_MSG_PTP, "PTP gettime called, ns = %llu\n", ns); @@ -13824,6 +13836,12 @@ static int bnx2x_ptp_settime(struct ptp_clock_info *ptp, struct bnx2x *bp = container_of(ptp, struct bnx2x, ptp_clock_info); u64 ns; + if (!netif_running(bp->dev)) { + DP(BNX2X_MSG_PTP, + "PTP settime called while the interface is down\n"); + return -ENETDOWN; + } + ns = timespec64_to_ns(ts); DP(BNX2X_MSG_PTP, "PTP settime called, ns = %llu\n", ns); From 850268d320f0c7c5eb7ad0a62ef21859fa331ded Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:29 +0100 Subject: [PATCH 067/384] bnx2x: lower verbosity of VF stats debug messages When BNX2X_MSG_IOV is enabled, the driver produces too many VF statistics messages. Lower the verbosity of the VF stats messages similarly as in commit 76ca70fabbdaa3 ("bnx2x: [Debug] change verbosity of some prints"). Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 6fad22adbbb9..9f0f851774f2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -1899,7 +1899,8 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) continue; } - DP(BNX2X_MSG_IOV, "add addresses for vf %d\n", vf->abs_vfid); + DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS), + "add addresses for vf %d\n", vf->abs_vfid); for_each_vfq(vf, j) { struct bnx2x_vf_queue *rxq = vfq_get(vf, j); @@ -1920,11 +1921,12 @@ void bnx2x_iov_adjust_stats_req(struct bnx2x *bp) cpu_to_le32(U64_HI(q_stats_addr)); cur_query_entry->address.lo = cpu_to_le32(U64_LO(q_stats_addr)); - DP(BNX2X_MSG_IOV, - "added address %x %x for vf %d queue %d client %d\n", - cur_query_entry->address.hi, - cur_query_entry->address.lo, cur_query_entry->funcID, - j, cur_query_entry->index); + DP_AND((BNX2X_MSG_IOV | BNX2X_MSG_STATS), + "added address %x %x for vf %d queue %d client %d\n", + cur_query_entry->address.hi, + cur_query_entry->address.lo, + cur_query_entry->funcID, + j, cur_query_entry->index); cur_query_entry++; cur_data_offset += sizeof(struct per_queue_stats); stats_count++; From 22118d861cec5da6ed525aaf12a3de9bfeffc58f Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:30 +0100 Subject: [PATCH 068/384] bnx2x: fix possible overrun of VFPF multicast addresses array It is too late to check for the limit of the number of VF multicast addresses after they have already been copied to the req->multicast[] array, possibly overflowing it. Do the check before copying. Also fix the error path to not skip unlocking vf2pf_mutex. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index bfae300cf25f..c2d327d9dff0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -868,7 +868,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) struct bnx2x *bp = netdev_priv(dev); struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters; struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp; - int rc, i = 0; + int rc = 0, i = 0; struct netdev_hw_addr *ha; if (bp->state != BNX2X_STATE_OPEN) { @@ -883,6 +883,15 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) /* Get Rx mode requested */ DP(NETIF_MSG_IFUP, "dev->flags = %x\n", dev->flags); + /* We support PFVF_MAX_MULTICAST_PER_VF mcast addresses tops */ + if (netdev_mc_count(dev) > PFVF_MAX_MULTICAST_PER_VF) { + DP(NETIF_MSG_IFUP, + "VF supports not more than %d multicast MAC addresses\n", + PFVF_MAX_MULTICAST_PER_VF); + rc = -EINVAL; + goto out; + } + netdev_for_each_mc_addr(ha, dev) { DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n", bnx2x_mc_addr(ha)); @@ -890,16 +899,6 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) i++; } - /* We support four PFVF_MAX_MULTICAST_PER_VF mcast - * addresses tops - */ - if (i >= PFVF_MAX_MULTICAST_PER_VF) { - DP(NETIF_MSG_IFUP, - "VF supports not more than %d multicast MAC addresses\n", - PFVF_MAX_MULTICAST_PER_VF); - return -EINVAL; - } - req->n_multicast = i; req->flags |= VFPF_SET_Q_FILTERS_MULTICAST_CHANGED; req->vf_qid = 0; @@ -924,7 +923,7 @@ int bnx2x_vfpf_set_mcast(struct net_device *dev) out: bnx2x_vfpf_finalize(bp, &req->first_tlv); - return 0; + return rc; } /* request pf to add a vlan for the vf */ From 83bd9eb8fc69cdd5135ed6e1f066adc8841800fd Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:31 +0100 Subject: [PATCH 069/384] bnx2x: fix detection of VLAN filtering feature for VF VFs are currently missing the VLAN filtering feature, because we were checking the PF's acquire response before actually performing the acquire. Fix it by setting the feature flag later when we have the PF response. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index d57290b9ea75..ac76fc251d26 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -13292,17 +13292,15 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_HIGHDMA; - /* VF with OLD Hypervisor or old PF do not support filtering */ if (IS_PF(bp)) { if (chip_is_e1x) bp->accept_any_vlan = true; else dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#ifdef CONFIG_BNX2X_SRIOV - } else if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; -#endif } + /* For VF we'll know whether to enable VLAN filtering after + * getting a response to CHANNEL_TLV_ACQUIRE from PF. + */ dev->features |= dev->hw_features | NETIF_F_HW_VLAN_CTAG_RX; dev->features |= NETIF_F_HIGHDMA; @@ -14009,6 +14007,14 @@ static int bnx2x_init_one(struct pci_dev *pdev, rc = bnx2x_vfpf_acquire(bp, tx_count, rx_count); if (rc) goto init_one_freemem; + +#ifdef CONFIG_BNX2X_SRIOV + /* VF with OLD Hypervisor or old PF do not support filtering */ + if (bp->acquire_resp.pfdev_info.pf_cap & PFVF_CAP_VLAN_FILTER) { + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } +#endif } /* Enable SRIOV if capability found in configuration space */ From 78d5505432436516456c12abbe705ec8dee7ee2b Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:32 +0100 Subject: [PATCH 070/384] bnx2x: do not rollback VF MAC/VLAN filters we did not configure On failure to configure a VF MAC/VLAN filter we should not attempt to rollback filters that we failed to configure with -EEXIST. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 8 +++++++- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 9f0f851774f2..2068bb8f5495 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -434,7 +434,9 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, /* Add/Remove the filter */ rc = bnx2x_config_vlan_mac(bp, &ramrod); - if (rc && rc != -EEXIST) { + if (rc == -EEXIST) + return 0; + if (rc) { BNX2X_ERR("Failed to %s %s\n", filter->add ? "add" : "delete", (filter->type == BNX2X_VF_FILTER_VLAN_MAC) ? @@ -444,6 +446,8 @@ static int bnx2x_vf_mac_vlan_config(struct bnx2x *bp, return rc; } + filter->applied = true; + return 0; } @@ -471,6 +475,8 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf, BNX2X_ERR("Managed only %d/%d filters - rolling back\n", i, filters->count + 1); while (--i >= 0) { + if (!filters->filters[i].applied) + continue; filters->filters[i].add = !filters->filters[i].add; bnx2x_vf_mac_vlan_config(bp, vf, qid, &filters->filters[i], diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 7a6d406f4c11..888d0b6632e8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -114,6 +114,7 @@ struct bnx2x_vf_mac_vlan_filter { (BNX2X_VF_FILTER_MAC | BNX2X_VF_FILTER_VLAN) /*shortcut*/ bool add; + bool applied; u8 *mac; u16 vid; }; From 74bcbeb7d77ec92e4262fc340cb436ef7d98ba01 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:33 +0100 Subject: [PATCH 071/384] bnx2x: fix incorrect filter count in an error message filters->count is the number of filters we were supposed to configure. There is no reason to increase it by +1 when printing the count in an error message. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 2068bb8f5495..bdfd53b46bc5 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -473,7 +473,7 @@ int bnx2x_vf_mac_vlan_config_list(struct bnx2x *bp, struct bnx2x_virtf *vf, /* Rollback if needed */ if (i != filters->count) { BNX2X_ERR("Managed only %d/%d filters - rolling back\n", - i, filters->count + 1); + i, filters->count); while (--i >= 0) { if (!filters->filters[i].applied) continue; From e39513259450a8312cb98a9a3b16bb924310dbcc Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 3 Mar 2017 17:08:34 +0100 Subject: [PATCH 072/384] bnx2x: add missing configuration of VF VLAN filters Configuring VLANs from the VF side had no effect, because the PF ignored filters of type VFPF_VLAN_FILTER in the VF-PF message. Add the missing filter type to configure. Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index c2d327d9dff0..76a4668c50fe 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -1777,6 +1777,23 @@ static int bnx2x_vf_mbx_qfilters(struct bnx2x *bp, struct bnx2x_virtf *vf) goto op_err; } + /* build vlan list */ + fl = NULL; + + rc = bnx2x_vf_mbx_macvlan_list(bp, vf, msg, &fl, + VFPF_VLAN_FILTER); + if (rc) + goto op_err; + + if (fl) { + /* set vlan list */ + rc = bnx2x_vf_mac_vlan_config_list(bp, vf, fl, + msg->vf_qid, + false); + if (rc) + goto op_err; + } + } if (msg->flags & VFPF_SET_Q_FILTERS_RX_MASK_CHANGED) { From 02b2faaf0af1d85585f6d6980e286d53612acfc2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 14:08:21 -0800 Subject: [PATCH 073/384] tcp: fix various issues for sockets morphing to listen state Dmitry Vyukov reported a divide by 0 triggered by syzkaller, exploiting tcp_disconnect() path that was never really considered and/or used before syzkaller ;) I was not able to reproduce the bug, but it seems issues here are the three possible actions that assumed they would never trigger on a listener. 1) tcp_write_timer_handler 2) tcp_delack_timer_handler 3) MTU reduction Only IPv6 MTU reduction was properly testing TCP_CLOSE and TCP_LISTEN states from tcp_v6_mtu_reduced() Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 7 +++++-- net/ipv4/tcp_timer.c | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9a89b8deafae..8f3ec1365497 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -279,10 +279,13 @@ EXPORT_SYMBOL(tcp_v4_connect); */ void tcp_v4_mtu_reduced(struct sock *sk) { - struct dst_entry *dst; struct inet_sock *inet = inet_sk(sk); - u32 mtu = tcp_sk(sk)->mtu_info; + struct dst_entry *dst; + u32 mtu; + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + return; + mtu = tcp_sk(sk)->mtu_info; dst = inet_csk_update_pmtu(sk, mtu); if (!dst) return; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 40d893556e67..b2ab411c6d37 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk) sk_mem_reclaim_partial(sk); - if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; if (time_after(icsk->icsk_ack.timeout, jiffies)) { @@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); int event; - if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) + if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || + !icsk->icsk_pending) goto out; if (time_after(icsk->icsk_timeout, jiffies)) { From 146d8fef9da1ba854de7fa28b9cb9eb147535f88 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 4 Mar 2017 00:01:41 +0000 Subject: [PATCH 074/384] rxrpc: Call state should be read with READ_ONCE() under some circumstances The call state may be changed at any time by the data-ready routine in response to received packets, so if the call state is to be read and acted upon several times in a function, READ_ONCE() must be used unless the call state lock is held. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/input.c | 12 +++++++----- net/rxrpc/recvmsg.c | 4 ++-- net/rxrpc/sendmsg.c | 48 ++++++++++++++++++++++++++++----------------- 3 files changed, 39 insertions(+), 25 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9f4cfa25af7c..d74921c4969b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -420,6 +420,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + enum rxrpc_call_state state; unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int ix; rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; @@ -434,14 +435,15 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, _proto("Rx DATA %%%u { #%u f=%02x }", sp->hdr.serial, seq, sp->hdr.flags); - if (call->state >= RXRPC_CALL_COMPLETE) + state = READ_ONCE(call->state); + if (state >= RXRPC_CALL_COMPLETE) return; /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. */ - if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST || - call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || + state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) return; @@ -799,7 +801,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, return rxrpc_proto_abort("AK0", call, 0); /* Ignore ACKs unless we are or have just been transmitting. */ - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: case RXRPC_CALL_SERVER_SEND_REPLY: @@ -940,7 +942,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, static void rxrpc_input_implicit_end_call(struct rxrpc_connection *conn, struct rxrpc_call *call) { - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); break; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 6491ca46a03f..3e2f1a8e9c5b 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -527,7 +527,7 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = len; } - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_ACCEPTING: ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); break; @@ -640,7 +640,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, mutex_lock(&call->user_mutex); - switch (call->state) { + switch (READ_ONCE(call->state)) { case RXRPC_CALL_CLIENT_RECV_REPLY: case RXRPC_CALL_SERVER_RECV_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index bc2d3dcff9de..47ccfeacc1c6 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -488,6 +488,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) __releases(&rx->sk.sk_lock.slock) { + enum rxrpc_call_state state; enum rxrpc_command cmd; struct rxrpc_call *call; unsigned long user_call_ID = 0; @@ -526,13 +527,17 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) return PTR_ERR(call); /* ... and we have the call lock. */ } else { - ret = -EBUSY; - if (call->state == RXRPC_CALL_UNINITIALISED || - call->state == RXRPC_CALL_CLIENT_AWAIT_CONN || - call->state == RXRPC_CALL_SERVER_PREALLOC || - call->state == RXRPC_CALL_SERVER_SECURING || - call->state == RXRPC_CALL_SERVER_ACCEPTING) + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_UNINITIALISED: + case RXRPC_CALL_CLIENT_AWAIT_CONN: + case RXRPC_CALL_SERVER_PREALLOC: + case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_ACCEPTING: + ret = -EBUSY; goto error_release_sock; + default: + break; + } ret = mutex_lock_interruptible(&call->user_mutex); release_sock(&rx->sk); @@ -542,10 +547,11 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } } + state = READ_ONCE(call->state); _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); + call->debug_id, call->user_call_ID, state, call->conn); - if (call->state >= RXRPC_CALL_COMPLETE) { + if (state >= RXRPC_CALL_COMPLETE) { /* it's too late for this call */ ret = -ESHUTDOWN; } else if (cmd == RXRPC_CMD_SEND_ABORT) { @@ -555,12 +561,12 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else if (cmd != RXRPC_CMD_SEND_DATA) { ret = -EINVAL; } else if (rxrpc_is_client_call(call) && - call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { + state != RXRPC_CALL_CLIENT_SEND_REQUEST) { /* request phase complete for this client call */ ret = -EPROTO; } else if (rxrpc_is_service_call(call) && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + state != RXRPC_CALL_SERVER_ACK_REQUEST && + state != RXRPC_CALL_SERVER_SEND_REPLY) { /* Reply phase not begun or not complete for service call. */ ret = -EPROTO; } else { @@ -605,14 +611,20 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, _debug("CALL %d USR %lx ST %d on CONN %p", call->debug_id, call->user_call_ID, call->state, call->conn); - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = -ESHUTDOWN; /* it's too late for this call */ - } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - ret = -EPROTO; /* request phase complete for this client call */ - } else { + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + case RXRPC_CALL_SERVER_SEND_REPLY: ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); + break; + case RXRPC_CALL_COMPLETE: + /* It's too late for this call */ + ret = -ESHUTDOWN; + break; + default: + /* Request phase complete for this client call */ + ret = -EPROTO; + break; } mutex_unlock(&call->user_mutex); From dd4f10722aeb10f4f582948839f066bebe44e5fb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 21:01:02 -0800 Subject: [PATCH 075/384] net: fix socket refcounting in skb_complete_wifi_ack() TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt By the time TX completion happens, sk_refcnt might be already 0. sock_hold()/sock_put() would then corrupt critical state, like sk_wmem_alloc. Fixes: bf7fa551e0ce ("mac80211: Resolve sk_refcnt/sk_wmem_alloc issue in wifi ack path") Signed-off-by: Eric Dumazet Cc: Alexander Duyck Cc: Johannes Berg Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/core/skbuff.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3557958e9bf..e2f37a560ec4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3893,7 +3893,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) { struct sock *sk = skb->sk; struct sock_exterr_skb *serr; - int err; + int err = 1; skb->wifi_acked_valid = 1; skb->wifi_acked = acked; @@ -3903,14 +3903,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - err = sock_queue_err_skb(sk, skb); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + err = sock_queue_err_skb(sk, skb); + sock_put(sk); + } if (err) kfree_skb(skb); - - sock_put(sk); } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); From 9ac25fc063751379cb77434fef9f3b088cd3e2f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Mar 2017 21:01:03 -0800 Subject: [PATCH 076/384] net: fix socket refcounting in skb_complete_tx_timestamp() TX skbs do not necessarily hold a reference on skb->sk->sk_refcnt By the time TX completion happens, sk_refcnt might be already 0. sock_hold()/sock_put() would then corrupt critical state, like sk_wmem_alloc and lead to leaks or use after free. Fixes: 62bccb8cdb69 ("net-timestamp: Make the clone operation stand-alone from phy timestamping") Signed-off-by: Eric Dumazet Cc: Alexander Duyck Cc: Johannes Berg Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/core/skbuff.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e2f37a560ec4..cd4ba8c6b609 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3828,13 +3828,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, if (!skb_may_tx_timestamp(sk, false)) return; - /* take a reference to prevent skb_orphan() from freeing the socket */ - sock_hold(sk); - - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - - sock_put(sk); + /* Take a reference to prevent skb_orphan() from freeing the socket, + * but only if the socket refcount is not zero. + */ + if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) { + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); + sock_put(sk); + } } EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp); From 8edc3affc0770886c7bfb3436b0fdd09bce13167 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Mar 2017 08:57:33 -0800 Subject: [PATCH 077/384] rds: tcp: Take explicit refcounts on struct net It is incorrect for the rds_connection to piggyback on the sock_net() refcount for the netns because this gives rise to a chicken-and-egg problem during rds_conn_destroy. Instead explicitly take a ref on the net, and hold the netns down till the connection tear-down is complete. Reported-by: Dmitry Vyukov Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/connection.c | 1 + net/rds/rds.h | 6 +++--- net/rds/tcp.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 0e04dcceb1d4..1fa75ab7b733 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -429,6 +429,7 @@ void rds_conn_destroy(struct rds_connection *conn) */ rds_cong_remove_conn(conn); + put_net(conn->c_net); kmem_cache_free(rds_conn_slab, conn); spin_lock_irqsave(&rds_conn_lock, flags); diff --git a/net/rds/rds.h b/net/rds/rds.h index 39518ef7af4d..82d38ccf5e8b 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -147,7 +147,7 @@ struct rds_connection { /* Protocol version */ unsigned int c_version; - possible_net_t c_net; + struct net *c_net; struct list_head c_map_item; unsigned long c_map_queued; @@ -162,13 +162,13 @@ struct rds_connection { static inline struct net *rds_conn_net(struct rds_connection *conn) { - return read_pnet(&conn->c_net); + return conn->c_net; } static inline void rds_conn_net_set(struct rds_connection *conn, struct net *net) { - write_pnet(&conn->c_net, net); + conn->c_net = get_net(net); } #define RDS_FLAG_CONG_BITMAP 0x01 diff --git a/net/rds/tcp.c b/net/rds/tcp.c index a973d3b4dff0..65c8e3b3b710 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -529,7 +529,7 @@ static void rds_tcp_kill_sock(struct net *net) flush_work(&rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); + struct net *c_net = tc->t_cpath->cp_conn->c_net; if (net != c_net || !tc->t_sock) continue; @@ -584,7 +584,7 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); + struct net *c_net = tc->t_cpath->cp_conn->c_net; if (net != c_net || !tc->t_sock) continue; From 16c09b1c7657522a321b04aa7f4300865b7cb292 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Mar 2017 08:57:34 -0800 Subject: [PATCH 078/384] rds: tcp: Reorder initialization sequence in rds_tcp_init to avoid races Order of initialization in rds_tcp_init needs to be done so that resources are set up and destroyed in the correct synchronization sequence with both the data path, as well as netns create/destroy path. Specifically, - we must call register_pernet_subsys and get the rds_tcp_netid before calling register_netdevice_notifier, otherwise we risk the sequence 1. register_netdevice_notifier sets up netdev notifier callback 2. rds_tcp_dev_event -> rds_tcp_kill_sock uses netid 0, and finds the wrong rtn, resulting in a panic with string that is of the form: BUG: unable to handle kernel NULL pointer dereference at 000000000000000d IP: rds_tcp_kill_sock+0x3a/0x1d0 [rds_tcp] : - the rds_tcp_incoming_slab kmem_cache must be initialized before the datapath starts up. The latter can happen any time after the pernet_subsys registration of rds_tcp_net_ops, whose -> init function sets up the listen socket. If the rds_tcp_incoming_slab has not been set up at that time, a panic of the form below may be encountered BUG: unable to handle kernel NULL pointer dereference at 0000000000000014 IP: kmem_cache_alloc+0x90/0x1c0 : rds_tcp_data_recv+0x1e7/0x370 [rds_tcp] tcp_read_sock+0x96/0x1c0 rds_tcp_recv_path+0x65/0x80 [rds_tcp] : Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 65c8e3b3b710..fbf807a0cc4a 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -638,19 +638,19 @@ static int rds_tcp_init(void) goto out; } - ret = register_netdevice_notifier(&rds_tcp_dev_notifier); - if (ret) { - pr_warn("could not register rds_tcp_dev_notifier\n"); + ret = rds_tcp_recv_init(); + if (ret) goto out_slab; - } ret = register_pernet_subsys(&rds_tcp_net_ops); if (ret) - goto out_notifier; + goto out_recv; - ret = rds_tcp_recv_init(); - if (ret) + ret = register_netdevice_notifier(&rds_tcp_dev_notifier); + if (ret) { + pr_warn("could not register rds_tcp_dev_notifier\n"); goto out_pernet; + } rds_trans_register(&rds_tcp_transport); @@ -660,9 +660,8 @@ static int rds_tcp_init(void) out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); -out_notifier: - if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) - pr_warn("could not unregister rds_tcp_dev_notifier\n"); +out_recv: + rds_tcp_recv_exit(); out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: From b21dd4506b71bdb9c5a20e759255cd2513ea7ebe Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Mar 2017 08:57:35 -0800 Subject: [PATCH 079/384] rds: tcp: Sequence teardown of listen and acceptor sockets to avoid races Commit a93d01f5777e ("RDS: TCP: avoid bad page reference in rds_tcp_listen_data_ready") added the function rds_tcp_listen_sock_def_readable() to handle the case when a partially set-up acceptor socket drops into rds_tcp_listen_data_ready(). However, if the listen socket (rtn->rds_tcp_listen_sock) is itself going through a tear-down via rds_tcp_listen_stop(), the (*ready)() will be null and we would hit a panic of the form BUG: unable to handle kernel NULL pointer dereference at (null) IP: (null) : ? rds_tcp_listen_data_ready+0x59/0xb0 [rds_tcp] tcp_data_queue+0x39d/0x5b0 tcp_rcv_established+0x2e5/0x660 tcp_v4_do_rcv+0x122/0x220 tcp_v4_rcv+0x8b7/0x980 : In the above case, it is not fatal to encounter a NULL value for ready- we should just drop the packet and let the flush of the acceptor thread finish gracefully. In general, the tear-down sequence for listen() and accept() socket that is ensured by this commit is: rtn->rds_tcp_listen_sock = NULL; /* prevent any new accepts */ In rds_tcp_listen_stop(): serialize with, and prevent, further callbacks using lock_sock() flush rds_wq flush acceptor workq sock_release(listen socket) Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/tcp.c | 15 ++++++++++----- net/rds/tcp.h | 2 +- net/rds/tcp_listen.c | 9 +++++++-- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index fbf807a0cc4a..225690076773 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -484,9 +484,10 @@ static void __net_exit rds_tcp_exit_net(struct net *net) * we do need to clean up the listen socket here. */ if (rtn->rds_tcp_listen_sock) { - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); + struct socket *lsock = rtn->rds_tcp_listen_sock; + rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); } } @@ -523,10 +524,10 @@ static void rds_tcp_kill_sock(struct net *net) struct rds_tcp_connection *tc, *_tc; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - rds_tcp_listen_stop(rtn->rds_tcp_listen_sock); rtn->rds_tcp_listen_sock = NULL; - flush_work(&rtn->rds_tcp_accept_w); + rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { struct net *c_net = tc->t_cpath->cp_conn->c_net; @@ -546,8 +547,12 @@ static void rds_tcp_kill_sock(struct net *net) void *rds_tcp_listen_sock_def_readable(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - return rtn->rds_tcp_listen_sock->sk->sk_user_data; + if (!lsock) + return NULL; + + return lsock->sk->sk_user_data; } static int rds_tcp_dev_event(struct notifier_block *this, diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 9a1cc8906576..56ea6620fcf9 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ struct socket *rds_tcp_listen_init(struct net *); -void rds_tcp_listen_stop(struct socket *); +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor); void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); int rds_tcp_keepalive(struct socket *sock); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 67d0929c7d3d..2c69a508a693 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -223,6 +223,9 @@ void rds_tcp_listen_data_ready(struct sock *sk) * before it has been accepted and the accepter has set up their * data_ready.. we only want to queue listen work for our listening * socket + * + * (*ready)() may be null if we are racing with netns delete, and + * the listen socket is being torn down. */ if (sk->sk_state == TCP_LISTEN) rds_tcp_accept_work(sk); @@ -231,7 +234,8 @@ void rds_tcp_listen_data_ready(struct sock *sk) out: read_unlock_bh(&sk->sk_callback_lock); - ready(sk); + if (ready) + ready(sk); } struct socket *rds_tcp_listen_init(struct net *net) @@ -271,7 +275,7 @@ struct socket *rds_tcp_listen_init(struct net *net) return NULL; } -void rds_tcp_listen_stop(struct socket *sock) +void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor) { struct sock *sk; @@ -292,5 +296,6 @@ void rds_tcp_listen_stop(struct socket *sock) /* wait for accepts to stop and close the socket */ flush_workqueue(rds_wq); + flush_work(acceptor); sock_release(sock); } From 6c4dc75c251721f517e9daeb5370ea606b5b35ce Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sun, 5 Mar 2017 03:01:55 +0300 Subject: [PATCH 080/384] net/sched: act_skbmod: remove unneeded rcu_read_unlock in tcf_skbmod_dump Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 3b7074e23024..c736627f8f4a 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, return skb->len; nla_put_failure: - rcu_read_unlock(); nlmsg_trim(skb, b); return -1; } From 142c0ac445792c492579cb01f1cfd4e32e6dfcce Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Sun, 5 Mar 2017 12:18:41 -0600 Subject: [PATCH 081/384] ibmvnic: Fix overflowing firmware/hardware TX queue Use a counter to track the number of outstanding transmissions sent that have not received completions. If the counter reaches the maximum number of queue entries, stop transmissions on that queue. As we receive more completions from firmware, wake the queue once the counter reaches an acceptable level. This patch prevents hardware/firmware TX queue from filling up and and generating errors. Since incorporating this fix, internal testing has reported that these firmware errors have stopped. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 27 ++++++++++++++++++++++++++- drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9198e6bd5160..0e87b83f25e2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -705,6 +705,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; struct ibmvnic_tx_buff *tx_buff = NULL; + struct ibmvnic_sub_crq_queue *tx_scrq; struct ibmvnic_tx_pool *tx_pool; unsigned int tx_send_failed = 0; unsigned int tx_map_failed = 0; @@ -724,6 +725,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) int ret = 0; tx_pool = &adapter->tx_pool[queue_num]; + tx_scrq = adapter->tx_scrq[queue_num]; txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb)); handle_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf-> @@ -826,6 +828,14 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ret = NETDEV_TX_BUSY; goto out; } + + atomic_inc(&tx_scrq->used); + + if (atomic_read(&tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { + netdev_info(netdev, "Stopping queue %d\n", queue_num); + netif_stop_subqueue(netdev, queue_num); + } + tx_packets++; tx_bytes += skb->len; txq->trans_start = jiffies; @@ -1213,6 +1223,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->adapter = adapter; scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs); scrq->cur = 0; + atomic_set(&scrq->used, 0); scrq->rx_skb_top = NULL; spin_lock_init(&scrq->lock); @@ -1355,8 +1366,22 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, DMA_TO_DEVICE); } - if (txbuff->last_frag) + if (txbuff->last_frag) { + atomic_dec(&scrq->used); + + if (atomic_read(&scrq->used) <= + (adapter->req_tx_entries_per_subcrq / 2) && + netif_subqueue_stopped(adapter->netdev, + txbuff->skb)) { + netif_wake_subqueue(adapter->netdev, + scrq->pool_index); + netdev_dbg(adapter->netdev, + "Started queue %d\n", + scrq->pool_index); + } + dev_kfree_skb_any(txbuff->skb); + } adapter->tx_pool[pool].free_map[adapter->tx_pool[pool]. producer_index] = index; diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 422824f1f42a..1993b42666f7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -863,6 +863,7 @@ struct ibmvnic_sub_crq_queue { spinlock_t lock; struct sk_buff *rx_skb_top; struct ibmvnic_adapter *adapter; + atomic_t used; }; struct ibmvnic_long_term_buff { From 068d9f90a6978c3e3a662d9e85204a7d6be240d2 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Sun, 5 Mar 2017 12:18:42 -0600 Subject: [PATCH 082/384] ibmvnic: Allocate number of rx/tx buffers agreed on by firmware The amount of TX/RX buffers that the vNIC driver currently allocates is different from the amount agreed upon in negotiation with firmware. Correct that by allocating the requested number of buffers confirmed by firmware. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 0e87b83f25e2..5f11b4dc95d2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -404,7 +404,7 @@ static int ibmvnic_open(struct net_device *netdev) send_map_query(adapter); for (i = 0; i < rxadd_subcrqs; i++) { init_rx_pool(adapter, &adapter->rx_pool[i], - IBMVNIC_BUFFS_PER_POOL, i, + adapter->req_rx_add_entries_per_subcrq, i, be64_to_cpu(size_array[i]), 1); if (alloc_rx_pool(adapter, &adapter->rx_pool[i])) { dev_err(dev, "Couldn't alloc rx pool\n"); @@ -419,23 +419,23 @@ static int ibmvnic_open(struct net_device *netdev) for (i = 0; i < tx_subcrqs; i++) { tx_pool = &adapter->tx_pool[i]; tx_pool->tx_buff = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(struct ibmvnic_tx_buff), GFP_KERNEL); if (!tx_pool->tx_buff) goto tx_pool_alloc_failed; if (alloc_long_term_buff(adapter, &tx_pool->long_term_buff, - adapter->max_tx_entries_per_subcrq * + adapter->req_tx_entries_per_subcrq * adapter->req_mtu)) goto tx_ltb_alloc_failed; tx_pool->free_map = - kcalloc(adapter->max_tx_entries_per_subcrq, + kcalloc(adapter->req_tx_entries_per_subcrq, sizeof(int), GFP_KERNEL); if (!tx_pool->free_map) goto tx_fm_alloc_failed; - for (j = 0; j < adapter->max_tx_entries_per_subcrq; j++) + for (j = 0; j < adapter->req_tx_entries_per_subcrq; j++) tx_pool->free_map[j] = j; tx_pool->consumer_index = 0; @@ -746,7 +746,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_pool->consumer_index = (tx_pool->consumer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; tx_buff = &tx_pool->tx_buff[index]; tx_buff->skb = skb; @@ -819,7 +819,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (tx_pool->consumer_index == 0) tx_pool->consumer_index = - adapter->max_tx_entries_per_subcrq - 1; + adapter->req_tx_entries_per_subcrq - 1; else tx_pool->consumer_index--; @@ -1387,7 +1387,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter, producer_index] = index; adapter->tx_pool[pool].producer_index = (adapter->tx_pool[pool].producer_index + 1) % - adapter->max_tx_entries_per_subcrq; + adapter->req_tx_entries_per_subcrq; } /* remove tx_comp scrq*/ next->tx_comp.first = 0; From 62f8f4d9066c1c6f2474845d1ca7e2891f2ae3fd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 5 Mar 2017 10:52:16 -0800 Subject: [PATCH 083/384] dccp: fix use-after-free in dccp_feat_activate_values Dmitry reported crashes in DCCP stack [1] Problem here is that when I got rid of listener spinlock, I missed the fact that DCCP stores a complex state in struct dccp_request_sock, while TCP does not. Since multiple cpus could access it at the same time, we need to add protection. [1] BUG: KASAN: use-after-free in dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 at addr ffff88003713be68 Read of size 8 by task syz-executor2/8457 CPU: 2 PID: 8457 Comm: syz-executor2 Not tainted 4.10.0-rc7+ #127 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x292/0x398 lib/dump_stack.c:51 kasan_object_err+0x1c/0x70 mm/kasan/report.c:162 print_address_description mm/kasan/report.c:200 [inline] kasan_report_error mm/kasan/report.c:289 [inline] kasan_report.part.1+0x20e/0x4e0 mm/kasan/report.c:311 kasan_report mm/kasan/report.c:332 [inline] __asan_report_load8_noabort+0x29/0x30 mm/kasan/report.c:332 dccp_feat_activate_values+0x967/0xab0 net/dccp/feat.c:1541 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 do_softirq_own_stack+0x1c/0x30 arch/x86/entry/entry_64.S:902 do_softirq.part.17+0x1e8/0x230 kernel/softirq.c:328 do_softirq kernel/softirq.c:176 [inline] __local_bh_enable_ip+0x1f2/0x200 kernel/softirq.c:181 local_bh_enable include/linux/bottom_half.h:31 [inline] rcu_read_unlock_bh include/linux/rcupdate.h:971 [inline] ip6_finish_output2+0xbb0/0x23d0 net/ipv6/ip6_output.c:123 ip6_finish_output+0x302/0x960 net/ipv6/ip6_output.c:148 NF_HOOK_COND include/linux/netfilter.h:246 [inline] ip6_output+0x1cb/0x8d0 net/ipv6/ip6_output.c:162 ip6_xmit+0xcdf/0x20d0 include/net/dst.h:501 inet6_csk_xmit+0x320/0x5f0 net/ipv6/inet6_connection_sock.c:179 dccp_transmit_skb+0xb09/0x1120 net/dccp/output.c:141 dccp_xmit_packet+0x215/0x760 net/dccp/output.c:280 dccp_write_xmit+0x168/0x1d0 net/dccp/output.c:362 dccp_sendmsg+0x79c/0xb10 net/dccp/proto.c:796 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744 sock_sendmsg_nosec net/socket.c:635 [inline] sock_sendmsg+0xca/0x110 net/socket.c:645 SYSC_sendto+0x660/0x810 net/socket.c:1687 SyS_sendto+0x40/0x50 net/socket.c:1655 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458b9 RSP: 002b:00007f8ceb77bb58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000000017 RCX: 00000000004458b9 RDX: 0000000000000023 RSI: 0000000020e60000 RDI: 0000000000000017 RBP: 00000000006e1b90 R08: 00000000200f9fe1 R09: 0000000000000020 R10: 0000000000008010 R11: 0000000000000282 R12: 00000000007080a8 R13: 0000000000000000 R14: 00007f8ceb77c9c0 R15: 00007f8ceb77c700 Object at ffff88003713be50, in cache kmalloc-64 size: 64 Allocated: PID = 8446 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:605 kmem_cache_alloc_trace+0x82/0x270 mm/slub.c:2738 kmalloc include/linux/slab.h:490 [inline] dccp_feat_entry_new+0x214/0x410 net/dccp/feat.c:467 dccp_feat_push_change+0x38/0x220 net/dccp/feat.c:487 __feat_register_sp+0x223/0x2f0 net/dccp/feat.c:741 dccp_feat_propagate_ccid+0x22b/0x2b0 net/dccp/feat.c:949 dccp_feat_server_ccid_dependencies+0x1b3/0x250 net/dccp/feat.c:1012 dccp_make_response+0x1f1/0xc90 net/dccp/output.c:423 dccp_v6_send_response+0x4ec/0xc20 net/dccp/ipv6.c:217 dccp_v6_conn_request+0xaba/0x11b0 net/dccp/ipv6.c:377 dccp_rcv_state_process+0x51e/0x1650 net/dccp/input.c:606 dccp_v6_do_rcv+0x213/0x350 net/dccp/ipv6.c:632 sk_backlog_rcv include/net/sock.h:893 [inline] __sk_receive_skb+0x36f/0xcc0 net/core/sock.c:479 dccp_v6_rcv+0xba5/0x1d00 net/dccp/ipv6.c:742 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Freed: PID = 15 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:57 save_stack+0x43/0xd0 mm/kasan/kasan.c:502 set_track mm/kasan/kasan.c:514 [inline] kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:578 slab_free_hook mm/slub.c:1355 [inline] slab_free_freelist_hook mm/slub.c:1377 [inline] slab_free mm/slub.c:2954 [inline] kfree+0xe8/0x2b0 mm/slub.c:3874 dccp_feat_entry_destructor.part.4+0x48/0x60 net/dccp/feat.c:418 dccp_feat_entry_destructor net/dccp/feat.c:416 [inline] dccp_feat_list_pop net/dccp/feat.c:541 [inline] dccp_feat_activate_values+0x57f/0xab0 net/dccp/feat.c:1543 dccp_create_openreq_child+0x464/0x610 net/dccp/minisocks.c:121 dccp_v6_request_recv_sock+0x1f6/0x1960 net/dccp/ipv6.c:457 dccp_check_req+0x335/0x5a0 net/dccp/minisocks.c:186 dccp_v6_rcv+0x69e/0x1d00 net/dccp/ipv6.c:711 ip6_input_finish+0x46d/0x17a0 net/ipv6/ip6_input.c:279 NF_HOOK include/linux/netfilter.h:257 [inline] ip6_input+0xdb/0x590 net/ipv6/ip6_input.c:322 dst_input include/net/dst.h:507 [inline] ip6_rcv_finish+0x289/0x890 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:257 [inline] ipv6_rcv+0x12ec/0x23d0 net/ipv6/ip6_input.c:203 __netif_receive_skb_core+0x1ae5/0x3400 net/core/dev.c:4190 __netif_receive_skb+0x2a/0x170 net/core/dev.c:4228 process_backlog+0xe5/0x6c0 net/core/dev.c:4839 napi_poll net/core/dev.c:5202 [inline] net_rx_action+0xe70/0x1900 net/core/dev.c:5267 __do_softirq+0x2fb/0xb7d kernel/softirq.c:284 Memory state around the buggy address: ffff88003713bd00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88003713bd80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88003713be00: fc fc fc fc fc fc fc fc fc fc fb fb fb fb fb fb ^ Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller --- include/linux/dccp.h | 1 + net/dccp/minisocks.c | 24 ++++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 61d042bbbf60..68449293c4b6 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -163,6 +163,7 @@ struct dccp_request_sock { __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; + spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e267e6f4c9a5..abd07a443219 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -142,6 +142,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct dccp_request_sock *dreq = dccp_rsk(req); bool own_req; + /* TCP/DCCP listeners became lockless. + * DCCP stores complex state in its request_sock, so we need + * a protection for them, now this code runs without being protected + * by the parent (listener) lock. + */ + spin_lock_bh(&dreq->dreq_lock); + /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -156,7 +163,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, inet_rtx_syn_ack(sk, req); } /* Network Duplicate, discard packet */ - return NULL; + goto out; } DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; @@ -182,20 +189,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, req, &own_req); - if (!child) - goto listen_overflow; + if (child) { + child = inet_csk_complete_hashdance(sk, child, req, own_req); + goto out; + } - return inet_csk_complete_hashdance(sk, child, req, own_req); - -listen_overflow: - dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; drop: if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - return NULL; +out: + spin_unlock_bh(&dreq->dreq_lock); + return child; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -246,6 +253,7 @@ int dccp_reqsk_init(struct request_sock *req, { struct dccp_request_sock *dreq = dccp_rsk(req); + spin_lock_init(&dreq->dreq_lock); inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport); inet_rsk(req)->acked = 0; From 15e668070a64bb97f102ad9cf3bccbca0545cda8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Sun, 5 Mar 2017 12:34:53 -0800 Subject: [PATCH 084/384] ipv6: reorder icmpv6_init() and ip6_mr_init() Andrey reported the following kernel crash: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 14446 Comm: syz-executor6 Not tainted 4.10.0+ #82 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88001f311700 task.stack: ffff88001f6e8000 RIP: 0010:ip6mr_sk_done+0x15a/0x3d0 net/ipv6/ip6mr.c:1618 RSP: 0018:ffff88001f6ef418 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 1ffff10003edde8c RCX: ffffc900043ee000 RDX: 0000000000000004 RSI: ffffffff83e3b3f8 RDI: 0000000000000020 RBP: ffff88001f6ef508 R08: fffffbfff0dcc5d8 R09: 0000000000000000 R10: ffffffff86e62ec0 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88001f6ef4e0 R15: ffff8800380a0040 FS: 00007f7a52cec700(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000061c500 CR3: 000000001f1ae000 CR4: 00000000000006f0 DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Call Trace: rawv6_close+0x4c/0x80 net/ipv6/raw.c:1217 inet_release+0xed/0x1c0 net/ipv4/af_inet.c:425 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:432 sock_release+0x8d/0x1e0 net/socket.c:597 __sock_create+0x39d/0x880 net/socket.c:1226 sock_create_kern+0x3f/0x50 net/socket.c:1243 inet_ctl_sock_create+0xbb/0x280 net/ipv4/af_inet.c:1526 icmpv6_sk_init+0x163/0x500 net/ipv6/icmp.c:954 ops_init+0x10a/0x550 net/core/net_namespace.c:115 setup_net+0x261/0x660 net/core/net_namespace.c:291 copy_net_ns+0x27e/0x540 net/core/net_namespace.c:396 9pnet_virtio: no channels available for device ./file1 create_new_namespaces+0x437/0x9b0 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] SyS_unshare+0x64e/0x1000 kernel/fork.c:2231 entry_SYSCALL_64_fastpath+0x1f/0xc2 This is because net->ipv6.mr6_tables is not initialized at that point, ip6mr_rules_init() is not called yet, therefore on the error path when we iterator the list, we trigger this oops. Fix this by reordering ip6mr_rules_init() before icmpv6_sk_init(). Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 04db40620ea6..a9a9553ee63d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -920,12 +920,12 @@ static int __init inet6_init(void) err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; - err = icmpv6_init(); - if (err) - goto icmp_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; + err = icmpv6_init(); + if (err) + goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; @@ -1061,10 +1061,10 @@ static int __init inet6_init(void) ndisc_cleanup(); ndisc_fail: ip6_mr_cleanup(); -ipmr_fail: - icmpv6_cleanup(); icmp_fail: unregister_pernet_subsys(&inet6_net_ops); +ipmr_fail: + icmpv6_cleanup(); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); From 97ee351b50a49717543533cfb85b4bf9d88c9680 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 7 Mar 2017 16:14:49 +1100 Subject: [PATCH 085/384] powerpc/boot: Fix zImage TOC alignment Recent toolchains force the TOC to be 256 byte aligned. We need to enforce this alignment in the zImage linker script, otherwise pointers to our TOC variables (__toc_start) could be incorrect. If the actual start of the TOC and __toc_start don't have the same value we crash early in the zImage wrapper. Cc: stable@vger.kernel.org Suggested-by: Alan Modra Signed-off-by: Michael Ellerman --- arch/powerpc/boot/zImage.lds.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S index 861e72109df2..f080abfc2f83 100644 --- a/arch/powerpc/boot/zImage.lds.S +++ b/arch/powerpc/boot/zImage.lds.S @@ -68,6 +68,7 @@ SECTIONS } #ifdef CONFIG_PPC64_BOOT_WRAPPER + . = ALIGN(256); .got : { __toc_start = .; From f1c635b439a5c01776fe3a25b1e2dc546ea82e6f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 7 Mar 2017 09:15:53 -0800 Subject: [PATCH 086/384] scsi: storvsc: Workaround for virtual DVD SCSI version Hyper-V host emulation of SCSI for virtual DVD device reports SCSI version 0 (UNKNOWN) but is still capable of supporting REPORTLUN. Without this patch, a GEN2 Linux guest on Hyper-V will not boot 4.11 successfully with virtual DVD ROM device. What happens is that the SCSI scan process falls back to doing sequential probing by INQUIRY. But the storvsc driver has a previous workaround that masks/blocks all errors reports from INQUIRY (or MODE_SENSE) commands. This workaround causes the scan to then populate a full set of bogus LUN's on the target and then sends kernel spinning off into a death spiral doing block reads on the non-existent LUNs. By setting the correct blacklist flags, the target with the DVD device is scanned with REPORTLUN and that works correctly. Patch needs to go in current 4.11, it is safe but not necessary in older kernels. Signed-off-by: Stephen Hemminger Reviewed-by: K. Y. Srinivasan Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 585e54f6512c..f555174f2cb7 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -400,8 +400,6 @@ MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels") */ static int storvsc_timeout = 180; -static int msft_blist_flags = BLIST_TRY_VPD_PAGES; - #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) static struct scsi_transport_template *fc_transport_template; #endif @@ -1383,6 +1381,22 @@ static int storvsc_do_io(struct hv_device *device, return ret; } +static int storvsc_device_alloc(struct scsi_device *sdevice) +{ + /* + * Set blist flag to permit the reading of the VPD pages even when + * the target may claim SPC-2 compliance. MSFT targets currently + * claim SPC-2 compliance while they implement post SPC-2 features. + * With this flag we can correctly handle WRITE_SAME_16 issues. + * + * Hypervisor reports SCSI_UNKNOWN type for DVD ROM device but + * still supports REPORT LUN. + */ + sdevice->sdev_bflags = BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES; + + return 0; +} + static int storvsc_device_configure(struct scsi_device *sdevice) { @@ -1395,14 +1409,6 @@ static int storvsc_device_configure(struct scsi_device *sdevice) sdevice->no_write_same = 1; - /* - * Add blist flags to permit the reading of the VPD pages even when - * the target may claim SPC-2 compliance. MSFT targets currently - * claim SPC-2 compliance while they implement post SPC-2 features. - * With this patch we can correctly handle WRITE_SAME_16 issues. - */ - sdevice->sdev_bflags |= msft_blist_flags; - /* * If the host is WIN8 or WIN8 R2, claim conformance to SPC-3 * if the device is a MSFT virtual device. If the host is @@ -1661,6 +1667,7 @@ static struct scsi_host_template scsi_driver = { .eh_host_reset_handler = storvsc_host_reset_handler, .proc_name = "storvsc_host", .eh_timed_out = storvsc_eh_timed_out, + .slave_alloc = storvsc_device_alloc, .slave_configure = storvsc_device_configure, .cmd_per_lun = 255, .this_id = -1, From 85e8a23936ab3442de0c42da97d53b29f004ece1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Feb 2017 08:49:20 +1100 Subject: [PATCH 087/384] scsi: lpfc: Add shutdown method for kexec We see lpfc devices regularly fail during kexec. Fix this by adding a shutdown method which mirrors the remove method. Cc: Signed-off-by: Anton Blanchard Reviewed-by: Mauricio Faria de Oliveira Tested-by: Mauricio Faria de Oliveira Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 661bd25a404a..2697d49da4d7 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -12059,6 +12059,7 @@ static struct pci_driver lpfc_driver = { .id_table = lpfc_id_table, .probe = lpfc_pci_probe_one, .remove = lpfc_pci_remove_one, + .shutdown = lpfc_pci_remove_one, .suspend = lpfc_pci_suspend_one, .resume = lpfc_pci_resume_one, .err_handler = &lpfc_err_handler, From aa2be9b3d6d2d699e9ca7cbfc00867c80e5da213 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 3 Mar 2017 17:56:55 +1100 Subject: [PATCH 088/384] crypto: powerpc - Fix initialisation of crc32c context Turning on crypto self-tests on a POWER8 shows: alg: hash: Test 1 failed for crc32c-vpmsum 00000000: ff ff ff ff Comparing the code with the Intel CRC32c implementation on which ours is based shows that we are doing an init with 0, not ~0 as CRC32c requires. This probably wasn't caught because btrfs does its own weird open-coded initialisation. Initialise our internal context to ~0 on init. This makes the self-tests pass, and btrfs continues to work. Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c") Cc: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Acked-by: Anton Blanchard Signed-off-by: Herbert Xu --- arch/powerpc/crypto/crc32c-vpmsum_glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index 9fa046d56eba..411994551afc 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -52,7 +52,7 @@ static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) { u32 *key = crypto_tfm_ctx(tfm); - *key = 0; + *key = ~0; return 0; } From 07de4bc88ce6a4d898cad9aa4c99c1df7e87702d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 5 Mar 2017 19:14:07 +0200 Subject: [PATCH 089/384] crypto: s5p-sss - Fix completing crypto request in IRQ handler In a regular interrupt handler driver was finishing the crypt/decrypt request by calling complete on crypto request. This is disallowed since converting to skcipher in commit b286d8b1a690 ("crypto: skcipher - Add skcipher walk interface") and causes a warning: WARNING: CPU: 0 PID: 0 at crypto/skcipher.c:430 skcipher_walk_first+0x13c/0x14c The interrupt is marked shared but in fact there are no other users sharing it. Thus the simplest solution seems to be to just use a threaded interrupt handler, after converting it to oneshot. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index dce1af0ce85c..a668286d62cb 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -805,8 +805,9 @@ static int s5p_aes_probe(struct platform_device *pdev) dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; } - err = devm_request_irq(dev, pdata->irq_fc, s5p_aes_interrupt, - IRQF_SHARED, pdev->name, pdev); + err = devm_request_threaded_irq(dev, pdata->irq_fc, NULL, + s5p_aes_interrupt, IRQF_ONESHOT, + pdev->name, pdev); if (err < 0) { dev_warn(dev, "feed control interrupt is not available.\n"); goto err_irq; From 45c2fdde01299b02a6e3225e848598a3c1e55539 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 7 Mar 2017 15:14:46 +0100 Subject: [PATCH 090/384] hwrng: omap - write registers after enabling the clock Commit 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") added support for the SafeXcel IP-76 variant of the IP. This modification included getting a reference and enabling a clock. Unfortunately, this was done *after* writing to the RNG_INTMASK_REG register. This generally works fine when the driver is built-in because the clock might have been left enabled by the bootloader, but fails short when the driver is built as a module: it causes a system hang because a register is being accessed while the clock is not enabled. This commit fixes that by making the register access *after* enabling the clock. This issue was found by the kernelci.org testing effort. Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index 3ad86fdf954e..efa3747c1750 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -397,7 +397,6 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, irq, err); return err; } - omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); priv->clk = of_clk_get(pdev->dev.of_node, 0); if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER) @@ -408,6 +407,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, dev_err(&pdev->dev, "unable to enable the clk, " "err = %d\n", err); } + + omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); } return 0; } From 761c2510283066324cab7859930777ee34cbca78 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 7 Mar 2017 15:14:47 +0100 Subject: [PATCH 091/384] hwrng: omap - use devm_clk_get() instead of of_clk_get() The omap-rng driver currently uses of_clk_get() to get a reference to the clock, but never releases that reference. This commit fixes that by using devm_clk_get() instead. Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index efa3747c1750..d2866280f130 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -398,7 +398,7 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, return err; } - priv->clk = of_clk_get(pdev->dev.of_node, 0); + priv->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER) return -EPROBE_DEFER; if (!IS_ERR(priv->clk)) { From b985735be7afea3a5e0570ce2ea0b662c0e12e19 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 7 Mar 2017 15:14:48 +0100 Subject: [PATCH 092/384] hwrng: omap - Do not access INTMASK_REG on EIP76 The INTMASK_REG register does not exist on EIP76. Due to this, the call: omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); ends up, through the reg_map_eip76[] array, in accessing the register at offset 0, which is the RNG_OUTPUT_0_REG. This by itself doesn't cause any problem, but clearly doesn't enable the interrupt as it was expected. On EIP76, the register that allows to enable the interrupt is RNG_CONTROL_REG. And just like RNG_INTMASK_REG, it's bit 1 of this register that allows to enable the shutdown_oflo interrupt. Fixes: 383212425c926 ("hwrng: omap - Add device variant for SafeXcel IP-76 found in Armada 8K") Cc: Signed-off-by: Thomas Petazzoni Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap-rng.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index d2866280f130..b1ad12552b56 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -408,7 +408,18 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, "err = %d\n", err); } - omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); + /* + * On OMAP4, enabling the shutdown_oflo interrupt is + * done in the interrupt mask register. There is no + * such register on EIP76, and it's enabled by the + * same bit in the control register + */ + if (priv->pdata->regs[RNG_INTMASK_REG]) + omap_rng_write(priv, RNG_INTMASK_REG, + RNG_SHUTDOWN_OFLO_MASK); + else + omap_rng_write(priv, RNG_CONTROL_REG, + RNG_SHUTDOWN_OFLO_MASK); } return 0; } From a04e54f2c35823ca32d56afcd5cea5b783e2f51a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 3 Nov 2016 23:06:53 -0700 Subject: [PATCH 093/384] target/pscsi: Fix TYPE_TAPE + TYPE_MEDIMUM_CHANGER export The following fixes a divide by zero OOPs with TYPE_TAPE due to pscsi_tape_read_blocksize() failing causing a zero sd->sector_size being propigated up via dev_attrib.hw_block_size. It also fixes another long-standing bug where TYPE_TAPE and TYPE_MEDIMUM_CHANGER where using pscsi_create_type_other(), which does not call scsi_device_get() to take the device reference. Instead, rename pscsi_create_type_rom() to pscsi_create_type_nondisk() and use it for all cases. Finally, also drop a dump_stack() in pscsi_get_blocks() for non TYPE_DISK, which in modern target-core can get invoked via target_sense_desc_format() during CHECK_CONDITION. Reported-by: Malcolm Haak Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_pscsi.c | 47 ++++++++---------------------- 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index a8f8e53f2f57..44d92f23a3f0 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -154,7 +154,7 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, buf = kzalloc(12, GFP_KERNEL); if (!buf) - return; + goto out_free; memset(cdb, 0, MAX_COMMAND_SIZE); cdb[0] = MODE_SENSE; @@ -169,9 +169,10 @@ static void pscsi_tape_read_blocksize(struct se_device *dev, * If MODE_SENSE still returns zero, set the default value to 1024. */ sdev->sector_size = (buf[9] << 16) | (buf[10] << 8) | (buf[11]); +out_free: if (!sdev->sector_size) sdev->sector_size = 1024; -out_free: + kfree(buf); } @@ -314,9 +315,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, sd->lun, sd->queue_depth); } - dev->dev_attrib.hw_block_size = sd->sector_size; + dev->dev_attrib.hw_block_size = + min_not_zero((int)sd->sector_size, 512); dev->dev_attrib.hw_max_sectors = - min_t(int, sd->host->max_sectors, queue_max_hw_sectors(q)); + min_not_zero(sd->host->max_sectors, queue_max_hw_sectors(q)); dev->dev_attrib.hw_queue_depth = sd->queue_depth; /* @@ -339,8 +341,10 @@ static int pscsi_add_device_to_list(struct se_device *dev, /* * For TYPE_TAPE, attempt to determine blocksize with MODE_SENSE. */ - if (sd->type == TYPE_TAPE) + if (sd->type == TYPE_TAPE) { pscsi_tape_read_blocksize(dev, sd); + dev->dev_attrib.hw_block_size = sd->sector_size; + } return 0; } @@ -406,7 +410,7 @@ static int pscsi_create_type_disk(struct se_device *dev, struct scsi_device *sd) /* * Called with struct Scsi_Host->host_lock called. */ -static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) +static int pscsi_create_type_nondisk(struct se_device *dev, struct scsi_device *sd) __releases(sh->host_lock) { struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; @@ -433,28 +437,6 @@ static int pscsi_create_type_rom(struct se_device *dev, struct scsi_device *sd) return 0; } -/* - * Called with struct Scsi_Host->host_lock called. - */ -static int pscsi_create_type_other(struct se_device *dev, - struct scsi_device *sd) - __releases(sh->host_lock) -{ - struct pscsi_hba_virt *phv = dev->se_hba->hba_ptr; - struct Scsi_Host *sh = sd->host; - int ret; - - spin_unlock_irq(sh->host_lock); - ret = pscsi_add_device_to_list(dev, sd); - if (ret) - return ret; - - pr_debug("CORE_PSCSI[%d] - Added Type: %s for %d:%d:%d:%llu\n", - phv->phv_host_id, scsi_device_type(sd->type), sh->host_no, - sd->channel, sd->id, sd->lun); - return 0; -} - static int pscsi_configure_device(struct se_device *dev) { struct se_hba *hba = dev->se_hba; @@ -542,11 +524,8 @@ static int pscsi_configure_device(struct se_device *dev) case TYPE_DISK: ret = pscsi_create_type_disk(dev, sd); break; - case TYPE_ROM: - ret = pscsi_create_type_rom(dev, sd); - break; default: - ret = pscsi_create_type_other(dev, sd); + ret = pscsi_create_type_nondisk(dev, sd); break; } @@ -611,8 +590,7 @@ static void pscsi_free_device(struct se_device *dev) else if (pdv->pdv_lld_host) scsi_host_put(pdv->pdv_lld_host); - if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM)) - scsi_device_put(sd); + scsi_device_put(sd); pdv->pdv_sd = NULL; } @@ -1064,7 +1042,6 @@ static sector_t pscsi_get_blocks(struct se_device *dev) if (pdv->pdv_bd && pdv->pdv_bd->bd_part) return pdv->pdv_bd->bd_part->nr_sects; - dump_stack(); return 0; } From 13603685c1f12c67a7a2427f00b63f39a2b6f7c9 Mon Sep 17 00:00:00 2001 From: Max Lohrmann Date: Tue, 7 Mar 2017 22:09:56 -0800 Subject: [PATCH 094/384] target: Fix VERIFY_16 handling in sbc_parse_cdb As reported by Max, the Windows 2008 R2 chkdsk utility expects VERIFY_16 to be supported, and does not handle the returned CHECK_CONDITION properly, resulting in an infinite loop. The kernel will log huge amounts of this error: kernel: TARGET_CORE[iSCSI]: Unsupported SCSI Opcode 0x8f, sending CHECK_CONDITION. Signed-off-by: Max Lohrmann Cc: Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 68d8aef7ab78..c194063f169b 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1105,9 +1105,15 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) return ret; break; case VERIFY: + case VERIFY_16: size = 0; - sectors = transport_get_sectors_10(cdb); - cmd->t_task_lba = transport_lba_32(cdb); + if (cdb[0] == VERIFY) { + sectors = transport_get_sectors_10(cdb); + cmd->t_task_lba = transport_lba_32(cdb); + } else { + sectors = transport_get_sectors_16(cdb); + cmd->t_task_lba = transport_lba_64(cdb); + } cmd->execute_cmd = sbc_emulate_noop; goto check_lba; case REZERO_UNIT: From f04d108029063a8a67528a88449c412aecf4d3d8 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 20 Feb 2017 19:26:30 +0530 Subject: [PATCH 095/384] powerpc/perf: Fix perf_get_data_addr() for power9 DD1 Power9 DD1 do not support PMU_HAS_SIER flag and sdsync in perf_get_data_addr() defaults to MMCRA_SDSYNC which is wrong. Since power9 MMCRA does not support SDSYNC bit, patch includes PPMU_NO_SIAR flag to the check and set the sdsync with MMCRA_SAMPLE_ENABLE; Fixes: 27593d72c4ad ("powerpc/perf: Use MSR to report privilege level on P9 DD1") Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 595dd718ea87..2ff13249f87a 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -188,6 +188,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) sdsync = POWER7P_MMCRA_SDAR_VALID; else if (ppmu->flags & PPMU_ALT_SIPR) sdsync = POWER6_MMCRA_SDSYNC; + else if (ppmu->flags & PPMU_NO_SIAR) + sdsync = MMCRA_SAMPLE_ENABLE; else sdsync = MMCRA_SDSYNC; From 78b4416aa249365dd3c1b64da4d3a232014320b0 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Mon, 20 Feb 2017 19:29:03 +0530 Subject: [PATCH 096/384] powerpc/perf: Handle sdar_mode for marked event in power9 MMCRA[SDAR_MODE] specifices how the SDAR should be updated in continous sampling mode. On P9 it must be set to 0b00 when MMCRA[63] is set. Fixes: c7c3f568beff2 ('powerpc/perf: macros for power9 format encoding') Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/perf/isa207-common.c | 43 ++++++++++++++++++++++++++----- arch/powerpc/perf/isa207-common.h | 1 + 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index e79fb5fb817d..cd951fd231c4 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -65,12 +65,41 @@ static bool is_event_valid(u64 event) return !(event & ~valid_mask); } -static u64 mmcra_sdar_mode(u64 event) +static inline bool is_event_marked(u64 event) { - if (cpu_has_feature(CPU_FTR_ARCH_300) && !cpu_has_feature(CPU_FTR_POWER9_DD1)) - return p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + if (event & EVENT_IS_MARKED) + return true; - return MMCRA_SDAR_MODE_TLB; + return false; +} + +static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) +{ + /* + * MMCRA[SDAR_MODE] specifices how the SDAR should be updated in + * continous sampling mode. + * + * Incase of Power8: + * MMCRA[SDAR_MODE] will be programmed as "0b01" for continous sampling + * mode and will be un-changed when setting MMCRA[63] (Marked events). + * + * Incase of Power9: + * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'), + * or if group already have any marked events. + * Non-Marked events (for DD1): + * MMCRA[SDAR_MODE] will be set to 0b01 + * For rest + * MMCRA[SDAR_MODE] will be set from event code. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) + *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; + else if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; + else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + *mmcra |= MMCRA_SDAR_MODE_TLB; + } else + *mmcra |= MMCRA_SDAR_MODE_TLB; } static u64 thresh_cmp_val(u64 value) @@ -180,7 +209,7 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) value |= CNST_L1_QUAL_VAL(cache); } - if (event & EVENT_IS_MARKED) { + if (is_event_marked(event)) { mask |= CNST_SAMPLE_MASK; value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT); } @@ -276,7 +305,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, } /* In continuous sampling mode, update SDAR on TLB miss */ - mmcra |= mmcra_sdar_mode(event[i]); + mmcra_sdar_mode(event[i], &mmcra); if (event[i] & EVENT_IS_L1) { cache = event[i] >> EVENT_CACHE_SEL_SHIFT; @@ -285,7 +314,7 @@ int isa207_compute_mmcr(u64 event[], int n_ev, mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; } - if (event[i] & EVENT_IS_MARKED) { + if (is_event_marked(event[i])) { mmcra |= MMCRA_SAMPLE_ENABLE; val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK; diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index cf9bd8990159..899210f14ee4 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -246,6 +246,7 @@ #define MMCRA_THR_CMP_SHIFT 32 #define MMCRA_SDAR_MODE_SHIFT 42 #define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT) +#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_IFM_SHIFT 30 /* MMCR1 Threshold Compare bit constant for power9 */ From 605df8d674ac65e044a0bf4998b28c2f350b7f9e Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Tue, 7 Mar 2017 11:39:31 +1100 Subject: [PATCH 097/384] selftests/powerpc: Replace stxvx and lxvx with stxvd2x/lxvd2x On POWER8 (ISA 2.07) lxvx and stxvx are defined to be extended mnemonics of lxvd2x and stxvd2x. For POWER9 (ISA 3.0) the HW architects in their infinite wisdom made lxvx and stxvx instructions in their own right. POWER9 aware GCC will use the POWER9 instruction for lxvx and stxvx causing these selftests to fail on POWER8. Further compounding the issue, because of the way -mvsx works it will cause the power9 instructions to be used regardless of -mcpu=power8 to GCC or -mpower8 to AS. The safest way to address the problem for now is to not use the extended mnemonic. We don't care how the CPU loads the values from memory since the tests only performs register comparisons, so using stdvd2x/lxvd2x does not impact the test. Signed-off-by: Cyril Bur Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- .../selftests/powerpc/include/vsx_asm.h | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h index d828bfb6ef2d..54064ced9e95 100644 --- a/tools/testing/selftests/powerpc/include/vsx_asm.h +++ b/tools/testing/selftests/powerpc/include/vsx_asm.h @@ -16,56 +16,56 @@ */ FUNC_START(load_vsx) li r5,0 - lxvx vs20,r5,r3 + lxvd2x vs20,r5,r3 addi r5,r5,16 - lxvx vs21,r5,r3 + lxvd2x vs21,r5,r3 addi r5,r5,16 - lxvx vs22,r5,r3 + lxvd2x vs22,r5,r3 addi r5,r5,16 - lxvx vs23,r5,r3 + lxvd2x vs23,r5,r3 addi r5,r5,16 - lxvx vs24,r5,r3 + lxvd2x vs24,r5,r3 addi r5,r5,16 - lxvx vs25,r5,r3 + lxvd2x vs25,r5,r3 addi r5,r5,16 - lxvx vs26,r5,r3 + lxvd2x vs26,r5,r3 addi r5,r5,16 - lxvx vs27,r5,r3 + lxvd2x vs27,r5,r3 addi r5,r5,16 - lxvx vs28,r5,r3 + lxvd2x vs28,r5,r3 addi r5,r5,16 - lxvx vs29,r5,r3 + lxvd2x vs29,r5,r3 addi r5,r5,16 - lxvx vs30,r5,r3 + lxvd2x vs30,r5,r3 addi r5,r5,16 - lxvx vs31,r5,r3 + lxvd2x vs31,r5,r3 blr FUNC_END(load_vsx) FUNC_START(store_vsx) li r5,0 - stxvx vs20,r5,r3 + stxvd2x vs20,r5,r3 addi r5,r5,16 - stxvx vs21,r5,r3 + stxvd2x vs21,r5,r3 addi r5,r5,16 - stxvx vs22,r5,r3 + stxvd2x vs22,r5,r3 addi r5,r5,16 - stxvx vs23,r5,r3 + stxvd2x vs23,r5,r3 addi r5,r5,16 - stxvx vs24,r5,r3 + stxvd2x vs24,r5,r3 addi r5,r5,16 - stxvx vs25,r5,r3 + stxvd2x vs25,r5,r3 addi r5,r5,16 - stxvx vs26,r5,r3 + stxvd2x vs26,r5,r3 addi r5,r5,16 - stxvx vs27,r5,r3 + stxvd2x vs27,r5,r3 addi r5,r5,16 - stxvx vs28,r5,r3 + stxvd2x vs28,r5,r3 addi r5,r5,16 - stxvx vs29,r5,r3 + stxvd2x vs29,r5,r3 addi r5,r5,16 - stxvx vs30,r5,r3 + stxvd2x vs30,r5,r3 addi r5,r5,16 - stxvx vs31,r5,r3 + stxvd2x vs31,r5,r3 blr FUNC_END(store_vsx) From b78125e00fce0a858c7827dd47ce500320d6d0f7 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Tue, 28 Feb 2017 23:49:38 +0100 Subject: [PATCH 098/384] net: smsc: smc911x: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc911x.c | 49 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 4f19c6166182..36307d34f641 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1446,40 +1446,40 @@ static int smc911x_close(struct net_device *dev) * Ethtool support */ static int -smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc911x_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret, status; unsigned long flags; + u32 supported; DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); - cmd->maxtxpkt = 1; - cmd->maxrxpkt = 1; if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_gset(&lp->mii, cmd); + ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - cmd->supported = SUPPORTED_10baseT_Half | + supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_TP | SUPPORTED_AUI; if (lp->ctl_rspeed == 10) - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; else if (lp->ctl_rspeed == 100) - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; - cmd->autoneg = AUTONEG_DISABLE; - if (lp->mii.phy_id==1) - cmd->transceiver = XCVR_INTERNAL; - else - cmd->transceiver = XCVR_EXTERNAL; - cmd->port = 0; + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = 0; SMC_GET_PHY_SPECIAL(lp, lp->mii.phy_id, status); - cmd->duplex = + cmd->base.duplex = (status & (PHY_SPECIAL_SPD_10FULL_ | PHY_SPECIAL_SPD_100FULL_)) ? DUPLEX_FULL : DUPLEX_HALF; + + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ret = 0; } @@ -1487,7 +1487,8 @@ smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) } static int -smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc911x_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret; @@ -1495,16 +1496,18 @@ smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_sset(&lp->mii, cmd); + ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - if (cmd->autoneg != AUTONEG_DISABLE || - cmd->speed != SPEED_10 || - (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) || - (cmd->port != PORT_TP && cmd->port != PORT_AUI)) + if (cmd->base.autoneg != AUTONEG_DISABLE || + cmd->base.speed != SPEED_10 || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL) || + (cmd->base.port != PORT_TP && + cmd->base.port != PORT_AUI)) return -EINVAL; - lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL; + lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL; ret = 0; } @@ -1686,8 +1689,6 @@ static int smc911x_ethtool_geteeprom_len(struct net_device *dev) } static const struct ethtool_ops smc911x_ethtool_ops = { - .get_settings = smc911x_ethtool_getsettings, - .set_settings = smc911x_ethtool_setsettings, .get_drvinfo = smc911x_ethtool_getdrvinfo, .get_msglevel = smc911x_ethtool_getmsglevel, .set_msglevel = smc911x_ethtool_setmsglevel, @@ -1698,6 +1699,8 @@ static const struct ethtool_ops smc911x_ethtool_ops = { .get_eeprom_len = smc911x_ethtool_geteeprom_len, .get_eeprom = smc911x_ethtool_geteeprom, .set_eeprom = smc911x_ethtool_seteeprom, + .get_link_ksettings = smc911x_ethtool_get_link_ksettings, + .set_link_ksettings = smc911x_ethtool_set_link_ksettings, }; /* From 7b022a1b706f7684341e285e627a74a98e730301 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 4 Mar 2017 12:42:39 +0100 Subject: [PATCH 099/384] net: smsc: smc91x: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Tested-by: Robert Jarzmik Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc91x.c | 47 ++++++++++++++++-------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 65077c77082a..91e9bd7159ab 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -1535,32 +1535,33 @@ static int smc_close(struct net_device *dev) * Ethtool support */ static int -smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc_ethtool_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct smc_local *lp = netdev_priv(dev); int ret; - cmd->maxtxpkt = 1; - cmd->maxrxpkt = 1; - if (lp->phy_type != 0) { spin_lock_irq(&lp->lock); - ret = mii_ethtool_gset(&lp->mii, cmd); + ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd); spin_unlock_irq(&lp->lock); } else { - cmd->supported = SUPPORTED_10baseT_Half | + u32 supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_TP | SUPPORTED_AUI; if (lp->ctl_rspeed == 10) - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; else if (lp->ctl_rspeed == 100) - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; - cmd->autoneg = AUTONEG_DISABLE; - cmd->transceiver = XCVR_INTERNAL; - cmd->port = 0; - cmd->duplex = lp->tcr_cur_mode & TCR_SWFDUP ? DUPLEX_FULL : DUPLEX_HALF; + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = 0; + cmd->base.duplex = lp->tcr_cur_mode & TCR_SWFDUP ? + DUPLEX_FULL : DUPLEX_HALF; + + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); ret = 0; } @@ -1569,24 +1570,26 @@ smc_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) } static int -smc_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) +smc_ethtool_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct smc_local *lp = netdev_priv(dev); int ret; if (lp->phy_type != 0) { spin_lock_irq(&lp->lock); - ret = mii_ethtool_sset(&lp->mii, cmd); + ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd); spin_unlock_irq(&lp->lock); } else { - if (cmd->autoneg != AUTONEG_DISABLE || - cmd->speed != SPEED_10 || - (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) || - (cmd->port != PORT_TP && cmd->port != PORT_AUI)) + if (cmd->base.autoneg != AUTONEG_DISABLE || + cmd->base.speed != SPEED_10 || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL) || + (cmd->base.port != PORT_TP && cmd->base.port != PORT_AUI)) return -EINVAL; -// lp->port = cmd->port; - lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL; +// lp->port = cmd->base.port; + lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL; // if (netif_running(dev)) // smc_set_port(dev); @@ -1744,8 +1747,6 @@ static int smc_ethtool_seteeprom(struct net_device *dev, static const struct ethtool_ops smc_ethtool_ops = { - .get_settings = smc_ethtool_getsettings, - .set_settings = smc_ethtool_setsettings, .get_drvinfo = smc_ethtool_getdrvinfo, .get_msglevel = smc_ethtool_getmsglevel, @@ -1755,6 +1756,8 @@ static const struct ethtool_ops smc_ethtool_ops = { .get_eeprom_len = smc_ethtool_geteeprom_len, .get_eeprom = smc_ethtool_geteeprom, .set_eeprom = smc_ethtool_seteeprom, + .get_link_ksettings = smc_ethtool_get_link_ksettings, + .set_link_ksettings = smc_ethtool_set_link_ksettings, }; static const struct net_device_ops smc_netdev_ops = { From 15883a43af0bcd10b3f3173bca4a0e60518bc154 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 4 Mar 2017 16:16:12 +0100 Subject: [PATCH 100/384] net: sun: cassini: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/cassini.c | 98 ++++++++++++++++-------------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 0e8e89f17dbb..382993c1561c 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -691,7 +691,8 @@ static void cas_mif_poll(struct cas *cp, const int enable) } /* Must be invoked under cp->lock */ -static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) +static void cas_begin_auto_negotiation(struct cas *cp, + const struct ethtool_link_ksettings *ep) { u16 ctl; #if 1 @@ -704,16 +705,16 @@ static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) if (!ep) goto start_aneg; lcntl = cp->link_cntl; - if (ep->autoneg == AUTONEG_ENABLE) + if (ep->base.autoneg == AUTONEG_ENABLE) { cp->link_cntl = BMCR_ANENABLE; - else { - u32 speed = ethtool_cmd_speed(ep); + } else { + u32 speed = ep->base.speed; cp->link_cntl = 0; if (speed == SPEED_100) cp->link_cntl |= BMCR_SPEED100; else if (speed == SPEED_1000) cp->link_cntl |= CAS_BMCR_SPEED1000; - if (ep->duplex == DUPLEX_FULL) + if (ep->base.duplex == DUPLEX_FULL) cp->link_cntl |= BMCR_FULLDPLX; } #if 1 @@ -4528,19 +4529,21 @@ static void cas_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(cp->pdev), sizeof(info->bus_info)); } -static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int cas_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct cas *cp = netdev_priv(dev); u16 bmcr; int full_duplex, speed, pause; unsigned long flags; enum link_state linkstate = link_up; + u32 supported, advertising; - cmd->advertising = 0; - cmd->supported = SUPPORTED_Autoneg; + advertising = 0; + supported = SUPPORTED_Autoneg; if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { - cmd->supported |= SUPPORTED_1000baseT_Full; - cmd->advertising |= ADVERTISED_1000baseT_Full; + supported |= SUPPORTED_1000baseT_Full; + advertising |= ADVERTISED_1000baseT_Full; } /* Record PHY settings if HW is on. */ @@ -4548,17 +4551,15 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) bmcr = 0; linkstate = cp->lstate; if (CAS_PHY_MII(cp->phy_type)) { - cmd->port = PORT_MII; - cmd->transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ? - XCVR_INTERNAL : XCVR_EXTERNAL; - cmd->phy_address = cp->phy_addr; - cmd->advertising |= ADVERTISED_TP | ADVERTISED_MII | + cmd->base.port = PORT_MII; + cmd->base.phy_address = cp->phy_addr; + advertising |= ADVERTISED_TP | ADVERTISED_MII | ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - cmd->supported |= + supported |= (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | @@ -4574,11 +4575,10 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) } } else { - cmd->port = PORT_FIBRE; - cmd->transceiver = XCVR_INTERNAL; - cmd->phy_address = 0; - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_FIBRE; + cmd->base.port = PORT_FIBRE; + cmd->base.phy_address = 0; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_FIBRE; if (cp->hw_running) { /* pcs uses the same bits as mii */ @@ -4590,21 +4590,20 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) spin_unlock_irqrestore(&cp->lock, flags); if (bmcr & BMCR_ANENABLE) { - cmd->advertising |= ADVERTISED_Autoneg; - cmd->autoneg = AUTONEG_ENABLE; - ethtool_cmd_speed_set(cmd, ((speed == 10) ? + advertising |= ADVERTISED_Autoneg; + cmd->base.autoneg = AUTONEG_ENABLE; + cmd->base.speed = ((speed == 10) ? SPEED_10 : ((speed == 1000) ? - SPEED_1000 : SPEED_100))); - cmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + SPEED_1000 : SPEED_100)); + cmd->base.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->autoneg = AUTONEG_DISABLE; - ethtool_cmd_speed_set(cmd, ((bmcr & CAS_BMCR_SPEED1000) ? + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.speed = ((bmcr & CAS_BMCR_SPEED1000) ? SPEED_1000 : ((bmcr & BMCR_SPEED100) ? - SPEED_100 : SPEED_10))); - cmd->duplex = - (bmcr & BMCR_FULLDPLX) ? + SPEED_100 : SPEED_10)); + cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } if (linkstate != link_up) { @@ -4619,39 +4618,46 @@ static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) * settings that we configured. */ if (cp->link_cntl & BMCR_ANENABLE) { - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = 0xff; + cmd->base.speed = 0; + cmd->base.duplex = 0xff; } else { - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; if (cp->link_cntl & BMCR_SPEED100) { - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; } else if (cp->link_cntl & CAS_BMCR_SPEED1000) { - ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->base.speed = SPEED_1000; } - cmd->duplex = (cp->link_cntl & BMCR_FULLDPLX)? + cmd->base.duplex = (cp->link_cntl & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } } + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + return 0; } -static int cas_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int cas_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct cas *cp = netdev_priv(dev); unsigned long flags; - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; /* Verify the settings we care about. */ - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && + if (cmd->base.autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL))) + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. */ @@ -4753,8 +4759,6 @@ static void cas_get_ethtool_stats(struct net_device *dev, static const struct ethtool_ops cas_ethtool_ops = { .get_drvinfo = cas_get_drvinfo, - .get_settings = cas_get_settings, - .set_settings = cas_set_settings, .nway_reset = cas_nway_reset, .get_link = cas_get_link, .get_msglevel = cas_get_msglevel, @@ -4764,6 +4768,8 @@ static const struct ethtool_ops cas_ethtool_ops = { .get_sset_count = cas_get_sset_count, .get_strings = cas_get_strings, .get_ethtool_stats = cas_get_ethtool_stats, + .get_link_ksettings = cas_get_link_ksettings, + .set_link_ksettings = cas_set_link_ksettings, }; static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) From 56c07e9501e875883584c00fe6977c3addb1f873 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 4 Mar 2017 17:50:06 +0100 Subject: [PATCH 101/384] net: sun: niu: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 37 ++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 57978056b336..2dcca249eb9c 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6813,7 +6813,8 @@ static void niu_get_drvinfo(struct net_device *dev, sizeof(info->bus_info)); } -static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int niu_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp; @@ -6821,28 +6822,30 @@ static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) lp = &np->link_config; memset(cmd, 0, sizeof(*cmd)); - cmd->phy_address = np->phy_addr; - cmd->supported = lp->supported; - cmd->advertising = lp->active_advertising; - cmd->autoneg = lp->active_autoneg; - ethtool_cmd_speed_set(cmd, lp->active_speed); - cmd->duplex = lp->active_duplex; - cmd->port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP; - cmd->transceiver = (np->flags & NIU_FLAGS_XCVR_SERDES) ? - XCVR_EXTERNAL : XCVR_INTERNAL; + cmd->base.phy_address = np->phy_addr; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + lp->supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + lp->active_advertising); + cmd->base.autoneg = lp->active_autoneg; + cmd->base.speed = lp->active_speed; + cmd->base.duplex = lp->active_duplex; + cmd->base.port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP; return 0; } -static int niu_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int niu_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp = &np->link_config; - lp->advertising = cmd->advertising; - lp->speed = ethtool_cmd_speed(cmd); - lp->duplex = cmd->duplex; - lp->autoneg = cmd->autoneg; + ethtool_convert_link_mode_to_legacy_u32(&lp->advertising, + cmd->link_modes.advertising); + lp->speed = cmd->base.speed; + lp->duplex = cmd->base.duplex; + lp->autoneg = cmd->base.autoneg; return niu_init_link(np); } @@ -7902,14 +7905,14 @@ static const struct ethtool_ops niu_ethtool_ops = { .nway_reset = niu_nway_reset, .get_eeprom_len = niu_get_eeprom_len, .get_eeprom = niu_get_eeprom, - .get_settings = niu_get_settings, - .set_settings = niu_set_settings, .get_strings = niu_get_strings, .get_sset_count = niu_get_sset_count, .get_ethtool_stats = niu_get_ethtool_stats, .set_phys_id = niu_set_phys_id, .get_rxnfc = niu_get_nfc, .set_rxnfc = niu_set_nfc, + .get_link_ksettings = niu_get_link_ksettings, + .set_link_ksettings = niu_set_link_ksettings, }; static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent, From 9dff2defef3ce1933a44d59ec139987e19645841 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 00:04:18 +0100 Subject: [PATCH 102/384] net: sun: sungem: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sungem.c | 98 ++++++++++++++++++------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 5c5952e782cd..dbfca0466760 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1250,12 +1250,17 @@ static void gem_stop_dma(struct gem *gp) // XXX dbl check what that function should do when called on PCS PHY -static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep) +static void gem_begin_auto_negotiation(struct gem *gp, + const struct ethtool_link_ksettings *ep) { u32 advertise, features; int autoneg; int speed; int duplex; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + ep->link_modes.advertising); if (gp->phy_type != phy_mii_mdio0 && gp->phy_type != phy_mii_mdio1) @@ -1278,13 +1283,13 @@ static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep) /* Setup link parameters */ if (!ep) goto start_aneg; - if (ep->autoneg == AUTONEG_ENABLE) { - advertise = ep->advertising; + if (ep->base.autoneg == AUTONEG_ENABLE) { + advertise = advertising; autoneg = 1; } else { autoneg = 0; - speed = ethtool_cmd_speed(ep); - duplex = ep->duplex; + speed = ep->base.speed; + duplex = ep->base.duplex; } start_aneg: @@ -2515,85 +2520,96 @@ static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(gp->pdev), sizeof(info->bus_info)); } -static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int gem_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct gem *gp = netdev_priv(dev); + u32 supported, advertising; if (gp->phy_type == phy_mii_mdio0 || gp->phy_type == phy_mii_mdio1) { if (gp->phy_mii.def) - cmd->supported = gp->phy_mii.def->features; + supported = gp->phy_mii.def->features; else - cmd->supported = (SUPPORTED_10baseT_Half | + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full); /* XXX hardcoded stuff for now */ - cmd->port = PORT_MII; - cmd->transceiver = XCVR_EXTERNAL; - cmd->phy_address = 0; /* XXX fixed PHYAD */ + cmd->base.port = PORT_MII; + cmd->base.phy_address = 0; /* XXX fixed PHYAD */ /* Return current PHY settings */ - cmd->autoneg = gp->want_autoneg; - ethtool_cmd_speed_set(cmd, gp->phy_mii.speed); - cmd->duplex = gp->phy_mii.duplex; - cmd->advertising = gp->phy_mii.advertising; + cmd->base.autoneg = gp->want_autoneg; + cmd->base.speed = gp->phy_mii.speed; + cmd->base.duplex = gp->phy_mii.duplex; + advertising = gp->phy_mii.advertising; /* If we started with a forced mode, we don't have a default * advertise set, we need to return something sensible so * userland can re-enable autoneg properly. */ - if (cmd->advertising == 0) - cmd->advertising = cmd->supported; + if (advertising == 0) + advertising = supported; } else { // XXX PCS ? - cmd->supported = + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg); - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = cmd->port = cmd->phy_address = - cmd->transceiver = cmd->autoneg = 0; + advertising = supported; + cmd->base.speed = 0; + cmd->base.duplex = 0; + cmd->base.port = 0; + cmd->base.phy_address = 0; + cmd->base.autoneg = 0; /* serdes means usually a Fibre connector, with most fixed */ if (gp->phy_type == phy_serdes) { - cmd->port = PORT_FIBRE; - cmd->supported = (SUPPORTED_1000baseT_Half | + cmd->base.port = PORT_FIBRE; + supported = (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE | SUPPORTED_Autoneg | SUPPORTED_Pause | SUPPORTED_Asym_Pause); - cmd->advertising = cmd->supported; - cmd->transceiver = XCVR_INTERNAL; + advertising = supported; if (gp->lstate == link_up) - ethtool_cmd_speed_set(cmd, SPEED_1000); - cmd->duplex = DUPLEX_FULL; - cmd->autoneg = 1; + cmd->base.speed = SPEED_1000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.autoneg = 1; } } - cmd->maxtxpkt = cmd->maxrxpkt = 0; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } -static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int gem_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct gem *gp = netdev_priv(dev); - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; + u32 advertising; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); /* Verify the settings we care about. */ - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_ENABLE && - cmd->advertising == 0) + if (cmd->base.autoneg == AUTONEG_ENABLE && + advertising == 0) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && + if (cmd->base.autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL))) + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. */ @@ -2666,13 +2682,13 @@ static int gem_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops gem_ethtool_ops = { .get_drvinfo = gem_get_drvinfo, .get_link = ethtool_op_get_link, - .get_settings = gem_get_settings, - .set_settings = gem_set_settings, .nway_reset = gem_nway_reset, .get_msglevel = gem_get_msglevel, .set_msglevel = gem_set_msglevel, .get_wol = gem_get_wol, .set_wol = gem_set_wol, + .get_link_ksettings = gem_get_link_ksettings, + .set_link_ksettings = gem_set_link_ksettings, }; static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) From 8103015df56d97cc3a4167d2e070ee14cc022bae Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 22:25:39 +0100 Subject: [PATCH 103/384] net: sun: sunhme: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunhme.c | 62 +++++++++++++++++-------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 72ff05cd3ed8..53ff66ef53ac 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1294,9 +1294,10 @@ static void happy_meal_init_rings(struct happy_meal *hp) } /* hp->happy_lock must be held */ -static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, - void __iomem *tregs, - struct ethtool_cmd *ep) +static void +happy_meal_begin_auto_negotiation(struct happy_meal *hp, + void __iomem *tregs, + const struct ethtool_link_ksettings *ep) { int timeout; @@ -1309,7 +1310,7 @@ static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* XXX Check BMSR_ANEGCAPABLE, should not be necessary though. */ hp->sw_advertise = happy_meal_tcvr_read(hp, tregs, MII_ADVERTISE); - if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { + if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { /* Advertise everything we can support. */ if (hp->sw_bmsr & BMSR_10HALF) hp->sw_advertise |= (ADVERTISE_10HALF); @@ -1384,14 +1385,14 @@ static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* Disable auto-negotiation in BMCR, enable the duplex and * speed setting, init the timer state machine, and fire it off. */ - if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { + if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { hp->sw_bmcr = BMCR_SPEED100; } else { - if (ethtool_cmd_speed(ep) == SPEED_100) + if (ep->base.speed == SPEED_100) hp->sw_bmcr = BMCR_SPEED100; else hp->sw_bmcr = 0; - if (ep->duplex == DUPLEX_FULL) + if (ep->base.duplex == DUPLEX_FULL) hp->sw_bmcr |= BMCR_FULLDPLX; } happy_meal_tcvr_write(hp, tregs, MII_BMCR, hp->sw_bmcr); @@ -2434,20 +2435,21 @@ static void happy_meal_set_multicast(struct net_device *dev) } /* Ethtool support... */ -static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int hme_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct happy_meal *hp = netdev_priv(dev); u32 speed; + u32 supported; - cmd->supported = + supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); /* XXX hardcoded stuff for now */ - cmd->port = PORT_TP; /* XXX no MII support */ - cmd->transceiver = XCVR_INTERNAL; /* XXX no external xcvr support */ - cmd->phy_address = 0; /* XXX fixed PHYAD */ + cmd->base.port = PORT_TP; /* XXX no MII support */ + cmd->base.phy_address = 0; /* XXX fixed PHYAD */ /* Record PHY settings. */ spin_lock_irq(&hp->happy_lock); @@ -2456,41 +2458,45 @@ static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) spin_unlock_irq(&hp->happy_lock); if (hp->sw_bmcr & BMCR_ANENABLE) { - cmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; speed = ((hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) ? SPEED_100 : SPEED_10); if (speed == SPEED_100) - cmd->duplex = + cmd->base.duplex = (hp->sw_lpa & (LPA_100FULL)) ? DUPLEX_FULL : DUPLEX_HALF; else - cmd->duplex = + cmd->base.duplex = (hp->sw_lpa & (LPA_10FULL)) ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->autoneg = AUTONEG_DISABLE; + cmd->base.autoneg = AUTONEG_DISABLE; speed = (hp->sw_bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - cmd->duplex = + cmd->base.duplex = (hp->sw_bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } - ethtool_cmd_speed_set(cmd, speed); + cmd->base.speed = speed; + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + return 0; } -static int hme_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int hme_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct happy_meal *hp = netdev_priv(dev); /* Verify the settings we care about. */ - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && - ((ethtool_cmd_speed(cmd) != SPEED_100 && - ethtool_cmd_speed(cmd) != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL))) + if (cmd->base.autoneg == AUTONEG_DISABLE && + ((cmd->base.speed != SPEED_100 && + cmd->base.speed != SPEED_10) || + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL))) return -EINVAL; /* Ok, do it to it. */ @@ -2537,10 +2543,10 @@ static u32 hme_get_link(struct net_device *dev) } static const struct ethtool_ops hme_ethtool_ops = { - .get_settings = hme_get_settings, - .set_settings = hme_set_settings, .get_drvinfo = hme_get_drvinfo, .get_link = hme_get_link, + .get_link_ksettings = hme_get_link_ksettings, + .set_link_ksettings = hme_set_link_ksettings, }; static int hme_version_printed; From f441df6b9ff41e1af7289b69d5ce379053f900a5 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 23:21:06 +0100 Subject: [PATCH 104/384] net: toshiba: ps3_genic_net: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Tested-by: Geoff Levand Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_net.c | 51 +++++++++++--------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 72013314bba8..fa6a06571187 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1206,61 +1206,68 @@ void gelic_net_get_drvinfo(struct net_device *netdev, strlcpy(info->version, DRV_VERSION, sizeof(info->version)); } -static int gelic_ether_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int gelic_ether_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct gelic_card *card = netdev_card(netdev); + u32 supported, advertising; gelic_card_get_ether_port_status(card, 0); if (card->ether_port_status & GELIC_LV1_ETHER_FULL_DUPLEX) - cmd->duplex = DUPLEX_FULL; + cmd->base.duplex = DUPLEX_FULL; else - cmd->duplex = DUPLEX_HALF; + cmd->base.duplex = DUPLEX_HALF; switch (card->ether_port_status & GELIC_LV1_ETHER_SPEED_MASK) { case GELIC_LV1_ETHER_SPEED_10: - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; break; case GELIC_LV1_ETHER_SPEED_100: - ethtool_cmd_speed_set(cmd, SPEED_100); + cmd->base.speed = SPEED_100; break; case GELIC_LV1_ETHER_SPEED_1000: - ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->base.speed = SPEED_1000; break; default: pr_info("%s: speed unknown\n", __func__); - ethtool_cmd_speed_set(cmd, SPEED_10); + cmd->base.speed = SPEED_10; break; } - cmd->supported = SUPPORTED_TP | SUPPORTED_Autoneg | + supported = SUPPORTED_TP | SUPPORTED_Autoneg | SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full; - cmd->advertising = cmd->supported; + advertising = supported; if (card->link_mode & GELIC_LV1_ETHER_AUTO_NEG) { - cmd->autoneg = AUTONEG_ENABLE; + cmd->base.autoneg = AUTONEG_ENABLE; } else { - cmd->autoneg = AUTONEG_DISABLE; - cmd->advertising &= ~ADVERTISED_Autoneg; + cmd->base.autoneg = AUTONEG_DISABLE; + advertising &= ~ADVERTISED_Autoneg; } - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); return 0; } -static int gelic_ether_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int +gelic_ether_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct gelic_card *card = netdev_card(netdev); u64 mode; int ret; - if (cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { mode = GELIC_LV1_ETHER_AUTO_NEG; } else { - switch (cmd->speed) { + switch (cmd->base.speed) { case SPEED_10: mode = GELIC_LV1_ETHER_SPEED_10; break; @@ -1273,9 +1280,9 @@ static int gelic_ether_set_settings(struct net_device *netdev, default: return -EINVAL; } - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) { mode |= GELIC_LV1_ETHER_FULL_DUPLEX; - else if (cmd->speed == SPEED_1000) { + } else if (cmd->base.speed == SPEED_1000) { pr_info("1000 half duplex is not supported.\n"); return -EINVAL; } @@ -1370,11 +1377,11 @@ static int gelic_net_set_wol(struct net_device *netdev, static const struct ethtool_ops gelic_ether_ethtool_ops = { .get_drvinfo = gelic_net_get_drvinfo, - .get_settings = gelic_ether_get_settings, - .set_settings = gelic_ether_set_settings, .get_link = ethtool_op_get_link, .get_wol = gelic_net_get_wol, .set_wol = gelic_net_set_wol, + .get_link_ksettings = gelic_ether_get_link_ksettings, + .set_link_ksettings = gelic_ether_set_link_ksettings, }; /** From 50ad480e4d64fde941fcac39db1ab29a83d86703 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sun, 5 Mar 2017 23:46:00 +0100 Subject: [PATCH 105/384] net: toshiba: spider_net: use new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We move this driver to new api {get|set}_link_ksettings. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Signed-off-by: David S. Miller --- .../net/ethernet/toshiba/spider_net_ethtool.c | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/toshiba/spider_net_ethtool.c b/drivers/net/ethernet/toshiba/spider_net_ethtool.c index ffe519382e11..16bd036d0682 100644 --- a/drivers/net/ethernet/toshiba/spider_net_ethtool.c +++ b/drivers/net/ethernet/toshiba/spider_net_ethtool.c @@ -47,19 +47,23 @@ static struct { }; static int -spider_net_ethtool_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +spider_net_ethtool_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct spider_net_card *card; card = netdev_priv(netdev); - cmd->supported = (SUPPORTED_1000baseT_Full | - SUPPORTED_FIBRE); - cmd->advertising = (ADVERTISED_1000baseT_Full | - ADVERTISED_FIBRE); - cmd->port = PORT_FIBRE; - ethtool_cmd_speed_set(cmd, card->phy.speed); - cmd->duplex = DUPLEX_FULL; + ethtool_link_ksettings_zero_link_mode(cmd, supported); + ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); + ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); + + cmd->base.port = PORT_FIBRE; + cmd->base.speed = card->phy.speed; + cmd->base.duplex = DUPLEX_FULL; return 0; } @@ -166,7 +170,6 @@ static void spider_net_get_strings(struct net_device *netdev, u32 stringset, } const struct ethtool_ops spider_net_ethtool_ops = { - .get_settings = spider_net_ethtool_get_settings, .get_drvinfo = spider_net_ethtool_get_drvinfo, .get_wol = spider_net_ethtool_get_wol, .get_msglevel = spider_net_ethtool_get_msglevel, @@ -177,5 +180,6 @@ const struct ethtool_ops spider_net_ethtool_ops = { .get_strings = spider_net_get_strings, .get_sset_count = spider_net_get_sset_count, .get_ethtool_stats = spider_net_get_ethtool_stats, + .get_link_ksettings = spider_net_ethtool_get_link_ksettings, }; From b793f081674e363f71699b0b32fc9a1890e52db2 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 6 Mar 2017 14:34:27 +0100 Subject: [PATCH 106/384] net: ibm: emac: fix regression caused by emac_dt_phy_probe() Julian Margetson reported a panic on his SAM460EX with Kernel 4.11-rc1: | Unable to handle kernel paging request for data at address 0x00000014 | Oops: Kernel access of bad area, sig: 11 [#1] | PREEMPT | Canyonlands | Modules linked in: | CPU: 0 PID: 1 Comm: swapper Not tainted [...] | task: ea838000 task.stack: ea836000 | NIP: c0599f5c LR: c0599dd8 CTR: 00000000 | REGS: ea837c80 TRAP: 0300 Not tainted [...] | MSR: 00029000 | CR: 24371242 XER: 20000000 | DEAR: 00000014 ESR: 00000000 | GPR00: c0599ce8 ea837d30 ea838000 c0e52dcc c0d56ffb [...] | NIP [c0599f5c] emac_probe+0xfb4/0x1304 | LR [c0599dd8] emac_probe+0xe30/0x1304 | Call Trace: | [ea837d30] [c0599ce8] emac_probe+0xd40/0x1304 (unreliable) | [ea837d80] [c0533504] platform_drv_probe+0x48/0x90 | [ea837da0] [c0531c14] driver_probe_device+0x15c/0x2c4 | [ea837dd0] [c0531e04] __driver_attach+0x88/0xb0 | ---[ end trace ... ]--- The problem is caused by emac_dt_phy_probe() returing success (0) for existing device-trees configurations that do not specify a "phy-handle" property. This caused the code to skip the existing phy probe and setup. Which led to essential phy related data-structures being uninitialized. This patch also removes the unused variable in emac_dt_phy_connect(). Fixes: a577ca6badb5261d ("net: emac: add support for device-tree based PHY discovery and setup") Reported-by: Julian Margetson Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 275c2e2349ad..c44036d5761a 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2589,8 +2589,6 @@ static int emac_dt_mdio_probe(struct emac_instance *dev) static int emac_dt_phy_connect(struct emac_instance *dev, struct device_node *phy_handle) { - int res; - dev->phy.def = devm_kzalloc(&dev->ofdev->dev, sizeof(*dev->phy.def), GFP_KERNEL); if (!dev->phy.def) @@ -2617,7 +2615,7 @@ static int emac_dt_phy_probe(struct emac_instance *dev) { struct device_node *np = dev->ofdev->dev.of_node; struct device_node *phy_handle; - int res = 0; + int res = 1; phy_handle = of_parse_phandle(np, "phy-handle", 0); @@ -2714,13 +2712,24 @@ static int emac_init_phy(struct emac_instance *dev) if (emac_has_feature(dev, EMAC_FTR_HAS_RGMII)) { int res = emac_dt_phy_probe(dev); - mutex_unlock(&emac_phy_map_lock); - if (!res) + switch (res) { + case 1: + /* No phy-handle property configured. + * Continue with the existing phy probe + * and setup code. + */ + break; + + case 0: + mutex_unlock(&emac_phy_map_lock); goto init_phy; - dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n", - res); - return res; + default: + mutex_unlock(&emac_phy_map_lock); + dev_err(&dev->ofdev->dev, "failed to attach dt phy (%d).\n", + res); + return res; + } } if (dev->phy_address != 0xffffffff) From aac1561ad98e74f6ea43337f9b9714ab0b1f493e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Mar 2017 22:17:10 -0800 Subject: [PATCH 107/384] net: Revert ksettings conversions. Those were supposed to go into the net-next tree not the net tree. Oops... Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/smc911x.c | 49 +++++----- drivers/net/ethernet/sun/cassini.c | 98 +++++++++---------- drivers/net/ethernet/sun/niu.c | 37 ++++--- drivers/net/ethernet/sun/sungem.c | 98 ++++++++----------- drivers/net/ethernet/sun/sunhme.c | 62 ++++++------ drivers/net/ethernet/toshiba/ps3_gelic_net.c | 51 +++++----- .../net/ethernet/toshiba/spider_net_ethtool.c | 24 ++--- 7 files changed, 187 insertions(+), 232 deletions(-) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 36307d34f641..4f19c6166182 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1446,40 +1446,40 @@ static int smc911x_close(struct net_device *dev) * Ethtool support */ static int -smc911x_ethtool_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +smc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret, status; unsigned long flags; - u32 supported; DBG(SMC_DEBUG_FUNC, dev, "--> %s\n", __func__); + cmd->maxtxpkt = 1; + cmd->maxrxpkt = 1; if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_get_link_ksettings(&lp->mii, cmd); + ret = mii_ethtool_gset(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - supported = SUPPORTED_10baseT_Half | + cmd->supported = SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_TP | SUPPORTED_AUI; if (lp->ctl_rspeed == 10) - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); else if (lp->ctl_rspeed == 100) - cmd->base.speed = SPEED_100; + ethtool_cmd_speed_set(cmd, SPEED_100); - cmd->base.autoneg = AUTONEG_DISABLE; - cmd->base.port = 0; + cmd->autoneg = AUTONEG_DISABLE; + if (lp->mii.phy_id==1) + cmd->transceiver = XCVR_INTERNAL; + else + cmd->transceiver = XCVR_EXTERNAL; + cmd->port = 0; SMC_GET_PHY_SPECIAL(lp, lp->mii.phy_id, status); - cmd->base.duplex = + cmd->duplex = (status & (PHY_SPECIAL_SPD_10FULL_ | PHY_SPECIAL_SPD_100FULL_)) ? DUPLEX_FULL : DUPLEX_HALF; - - ethtool_convert_legacy_u32_to_link_mode( - cmd->link_modes.supported, supported); - ret = 0; } @@ -1487,8 +1487,7 @@ smc911x_ethtool_get_link_ksettings(struct net_device *dev, } static int -smc911x_ethtool_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +smc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) { struct smc911x_local *lp = netdev_priv(dev); int ret; @@ -1496,18 +1495,16 @@ smc911x_ethtool_set_link_ksettings(struct net_device *dev, if (lp->phy_type != 0) { spin_lock_irqsave(&lp->lock, flags); - ret = mii_ethtool_set_link_ksettings(&lp->mii, cmd); + ret = mii_ethtool_sset(&lp->mii, cmd); spin_unlock_irqrestore(&lp->lock, flags); } else { - if (cmd->base.autoneg != AUTONEG_DISABLE || - cmd->base.speed != SPEED_10 || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL) || - (cmd->base.port != PORT_TP && - cmd->base.port != PORT_AUI)) + if (cmd->autoneg != AUTONEG_DISABLE || + cmd->speed != SPEED_10 || + (cmd->duplex != DUPLEX_HALF && cmd->duplex != DUPLEX_FULL) || + (cmd->port != PORT_TP && cmd->port != PORT_AUI)) return -EINVAL; - lp->ctl_rfduplx = cmd->base.duplex == DUPLEX_FULL; + lp->ctl_rfduplx = cmd->duplex == DUPLEX_FULL; ret = 0; } @@ -1689,6 +1686,8 @@ static int smc911x_ethtool_geteeprom_len(struct net_device *dev) } static const struct ethtool_ops smc911x_ethtool_ops = { + .get_settings = smc911x_ethtool_getsettings, + .set_settings = smc911x_ethtool_setsettings, .get_drvinfo = smc911x_ethtool_getdrvinfo, .get_msglevel = smc911x_ethtool_getmsglevel, .set_msglevel = smc911x_ethtool_setmsglevel, @@ -1699,8 +1698,6 @@ static const struct ethtool_ops smc911x_ethtool_ops = { .get_eeprom_len = smc911x_ethtool_geteeprom_len, .get_eeprom = smc911x_ethtool_geteeprom, .set_eeprom = smc911x_ethtool_seteeprom, - .get_link_ksettings = smc911x_ethtool_get_link_ksettings, - .set_link_ksettings = smc911x_ethtool_set_link_ksettings, }; /* diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index 382993c1561c..0e8e89f17dbb 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -691,8 +691,7 @@ static void cas_mif_poll(struct cas *cp, const int enable) } /* Must be invoked under cp->lock */ -static void cas_begin_auto_negotiation(struct cas *cp, - const struct ethtool_link_ksettings *ep) +static void cas_begin_auto_negotiation(struct cas *cp, struct ethtool_cmd *ep) { u16 ctl; #if 1 @@ -705,16 +704,16 @@ static void cas_begin_auto_negotiation(struct cas *cp, if (!ep) goto start_aneg; lcntl = cp->link_cntl; - if (ep->base.autoneg == AUTONEG_ENABLE) { + if (ep->autoneg == AUTONEG_ENABLE) cp->link_cntl = BMCR_ANENABLE; - } else { - u32 speed = ep->base.speed; + else { + u32 speed = ethtool_cmd_speed(ep); cp->link_cntl = 0; if (speed == SPEED_100) cp->link_cntl |= BMCR_SPEED100; else if (speed == SPEED_1000) cp->link_cntl |= CAS_BMCR_SPEED1000; - if (ep->base.duplex == DUPLEX_FULL) + if (ep->duplex == DUPLEX_FULL) cp->link_cntl |= BMCR_FULLDPLX; } #if 1 @@ -4529,21 +4528,19 @@ static void cas_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(cp->pdev), sizeof(info->bus_info)); } -static int cas_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int cas_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct cas *cp = netdev_priv(dev); u16 bmcr; int full_duplex, speed, pause; unsigned long flags; enum link_state linkstate = link_up; - u32 supported, advertising; - advertising = 0; - supported = SUPPORTED_Autoneg; + cmd->advertising = 0; + cmd->supported = SUPPORTED_Autoneg; if (cp->cas_flags & CAS_FLAG_1000MB_CAP) { - supported |= SUPPORTED_1000baseT_Full; - advertising |= ADVERTISED_1000baseT_Full; + cmd->supported |= SUPPORTED_1000baseT_Full; + cmd->advertising |= ADVERTISED_1000baseT_Full; } /* Record PHY settings if HW is on. */ @@ -4551,15 +4548,17 @@ static int cas_get_link_ksettings(struct net_device *dev, bmcr = 0; linkstate = cp->lstate; if (CAS_PHY_MII(cp->phy_type)) { - cmd->base.port = PORT_MII; - cmd->base.phy_address = cp->phy_addr; - advertising |= ADVERTISED_TP | ADVERTISED_MII | + cmd->port = PORT_MII; + cmd->transceiver = (cp->cas_flags & CAS_FLAG_SATURN) ? + XCVR_INTERNAL : XCVR_EXTERNAL; + cmd->phy_address = cp->phy_addr; + cmd->advertising |= ADVERTISED_TP | ADVERTISED_MII | ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - supported |= + cmd->supported |= (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | @@ -4575,10 +4574,11 @@ static int cas_get_link_ksettings(struct net_device *dev, } } else { - cmd->base.port = PORT_FIBRE; - cmd->base.phy_address = 0; - supported |= SUPPORTED_FIBRE; - advertising |= ADVERTISED_FIBRE; + cmd->port = PORT_FIBRE; + cmd->transceiver = XCVR_INTERNAL; + cmd->phy_address = 0; + cmd->supported |= SUPPORTED_FIBRE; + cmd->advertising |= ADVERTISED_FIBRE; if (cp->hw_running) { /* pcs uses the same bits as mii */ @@ -4590,20 +4590,21 @@ static int cas_get_link_ksettings(struct net_device *dev, spin_unlock_irqrestore(&cp->lock, flags); if (bmcr & BMCR_ANENABLE) { - advertising |= ADVERTISED_Autoneg; - cmd->base.autoneg = AUTONEG_ENABLE; - cmd->base.speed = ((speed == 10) ? + cmd->advertising |= ADVERTISED_Autoneg; + cmd->autoneg = AUTONEG_ENABLE; + ethtool_cmd_speed_set(cmd, ((speed == 10) ? SPEED_10 : ((speed == 1000) ? - SPEED_1000 : SPEED_100)); - cmd->base.duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; + SPEED_1000 : SPEED_100))); + cmd->duplex = full_duplex ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->base.autoneg = AUTONEG_DISABLE; - cmd->base.speed = ((bmcr & CAS_BMCR_SPEED1000) ? + cmd->autoneg = AUTONEG_DISABLE; + ethtool_cmd_speed_set(cmd, ((bmcr & CAS_BMCR_SPEED1000) ? SPEED_1000 : ((bmcr & BMCR_SPEED100) ? - SPEED_100 : SPEED_10)); - cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ? + SPEED_100 : SPEED_10))); + cmd->duplex = + (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } if (linkstate != link_up) { @@ -4618,46 +4619,39 @@ static int cas_get_link_ksettings(struct net_device *dev, * settings that we configured. */ if (cp->link_cntl & BMCR_ANENABLE) { - cmd->base.speed = 0; - cmd->base.duplex = 0xff; + ethtool_cmd_speed_set(cmd, 0); + cmd->duplex = 0xff; } else { - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); if (cp->link_cntl & BMCR_SPEED100) { - cmd->base.speed = SPEED_100; + ethtool_cmd_speed_set(cmd, SPEED_100); } else if (cp->link_cntl & CAS_BMCR_SPEED1000) { - cmd->base.speed = SPEED_1000; + ethtool_cmd_speed_set(cmd, SPEED_1000); } - cmd->base.duplex = (cp->link_cntl & BMCR_FULLDPLX) ? + cmd->duplex = (cp->link_cntl & BMCR_FULLDPLX)? DUPLEX_FULL : DUPLEX_HALF; } } - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); - return 0; } -static int cas_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int cas_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct cas *cp = netdev_priv(dev); unsigned long flags; - u32 speed = cmd->base.speed; + u32 speed = ethtool_cmd_speed(cmd); /* Verify the settings we care about. */ - if (cmd->base.autoneg != AUTONEG_ENABLE && - cmd->base.autoneg != AUTONEG_DISABLE) + if (cmd->autoneg != AUTONEG_ENABLE && + cmd->autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_DISABLE && + if (cmd->autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL))) + (cmd->duplex != DUPLEX_HALF && + cmd->duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. */ @@ -4759,6 +4753,8 @@ static void cas_get_ethtool_stats(struct net_device *dev, static const struct ethtool_ops cas_ethtool_ops = { .get_drvinfo = cas_get_drvinfo, + .get_settings = cas_get_settings, + .set_settings = cas_set_settings, .nway_reset = cas_nway_reset, .get_link = cas_get_link, .get_msglevel = cas_get_msglevel, @@ -4768,8 +4764,6 @@ static const struct ethtool_ops cas_ethtool_ops = { .get_sset_count = cas_get_sset_count, .get_strings = cas_get_strings, .get_ethtool_stats = cas_get_ethtool_stats, - .get_link_ksettings = cas_get_link_ksettings, - .set_link_ksettings = cas_set_link_ksettings, }; static int cas_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 2dcca249eb9c..57978056b336 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6813,8 +6813,7 @@ static void niu_get_drvinfo(struct net_device *dev, sizeof(info->bus_info)); } -static int niu_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int niu_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp; @@ -6822,30 +6821,28 @@ static int niu_get_link_ksettings(struct net_device *dev, lp = &np->link_config; memset(cmd, 0, sizeof(*cmd)); - cmd->base.phy_address = np->phy_addr; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - lp->supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - lp->active_advertising); - cmd->base.autoneg = lp->active_autoneg; - cmd->base.speed = lp->active_speed; - cmd->base.duplex = lp->active_duplex; - cmd->base.port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP; + cmd->phy_address = np->phy_addr; + cmd->supported = lp->supported; + cmd->advertising = lp->active_advertising; + cmd->autoneg = lp->active_autoneg; + ethtool_cmd_speed_set(cmd, lp->active_speed); + cmd->duplex = lp->active_duplex; + cmd->port = (np->flags & NIU_FLAGS_FIBER) ? PORT_FIBRE : PORT_TP; + cmd->transceiver = (np->flags & NIU_FLAGS_XCVR_SERDES) ? + XCVR_EXTERNAL : XCVR_INTERNAL; return 0; } -static int niu_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int niu_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct niu *np = netdev_priv(dev); struct niu_link_config *lp = &np->link_config; - ethtool_convert_link_mode_to_legacy_u32(&lp->advertising, - cmd->link_modes.advertising); - lp->speed = cmd->base.speed; - lp->duplex = cmd->base.duplex; - lp->autoneg = cmd->base.autoneg; + lp->advertising = cmd->advertising; + lp->speed = ethtool_cmd_speed(cmd); + lp->duplex = cmd->duplex; + lp->autoneg = cmd->autoneg; return niu_init_link(np); } @@ -7905,14 +7902,14 @@ static const struct ethtool_ops niu_ethtool_ops = { .nway_reset = niu_nway_reset, .get_eeprom_len = niu_get_eeprom_len, .get_eeprom = niu_get_eeprom, + .get_settings = niu_get_settings, + .set_settings = niu_set_settings, .get_strings = niu_get_strings, .get_sset_count = niu_get_sset_count, .get_ethtool_stats = niu_get_ethtool_stats, .set_phys_id = niu_set_phys_id, .get_rxnfc = niu_get_nfc, .set_rxnfc = niu_set_nfc, - .get_link_ksettings = niu_get_link_ksettings, - .set_link_ksettings = niu_set_link_ksettings, }; static int niu_ldg_assign_ldn(struct niu *np, struct niu_parent *parent, diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index dbfca0466760..5c5952e782cd 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1250,17 +1250,12 @@ static void gem_stop_dma(struct gem *gp) // XXX dbl check what that function should do when called on PCS PHY -static void gem_begin_auto_negotiation(struct gem *gp, - const struct ethtool_link_ksettings *ep) +static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep) { u32 advertise, features; int autoneg; int speed; int duplex; - u32 advertising; - - ethtool_convert_link_mode_to_legacy_u32(&advertising, - ep->link_modes.advertising); if (gp->phy_type != phy_mii_mdio0 && gp->phy_type != phy_mii_mdio1) @@ -1283,13 +1278,13 @@ static void gem_begin_auto_negotiation(struct gem *gp, /* Setup link parameters */ if (!ep) goto start_aneg; - if (ep->base.autoneg == AUTONEG_ENABLE) { - advertise = advertising; + if (ep->autoneg == AUTONEG_ENABLE) { + advertise = ep->advertising; autoneg = 1; } else { autoneg = 0; - speed = ep->base.speed; - duplex = ep->base.duplex; + speed = ethtool_cmd_speed(ep); + duplex = ep->duplex; } start_aneg: @@ -2520,96 +2515,85 @@ static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info strlcpy(info->bus_info, pci_name(gp->pdev), sizeof(info->bus_info)); } -static int gem_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct gem *gp = netdev_priv(dev); - u32 supported, advertising; if (gp->phy_type == phy_mii_mdio0 || gp->phy_type == phy_mii_mdio1) { if (gp->phy_mii.def) - supported = gp->phy_mii.def->features; + cmd->supported = gp->phy_mii.def->features; else - supported = (SUPPORTED_10baseT_Half | + cmd->supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full); /* XXX hardcoded stuff for now */ - cmd->base.port = PORT_MII; - cmd->base.phy_address = 0; /* XXX fixed PHYAD */ + cmd->port = PORT_MII; + cmd->transceiver = XCVR_EXTERNAL; + cmd->phy_address = 0; /* XXX fixed PHYAD */ /* Return current PHY settings */ - cmd->base.autoneg = gp->want_autoneg; - cmd->base.speed = gp->phy_mii.speed; - cmd->base.duplex = gp->phy_mii.duplex; - advertising = gp->phy_mii.advertising; + cmd->autoneg = gp->want_autoneg; + ethtool_cmd_speed_set(cmd, gp->phy_mii.speed); + cmd->duplex = gp->phy_mii.duplex; + cmd->advertising = gp->phy_mii.advertising; /* If we started with a forced mode, we don't have a default * advertise set, we need to return something sensible so * userland can re-enable autoneg properly. */ - if (advertising == 0) - advertising = supported; + if (cmd->advertising == 0) + cmd->advertising = cmd->supported; } else { // XXX PCS ? - supported = + cmd->supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg); - advertising = supported; - cmd->base.speed = 0; - cmd->base.duplex = 0; - cmd->base.port = 0; - cmd->base.phy_address = 0; - cmd->base.autoneg = 0; + cmd->advertising = cmd->supported; + ethtool_cmd_speed_set(cmd, 0); + cmd->duplex = cmd->port = cmd->phy_address = + cmd->transceiver = cmd->autoneg = 0; /* serdes means usually a Fibre connector, with most fixed */ if (gp->phy_type == phy_serdes) { - cmd->base.port = PORT_FIBRE; - supported = (SUPPORTED_1000baseT_Half | + cmd->port = PORT_FIBRE; + cmd->supported = (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE | SUPPORTED_Autoneg | SUPPORTED_Pause | SUPPORTED_Asym_Pause); - advertising = supported; + cmd->advertising = cmd->supported; + cmd->transceiver = XCVR_INTERNAL; if (gp->lstate == link_up) - cmd->base.speed = SPEED_1000; - cmd->base.duplex = DUPLEX_FULL; - cmd->base.autoneg = 1; + ethtool_cmd_speed_set(cmd, SPEED_1000); + cmd->duplex = DUPLEX_FULL; + cmd->autoneg = 1; } } - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + cmd->maxtxpkt = cmd->maxrxpkt = 0; return 0; } -static int gem_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct gem *gp = netdev_priv(dev); - u32 speed = cmd->base.speed; - u32 advertising; - - ethtool_convert_link_mode_to_legacy_u32(&advertising, - cmd->link_modes.advertising); + u32 speed = ethtool_cmd_speed(cmd); /* Verify the settings we care about. */ - if (cmd->base.autoneg != AUTONEG_ENABLE && - cmd->base.autoneg != AUTONEG_DISABLE) + if (cmd->autoneg != AUTONEG_ENABLE && + cmd->autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_ENABLE && - advertising == 0) + if (cmd->autoneg == AUTONEG_ENABLE && + cmd->advertising == 0) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_DISABLE && + if (cmd->autoneg == AUTONEG_DISABLE && ((speed != SPEED_1000 && speed != SPEED_100 && speed != SPEED_10) || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL))) + (cmd->duplex != DUPLEX_HALF && + cmd->duplex != DUPLEX_FULL))) return -EINVAL; /* Apply settings and restart link process. */ @@ -2682,13 +2666,13 @@ static int gem_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops gem_ethtool_ops = { .get_drvinfo = gem_get_drvinfo, .get_link = ethtool_op_get_link, + .get_settings = gem_get_settings, + .set_settings = gem_set_settings, .nway_reset = gem_nway_reset, .get_msglevel = gem_get_msglevel, .set_msglevel = gem_set_msglevel, .get_wol = gem_get_wol, .set_wol = gem_set_wol, - .get_link_ksettings = gem_get_link_ksettings, - .set_link_ksettings = gem_set_link_ksettings, }; static int gem_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 53ff66ef53ac..72ff05cd3ed8 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -1294,10 +1294,9 @@ static void happy_meal_init_rings(struct happy_meal *hp) } /* hp->happy_lock must be held */ -static void -happy_meal_begin_auto_negotiation(struct happy_meal *hp, - void __iomem *tregs, - const struct ethtool_link_ksettings *ep) +static void happy_meal_begin_auto_negotiation(struct happy_meal *hp, + void __iomem *tregs, + struct ethtool_cmd *ep) { int timeout; @@ -1310,7 +1309,7 @@ happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* XXX Check BMSR_ANEGCAPABLE, should not be necessary though. */ hp->sw_advertise = happy_meal_tcvr_read(hp, tregs, MII_ADVERTISE); - if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { + if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { /* Advertise everything we can support. */ if (hp->sw_bmsr & BMSR_10HALF) hp->sw_advertise |= (ADVERTISE_10HALF); @@ -1385,14 +1384,14 @@ happy_meal_begin_auto_negotiation(struct happy_meal *hp, /* Disable auto-negotiation in BMCR, enable the duplex and * speed setting, init the timer state machine, and fire it off. */ - if (!ep || ep->base.autoneg == AUTONEG_ENABLE) { + if (ep == NULL || ep->autoneg == AUTONEG_ENABLE) { hp->sw_bmcr = BMCR_SPEED100; } else { - if (ep->base.speed == SPEED_100) + if (ethtool_cmd_speed(ep) == SPEED_100) hp->sw_bmcr = BMCR_SPEED100; else hp->sw_bmcr = 0; - if (ep->base.duplex == DUPLEX_FULL) + if (ep->duplex == DUPLEX_FULL) hp->sw_bmcr |= BMCR_FULLDPLX; } happy_meal_tcvr_write(hp, tregs, MII_BMCR, hp->sw_bmcr); @@ -2435,21 +2434,20 @@ static void happy_meal_set_multicast(struct net_device *dev) } /* Ethtool support... */ -static int hme_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int hme_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct happy_meal *hp = netdev_priv(dev); u32 speed; - u32 supported; - supported = + cmd->supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); /* XXX hardcoded stuff for now */ - cmd->base.port = PORT_TP; /* XXX no MII support */ - cmd->base.phy_address = 0; /* XXX fixed PHYAD */ + cmd->port = PORT_TP; /* XXX no MII support */ + cmd->transceiver = XCVR_INTERNAL; /* XXX no external xcvr support */ + cmd->phy_address = 0; /* XXX fixed PHYAD */ /* Record PHY settings. */ spin_lock_irq(&hp->happy_lock); @@ -2458,45 +2456,41 @@ static int hme_get_link_ksettings(struct net_device *dev, spin_unlock_irq(&hp->happy_lock); if (hp->sw_bmcr & BMCR_ANENABLE) { - cmd->base.autoneg = AUTONEG_ENABLE; + cmd->autoneg = AUTONEG_ENABLE; speed = ((hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) ? SPEED_100 : SPEED_10); if (speed == SPEED_100) - cmd->base.duplex = + cmd->duplex = (hp->sw_lpa & (LPA_100FULL)) ? DUPLEX_FULL : DUPLEX_HALF; else - cmd->base.duplex = + cmd->duplex = (hp->sw_lpa & (LPA_10FULL)) ? DUPLEX_FULL : DUPLEX_HALF; } else { - cmd->base.autoneg = AUTONEG_DISABLE; + cmd->autoneg = AUTONEG_DISABLE; speed = (hp->sw_bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; - cmd->base.duplex = + cmd->duplex = (hp->sw_bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; } - cmd->base.speed = speed; - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - + ethtool_cmd_speed_set(cmd, speed); return 0; } -static int hme_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int hme_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct happy_meal *hp = netdev_priv(dev); /* Verify the settings we care about. */ - if (cmd->base.autoneg != AUTONEG_ENABLE && - cmd->base.autoneg != AUTONEG_DISABLE) + if (cmd->autoneg != AUTONEG_ENABLE && + cmd->autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->base.autoneg == AUTONEG_DISABLE && - ((cmd->base.speed != SPEED_100 && - cmd->base.speed != SPEED_10) || - (cmd->base.duplex != DUPLEX_HALF && - cmd->base.duplex != DUPLEX_FULL))) + if (cmd->autoneg == AUTONEG_DISABLE && + ((ethtool_cmd_speed(cmd) != SPEED_100 && + ethtool_cmd_speed(cmd) != SPEED_10) || + (cmd->duplex != DUPLEX_HALF && + cmd->duplex != DUPLEX_FULL))) return -EINVAL; /* Ok, do it to it. */ @@ -2543,10 +2537,10 @@ static u32 hme_get_link(struct net_device *dev) } static const struct ethtool_ops hme_ethtool_ops = { + .get_settings = hme_get_settings, + .set_settings = hme_set_settings, .get_drvinfo = hme_get_drvinfo, .get_link = hme_get_link, - .get_link_ksettings = hme_get_link_ksettings, - .set_link_ksettings = hme_set_link_ksettings, }; static int hme_version_printed; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index fa6a06571187..72013314bba8 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1206,68 +1206,61 @@ void gelic_net_get_drvinfo(struct net_device *netdev, strlcpy(info->version, DRV_VERSION, sizeof(info->version)); } -static int gelic_ether_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) +static int gelic_ether_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) { struct gelic_card *card = netdev_card(netdev); - u32 supported, advertising; gelic_card_get_ether_port_status(card, 0); if (card->ether_port_status & GELIC_LV1_ETHER_FULL_DUPLEX) - cmd->base.duplex = DUPLEX_FULL; + cmd->duplex = DUPLEX_FULL; else - cmd->base.duplex = DUPLEX_HALF; + cmd->duplex = DUPLEX_HALF; switch (card->ether_port_status & GELIC_LV1_ETHER_SPEED_MASK) { case GELIC_LV1_ETHER_SPEED_10: - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); break; case GELIC_LV1_ETHER_SPEED_100: - cmd->base.speed = SPEED_100; + ethtool_cmd_speed_set(cmd, SPEED_100); break; case GELIC_LV1_ETHER_SPEED_1000: - cmd->base.speed = SPEED_1000; + ethtool_cmd_speed_set(cmd, SPEED_1000); break; default: pr_info("%s: speed unknown\n", __func__); - cmd->base.speed = SPEED_10; + ethtool_cmd_speed_set(cmd, SPEED_10); break; } - supported = SUPPORTED_TP | SUPPORTED_Autoneg | + cmd->supported = SUPPORTED_TP | SUPPORTED_Autoneg | SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full; - advertising = supported; + cmd->advertising = cmd->supported; if (card->link_mode & GELIC_LV1_ETHER_AUTO_NEG) { - cmd->base.autoneg = AUTONEG_ENABLE; + cmd->autoneg = AUTONEG_ENABLE; } else { - cmd->base.autoneg = AUTONEG_DISABLE; - advertising &= ~ADVERTISED_Autoneg; + cmd->autoneg = AUTONEG_DISABLE; + cmd->advertising &= ~ADVERTISED_Autoneg; } - cmd->base.port = PORT_TP; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, - advertising); + cmd->port = PORT_TP; return 0; } -static int -gelic_ether_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) +static int gelic_ether_set_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) { struct gelic_card *card = netdev_card(netdev); u64 mode; int ret; - if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (cmd->autoneg == AUTONEG_ENABLE) { mode = GELIC_LV1_ETHER_AUTO_NEG; } else { - switch (cmd->base.speed) { + switch (cmd->speed) { case SPEED_10: mode = GELIC_LV1_ETHER_SPEED_10; break; @@ -1280,9 +1273,9 @@ gelic_ether_set_link_ksettings(struct net_device *netdev, default: return -EINVAL; } - if (cmd->base.duplex == DUPLEX_FULL) { + if (cmd->duplex == DUPLEX_FULL) mode |= GELIC_LV1_ETHER_FULL_DUPLEX; - } else if (cmd->base.speed == SPEED_1000) { + else if (cmd->speed == SPEED_1000) { pr_info("1000 half duplex is not supported.\n"); return -EINVAL; } @@ -1377,11 +1370,11 @@ static int gelic_net_set_wol(struct net_device *netdev, static const struct ethtool_ops gelic_ether_ethtool_ops = { .get_drvinfo = gelic_net_get_drvinfo, + .get_settings = gelic_ether_get_settings, + .set_settings = gelic_ether_set_settings, .get_link = ethtool_op_get_link, .get_wol = gelic_net_get_wol, .set_wol = gelic_net_set_wol, - .get_link_ksettings = gelic_ether_get_link_ksettings, - .set_link_ksettings = gelic_ether_set_link_ksettings, }; /** diff --git a/drivers/net/ethernet/toshiba/spider_net_ethtool.c b/drivers/net/ethernet/toshiba/spider_net_ethtool.c index 16bd036d0682..ffe519382e11 100644 --- a/drivers/net/ethernet/toshiba/spider_net_ethtool.c +++ b/drivers/net/ethernet/toshiba/spider_net_ethtool.c @@ -47,23 +47,19 @@ static struct { }; static int -spider_net_ethtool_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) +spider_net_ethtool_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) { struct spider_net_card *card; card = netdev_priv(netdev); - ethtool_link_ksettings_zero_link_mode(cmd, supported); - ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - - ethtool_link_ksettings_zero_link_mode(cmd, advertising); - ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full); - ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - - cmd->base.port = PORT_FIBRE; - cmd->base.speed = card->phy.speed; - cmd->base.duplex = DUPLEX_FULL; + cmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE); + cmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + cmd->port = PORT_FIBRE; + ethtool_cmd_speed_set(cmd, card->phy.speed); + cmd->duplex = DUPLEX_FULL; return 0; } @@ -170,6 +166,7 @@ static void spider_net_get_strings(struct net_device *netdev, u32 stringset, } const struct ethtool_ops spider_net_ethtool_ops = { + .get_settings = spider_net_ethtool_get_settings, .get_drvinfo = spider_net_ethtool_get_drvinfo, .get_wol = spider_net_ethtool_get_wol, .get_msglevel = spider_net_ethtool_get_msglevel, @@ -180,6 +177,5 @@ const struct ethtool_ops spider_net_ethtool_ops = { .get_strings = spider_net_get_strings, .get_sset_count = spider_net_get_sset_count, .get_ethtool_stats = spider_net_get_ethtool_stats, - .get_link_ksettings = spider_net_ethtool_get_link_ksettings, }; From 3331aa378e9bcbd0d16de9034b0c20f4050e26b4 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 6 Mar 2017 08:48:58 -0500 Subject: [PATCH 108/384] team: use ETH_MAX_MTU as max mtu This restores the ability to set a team device's mtu to anything higher than 1500. Similar to the reported issue with bonding, the team driver calls ether_setup(), which sets an initial max_mtu of 1500, while the underlying hardware can handle something much larger. Just set it to ETH_MAX_MTU to support all possible values, and the limitations of the underlying devices will prevent setting anything too large. Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") CC: Cong Wang CC: Jiri Pirko CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- drivers/net/team/team.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 4a24b5d15f5a..1b52520715ae 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2072,6 +2072,7 @@ static int team_dev_type_check_change(struct net_device *dev, static void team_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = ETH_MAX_MTU; dev->netdev_ops = &team_netdev_ops; dev->ethtool_ops = &team_ethtool_ops; From f7887d40e541f74402df0684a1463c0a0bb68c68 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 6 Mar 2017 08:53:04 -0800 Subject: [PATCH 109/384] vrf: Fix use-after-free in vrf_xmit KASAN detected a use-after-free: [ 269.467067] BUG: KASAN: use-after-free in vrf_xmit+0x7f1/0x827 [vrf] at addr ffff8800350a21c0 [ 269.467067] Read of size 4 by task ssh/1879 [ 269.467067] CPU: 1 PID: 1879 Comm: ssh Not tainted 4.10.0+ #249 [ 269.467067] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 [ 269.467067] Call Trace: [ 269.467067] dump_stack+0x81/0xb6 [ 269.467067] kasan_object_err+0x21/0x78 [ 269.467067] kasan_report+0x2f7/0x450 [ 269.467067] ? vrf_xmit+0x7f1/0x827 [vrf] [ 269.467067] ? ip_output+0xa4/0xdb [ 269.467067] __asan_load4+0x6b/0x6d [ 269.467067] vrf_xmit+0x7f1/0x827 [vrf] ... Which corresponds to the skb access after xmit handling. Fix by saving skb->len and using the saved value to update stats. Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 22379da63400..fea687f35b5a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -340,6 +340,7 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { + int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { @@ -347,7 +348,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) u64_stats_update_begin(&dstats->syncp); dstats->tx_pkts++; - dstats->tx_bytes += skb->len; + dstats->tx_bytes += len; u64_stats_update_end(&dstats->syncp); } else { this_cpu_inc(dev->dstats->tx_drps); From 713c43b315fc160dc4ff3da0020ebe09b8a65587 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 6 Mar 2017 21:22:09 +0100 Subject: [PATCH 110/384] mlxsw: spectrum_flower: Remove bogus warns in mlxsw_sp_flower_destroy This warnings may be hit even in case they should not - in case user puts a TC-flower rule which failed to be offloaded. So just remove them. Reported-by: Petr Machata Reported-by: Ido Schimmel Fixes: commit 7aa0f5aa9030 ("mlxsw: spectrum: Implement TC flower offload") Signed-off-by: Jiri Pirko Acked-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 22ab42925377..ae6cccc666e4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -303,11 +303,11 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, ingress, MLXSW_SP_ACL_PROFILE_FLOWER); - if (WARN_ON(IS_ERR(ruleset))) + if (IS_ERR(ruleset)) return; rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie); - if (!WARN_ON(!rule)) { + if (rule) { mlxsw_sp_acl_rule_del(mlxsw_sp, rule); mlxsw_sp_acl_rule_destroy(mlxsw_sp, rule); } From 7aafac11e308d37ed3c509829bb43d80c1811ac3 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Feb 2017 15:43:59 +1100 Subject: [PATCH 111/384] powerpc/powernv/ioda2: Gracefully fail if too many TCE levels requested The IODA2 specification says that a 64 DMA address cannot use top 4 bits (3 are reserved and one is a "TVE select"); bottom page_shift bits cannot be used for multilevel table addressing either. The existing IODA2 table allocation code aligns the minimum TCE table size to PAGE_SIZE so in the case of 64K system pages and 4K IOMMU pages, we have 64-4-12=48 bits. Since 64K page stores 8192 TCEs, i.e. needs 13 bits, the maximum number of levels is 48/13 = 3 so we physically cannot address more and EEH happens on DMA accesses. This adds a check that too many levels were requested. It is still possible to have 5 levels in the case of 4K system page size. Signed-off-by: Alexey Kardashevskiy Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6901a06da2f9..957a57a6c812 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2624,6 +2624,9 @@ static long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, level_shift = entries_shift + 3; level_shift = max_t(unsigned, level_shift, PAGE_SHIFT); + if ((level_shift - 3) * levels + page_shift >= 60) + return -EINVAL; + /* Allocate TCE table */ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, levels, tce_table_size, &offset, &total_allocated); From 89cf83d4e065ff9fbd2ddc674489c8058eeca758 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Feb 2017 12:54:41 +0000 Subject: [PATCH 112/384] drm/i915: Squelch any ktime/jiffie rounding errors for wait-ioctl We wait upon jiffies, but report the time elapsed using a high-resolution timer. This discrepancy can lead to us timing out the wait prior to us reporting the elapsed time as complete. This restores the squelching lost in commit e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers"). Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170216125441.30923-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit c1d2061b28c2aa25ec39b60d9c248e6beebd7315) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6908123162d1..c45af09555dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3029,6 +3029,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); From 1d972d6021a1388021df51a58248e68372ce2b5d Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:57 +0200 Subject: [PATCH 113/384] drm/i915/glk: Fix watermark computations for third sprite plane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake has a third sprite plane (or fourth universal plane) that is independent from the cursor. Make sure that for_each_plane_id_on_crtc() is aware of that extra plane so that the watermark code takes it into account. Fixes: e9c9882556fc ("drm/i915/glk: Configure number of sprite planes properly") Cc: Ander Conselvan de Oliveira Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-2-ander.conselvan.de.oliveira@intel.com (cherry picked from commit 19c3164db457e0fc65d4501fd354506228576241) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a4b42d31391..7febe6eecf72 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; From b717a0392530ae8da0da041abe5c3a6098b55660 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Feb 2017 11:43:06 +0000 Subject: [PATCH 114/384] drm/i915/fbdev: Stop repeating tile configuration on stagnation If we cease making progress in finding matching outputs for a tiled configuration, stop looping over the remaining unconfigured outputs. v2: Use conn_seq (instead of pass) to only apply tile configuration on first pass. Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. (v2)") Signed-off-by: Chris Wilson Cc: Tomasz Lis Cc: Dave Airlie Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: # v3.19+ Reviewed-by: Tomasz Lis Link: http://patchwork.freedesktop.org/patch/msgid/20170224114306.4400-1-chris@chris-wilson.co.uk (cherry picked from commit 754a76591b12c88f57ad8b4ca533a5c9566a1922) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 1b8ba2e77539..2d449fb5d1d2 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -357,14 +357,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) @@ -374,6 +373,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, mask = BIT(count) - 1; conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -387,7 +387,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -498,10 +498,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the From 8c9923707f30ff56d9fd242053594b18f38d8036 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:54 +0000 Subject: [PATCH 115/384] drm/i915: Remove the vma from the drm_mm if binding fails As we track whether a vma has been inserted into the drm_mm using the vma->flags, if we fail to bind the vma into the GTT we do not update those bits and will attempt to reinsert the vma into the drm_mm on future passes. To prevent that, we want to unwind i915_vma_insert() if we fail in our attempt to bind. Fixes: 59bfa1248e22 ("drm/i915: Start passing around i915_vma from execbuffer") Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.9+ Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-3-chris@chris-wilson.co.uk (cherry picked from commit 31c7effa39f21f0fea1b3250ae9ff32b9c7e1ae5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_vma.c | 57 +++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e84812..df20e9bc1c0f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -512,10 +512,36 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return ret; } +static void +i915_vma_remove(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->flags; + const unsigned int bound = vma->flags; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -524,18 +550,18 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { ret = -EBUSY; - goto err; + goto err_unpin; } if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err; + goto err_unpin; } ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - goto err; + goto err_remove; if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); @@ -544,7 +570,12 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -err: +err_remove: + if ((bound & I915_VMA_BIND_MASK) == 0) { + GEM_BUG_ON(vma->pages); + i915_vma_remove(vma); + } +err_unpin: __i915_vma_unpin(vma); return ret; } @@ -657,9 +688,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); @@ -667,18 +695,7 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->pages = NULL; - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + i915_vma_remove(vma); destroy: if (unlikely(i915_vma_is_closed(vma))) From 34dc8993eef63681b062871413a9484008a2a78f Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 15 Feb 2017 15:52:59 +0200 Subject: [PATCH 116/384] drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain Baytrails, namely the 4 cpu core variants, have been plaqued by spurious system hangs, mostly occurring with light loads. Multiple bisects by various people point to a commit which changes the reclocking strategy for Baytrail to follow its bigger brethen: commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail") There is also a review comment attached to this commit from Deepak S on avoiding punit access on Cherryview and thus it was excluded on common reclocking path. By taking the same approach and omitting the punit access by not tweaking the thresholds when the hardware has been asked to move into different frequency, considerable gains in stability have been observed. With J1900 box, light render/video load would end up in system hang in usually less than 12 hours. With this patch applied, the cumulative uptime has now been 34 days without issues. To provoke system hang, light loads on both render and bsd engines in parallel have been used: glxgears >/dev/null 2>/dev/null & mpv --vo=vaapi --hwdec=vaapi --loop=inf vid.mp4 So far, author has not witnessed system hang with above load and this patch applied. Reports from the tenacious people at kernel bugzilla are also promising. Considering that the punit access frequency with this patch is considerably less, there is a possibility that this will push the, still unknown, root cause past the triggering point on most loads. But as we now can reliably reproduce the hang independently, we can reduce the pain that users are having and use a static thresholds until a root cause is found. v3: don't break debugfs and simplification (Chris Wilson) References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 Cc: Chris Wilson Cc: Ville Syrjälä Cc: Len Brown Cc: Daniel Vetter Cc: Jani Nikula Cc: fritsch@xbmc.org Cc: miku@iki.fi Cc: Ezequiel Garcia CC: Michal Feix Cc: Hans de Goede Cc: Deepak S Cc: Jarkko Nikula Cc: # v4.2+ Acked-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1487166779-26945-1-git-send-email-mika.kuoppala@intel.com (cherry picked from commit 6067a27d1f0184596d51decbac1c1fdc4acb012f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..65cd4c56c9dd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4891,6 +4891,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4911,6 +4917,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; From d253371c4c2f5fc2d884ef25f64decd7549aff5a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 24 Feb 2017 16:32:10 +0200 Subject: [PATCH 117/384] drm/i915/gen9: Increase PCODE request timeout to 50ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 2c7d0602c815277f7cb7c932b091288710d8aba7 Author: Imre Deak Date: Mon Dec 5 18:27:37 2016 +0200 drm/i915/gen9: Fix PCODE polling during CDCLK change notification there is still one report of the CDCLK-change request timing out on a KBL machine, see the Reference link. On that machine the maximum time the request took to succeed was 34ms, so increase the timeout to 50ms. v2: - Change timeout from 100 to 50 ms to maintain the current 50 ms limit for atomic waits in the driver. (Chris, Tvrtko) Reference: https://bugs.freedesktop.org/show_bug.cgi?id=99345 Cc: Ville Syrjälä Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Signed-off-by: Imre Deak Acked-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1487946730-17162-1-git-send-email-imre.deak@intel.com (cherry picked from commit 0129936ddda26afd5d9d207c4e86b2425952579f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 65cd4c56c9dd..940bab22d464 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7923,10 +7923,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, * @timeout_base_ms: timeout for polling with preemption enabled * * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE - * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. * The request is acknowledged once the PCODE reply dword equals @reply after * applying @reply_mask. Polling is first attempted with preemption enabled - * for @timeout_base_ms and if this times out for another 10 ms with + * for @timeout_base_ms and if this times out for another 50 ms with * preemption disabled. * * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some @@ -7962,14 +7962,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 10ms to + * requests. Increase the timeout from @timeout_base_ms to 50ms to * account for interrupts that could reduce the number of these - * requests. + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. */ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 10); + ret = wait_for_atomic(COND, 50); preempt_enable(); out: From 38230243ef316ac696956d75dc78a22e3aa789b9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Feb 2017 15:28:47 +0100 Subject: [PATCH 118/384] drm/i915: Move updating color management to before vblank evasion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This cannot be done reliably during vblank evasasion since the color management registers are not double buffered. The original commit that moved it always during vblank evasion was wrong, so revert it to before vblank evasion again. Signed-off-by: Maarten Lankhorst Fixes: 20a34e78f0d7 ("drm/i915: Update color management during vblank evasion.") Cc: stable@vger.kernel.org # v4.7+ Link: http://patchwork.freedesktop.org/patch/msgid/1488292128-14540-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä (cherry picked from commit 567f0792a6ad11c0c2620944b8eeb777359fb85a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 01341670738f..9a8b6a13233d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14946,17 +14946,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, to_intel_atomic_state(old_crtc_state->state); bool modeset = needs_modeset(crtc->state); + if (!modeset && + (intel_cstate->base.color_mgmt_changed || + intel_cstate->update_pipe)) { + intel_color_set_csc(crtc->state); + intel_color_load_luts(crtc->state); + } + /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_crtc); if (modeset) goto out; - if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) { - intel_color_set_csc(crtc->state); - intel_color_load_luts(crtc->state); - } - if (intel_cstate->update_pipe) intel_update_pipe_config(intel_crtc, old_intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) From 0d9dc306e15b59bf50db87ebcb1e2248586d4733 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Mar 2017 13:20:31 +0000 Subject: [PATCH 119/384] drm/i915: Store a permanent error in obj->mm.pages Once the object has been truncated, it is unrecoverable. To facilitate detection of this state store the error in obj->mm.pages. This is required for the next patch which should be applied to v4.10 (via stable), so we also need to mark this patch for backporting. In that regard, let's consider this to be a fix/improvement too. v2: Avoid dereferencing the ERR_PTR when freeing the object. Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: # v4.10+ Link: http://patchwork.freedesktop.org/patch/msgid/20170307132031.32461-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 4e5462ee843c883790e9609cf560d88960ea4227) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c45af09555dc..3591e8656ff9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2119,6 +2119,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) */ shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); obj->mm.madv = __I915_MADV_PURGED; + obj->mm.pages = ERR_PTR(-EFAULT); } /* Try to discard unwanted pages */ @@ -2218,7 +2219,9 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, __i915_gem_object_reset_page_iter(obj); - obj->ops->put_pages(obj, pages); + if (!IS_ERR(pages)) + obj->ops->put_pages(obj, pages); + unlock: mutex_unlock(&obj->mm.lock); } @@ -2437,7 +2440,7 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { err = ____i915_gem_object_get_pages(obj); if (err) goto unlock; @@ -2515,7 +2518,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, pinned = true; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - if (unlikely(!obj->mm.pages)) { + if (unlikely(IS_ERR_OR_NULL(obj->mm.pages))) { ret = ____i915_gem_object_get_pages(obj); if (ret) goto err_unlock; From 4e6fdafa7ac395ad47a80a0e7b4fd1e11550f862 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Mar 2017 12:03:38 +0000 Subject: [PATCH 120/384] drm/i915: Use pagecache write to prepopulate shmemfs from pwrite-ioctl Before we instantiate/pin the backing store for our use, we can prepopulate the shmemfs filp efficiently using a write into the pagecache. We avoid the penalty of instantiating all the pages, important if the user is just writing to a few and never uses the object on the GPU, and using a direct write into shmemfs allows it to avoid the cost of retrieving a page (mostly the clear-before-use, but in theory we could curtail swapin) before it is overwritten. This can be extended later to provide additional specialisation for other backends (other than shmemfs). For now it provides a defense against very large write-only allocations from exhausting all of system memory. v2: Smelling fixes. Fixes: fe115628d567 ("drm/i915: Implement pwrite without struct-mutex") References: https://bugs.freedesktop.org/show_bug.cgi?id=99107 Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: # v4.10+ Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170307120338.7277-2-chris@chris-wilson.co.uk (cherry picked from commit 7c55e2c5772dcf3cbacd0fa2bcfeefae416b73f7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 78 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_object.h | 3 + 2 files changed, 81 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3591e8656ff9..10777da73039 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1434,6 +1434,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pwrite(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pwrite) + ret = obj->ops->pwrite(obj, args); + if (ret != -ENODEV) + goto err; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, @@ -2566,6 +2572,75 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto out_unlock; } +static int +i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *arg) +{ + struct address_space *mapping = obj->base.filp->f_mapping; + char __user *user_data = u64_to_user_ptr(arg->data_ptr); + u64 remain, offset; + unsigned int pg; + + /* Before we instantiate/pin the backing store for our use, we + * can prepopulate the shmemfs filp efficiently using a write into + * the pagecache. We avoid the penalty of instantiating all the + * pages, important if the user is just writing to a few and never + * uses the object on the GPU, and using a direct write into shmemfs + * allows it to avoid the cost of retrieving a page (either swapin + * or clearing-before-use) before it is overwritten. + */ + if (READ_ONCE(obj->mm.pages)) + return -ENODEV; + + /* Before the pages are instantiated the object is treated as being + * in the CPU domain. The pages will be clflushed as required before + * use, and we can freely write into the pages directly. If userspace + * races pwrite with any other operation; corruption will ensue - + * that is userspace's prerogative! + */ + + remain = arg->size; + offset = arg->offset; + pg = offset_in_page(offset); + + do { + unsigned int len, unwritten; + struct page *page; + void *data, *vaddr; + int err; + + len = PAGE_SIZE - pg; + if (len > remain) + len = remain; + + err = pagecache_write_begin(obj->base.filp, mapping, + offset, len, 0, + &page, &data); + if (err < 0) + return err; + + vaddr = kmap(page); + unwritten = copy_from_user(vaddr + pg, user_data, len); + kunmap(page); + + err = pagecache_write_end(obj->base.filp, mapping, + offset, len, len - unwritten, + page, data); + if (err < 0) + return err; + + if (unwritten) + return -EFAULT; + + remain -= len; + user_data += len; + offset += len; + pg = 0; + } while (remain); + + return 0; +} + static bool ban_context(const struct i915_gem_context *ctx) { return (i915_gem_context_is_bannable(ctx) && @@ -3987,8 +4062,11 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = i915_gem_object_get_pages_gtt, .put_pages = i915_gem_object_put_pages_gtt, + + .pwrite = i915_gem_object_pwrite_gtt, }; struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index bf90b07163d1..76b80a0be797 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -54,6 +54,9 @@ struct drm_i915_gem_object_ops { struct sg_table *(*get_pages)(struct drm_i915_gem_object *); void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); + int (*pwrite)(struct drm_i915_gem_object *, + const struct drm_i915_gem_pwrite *); + int (*dmabuf_export)(struct drm_i915_gem_object *); void (*release)(struct drm_i915_gem_object *); }; From edd06b8353772dca7afcd4640dafa83b521edd55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Mar 2017 22:54:19 +0200 Subject: [PATCH 121/384] drm/i915: Nuke debug messages from the pipe update critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printks are slow so we should not be doing them from the vblank evade critical section. These could explain why we sometimes seem to blow past our 100 usec deadline. The problem has been there ever since commit bfd16b2a23dc ("drm/i915: Make updating pipe without modeset atomic.") but it may not have been readily visible until commit e1edbd44e23b ("drm/i915: Complain if we take too long under vblank evasion.") increased our chances of noticing it. Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Fixes: bfd16b2a23dc ("drm/i915: Make updating pipe without modeset atomic.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170307205419.19447-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst (cherry picked from commit c3f8ad57a01a31397e5a0349a226a32f35ddc19c) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9a8b6a13233d..b3e0cd133b49 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3669,10 +3669,6 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, /* drm_atomic_helper_update_legacy_modeset_state might not be called. */ crtc->base.mode = crtc->base.state->mode; - DRM_DEBUG_KMS("Updating pipe size %ix%i -> %ix%i\n", - old_crtc_state->pipe_src_w, old_crtc_state->pipe_src_h, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); - /* * Update pipe size and adjust fitter if needed: the reason for this is * that in compute_mode_changes we check the native mode (not the pfit @@ -4796,23 +4792,17 @@ static void skylake_pfit_enable(struct intel_crtc *crtc) struct intel_crtc_scaler_state *scaler_state = &crtc->config->scaler_state; - DRM_DEBUG_KMS("for crtc_state = %p\n", crtc->config); - if (crtc->config->pch_pfit.enabled) { int id; - if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) { - DRM_ERROR("Requesting pfit without getting a scaler first\n"); + if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) return; - } id = scaler_state->scaler_id; I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN | PS_FILTER_MEDIUM | scaler_state->scalers[id].mode); I915_WRITE(SKL_PS_WIN_POS(pipe, id), crtc->config->pch_pfit.pos); I915_WRITE(SKL_PS_WIN_SZ(pipe, id), crtc->config->pch_pfit.size); - - DRM_DEBUG_KMS("for crtc_state = %p scaler_id = %d\n", crtc->config, id); } } From 5a8cf90d743f2d05433c6109f6c1b9b904b0cdb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Feb 2017 20:47:41 +0000 Subject: [PATCH 122/384] drm/i915: Drain the freed state from the tail of the next commit If we have any residual freed atomic state from earlier commits, flush the freed list after performing the current modeset. This prevents the freed list from ever-growing if userspace manages to starve the kernel threads (i.e. we are never able to run our free state worker and eventually the system may even oom). Fixes: 6f0f02dc56f1 ("drm/i915: Move atomic state free from out of fence release") Testcase: igt/kms_cursor/legacy/all-pipes-single-bo Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170202204741.18231-1-chris@chris-wilson.co.uk Reviewed-by: Maarten Lankhorst (cherry picked from commit ba318c61a9719577b6f451c055f364e4116874b2) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b3e0cd133b49..3282b0f4b134 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14369,6 +14369,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14535,6 +14553,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -16591,18 +16611,6 @@ static void sanitize_watermarks(struct drm_device *dev) drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16623,7 +16631,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); From db08e1d53034a54fe177ced70476fda73954b9e9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 21 Feb 2017 13:41:31 +1100 Subject: [PATCH 123/384] powerpc/powernv/ioda2: Update iommu table base on ownership change On POWERNV platform, in order to do DMA via IOMMU (i.e. 32bit DMA in our case), a device needs an iommu_table pointer set via set_iommu_table_base(). The codeflow is: - pnv_pci_ioda2_setup_dma_pe() - pnv_pci_ioda2_setup_default_config() - pnv_ioda_setup_bus_dma() [1] pnv_pci_ioda2_setup_dma_pe() creates IOMMU groups, pnv_pci_ioda2_setup_default_config() does default DMA setup, pnv_ioda_setup_bus_dma() takes a bus PE (on IODA2, all physical function PEs as bus PEs except NPU), walks through all underlying buses and devices, adds all devices to an IOMMU group and sets iommu_table. On IODA2, when VFIO is used, it takes ownership over a PE which means it removes all tables and creates new ones (with a possibility of sharing them among PEs). So when the ownership is returned from VFIO to the kernel, the iommu_table pointer written to a device at [1] is stale and needs an update. This adds an "add_to_group" parameter to pnv_ioda_setup_bus_dma() (in fact re-adds as it used to be there a while ago for different reasons) to tell the helper if a device needs to be added to an IOMMU group with an iommu_table update or just the latter. This calls pnv_ioda_setup_bus_dma(..., false) from pnv_ioda2_release_ownership() so when the ownership is restored, 32bit DMA can work again for a device. This does the same thing on obtaining ownership as the iommu_table point is stale at this point anyway and it is safer to have NULL there. We did not hit this earlier as all tested devices in recent years were only using 64bit DMA; the rare exception for this is MPT3 SAS adapter which uses both 32bit and 64bit DMA access and it has not been tested with VFIO much. Signed-off-by: Alexey Kardashevskiy Acked-by: Gavin Shan Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 957a57a6c812..e36738291c32 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1775,17 +1775,20 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev) } static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, - struct pci_bus *bus) + struct pci_bus *bus, + bool add_to_group) { struct pci_dev *dev; list_for_each_entry(dev, &bus->devices, bus_list) { set_iommu_table_base(&dev->dev, pe->table_group.tables[0]); set_dma_offset(&dev->dev, pe->tce_bypass_base); - iommu_add_device(&dev->dev); + if (add_to_group) + iommu_add_device(&dev->dev); if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate) - pnv_ioda_setup_bus_dma(pe, dev->subordinate); + pnv_ioda_setup_bus_dma(pe, dev->subordinate, + add_to_group); } } @@ -2191,7 +2194,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, set_iommu_table_base(&pe->pdev->dev, tbl); iommu_add_device(&pe->pdev->dev); } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); return; fail: @@ -2426,6 +2429,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) pnv_pci_ioda2_set_bypass(pe, false); pnv_pci_ioda2_unset_window(&pe->table_group, 0); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); pnv_ioda2_table_free(tbl); } @@ -2435,6 +2440,8 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) table_group); pnv_pci_ioda2_setup_default_config(pe); + if (pe->pbus) + pnv_ioda_setup_bus_dma(pe, pe->pbus, false); } static struct iommu_table_group_ops pnv_pci_ioda2_ops = { @@ -2731,7 +2738,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, if (pe->flags & PNV_IODA_PE_DEV) iommu_add_device(&pe->pdev->dev); else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) - pnv_ioda_setup_bus_dma(pe, pe->pbus); + pnv_ioda_setup_bus_dma(pe, pe->pbus, true); } #ifdef CONFIG_PCI_MSI From 28b62b1458685d8f68f67d9b2d511bf8fa32b746 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 8 Mar 2017 23:14:20 +0200 Subject: [PATCH 124/384] crypto: s5p-sss - Fix spinlock recursion on LRW(AES) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Running TCRYPT with LRW compiled causes spinlock recursion: testing speed of async lrw(aes) (lrw(ecb-aes-s5p)) encryption tcrypt: test 0 (256 bit key, 16 byte blocks): 19007 operations in 1 seconds (304112 bytes) tcrypt: test 1 (256 bit key, 64 byte blocks): 15753 operations in 1 seconds (1008192 bytes) tcrypt: test 2 (256 bit key, 256 byte blocks): 14293 operations in 1 seconds (3659008 bytes) tcrypt: test 3 (256 bit key, 1024 byte blocks): 11906 operations in 1 seconds (12191744 bytes) tcrypt: test 4 (256 bit key, 8192 byte blocks): BUG: spinlock recursion on CPU#1, irq/84-10830000/89  lock: 0xeea99a68, .magic: dead4ead, .owner: irq/84-10830000/89, .owner_cpu: 1 CPU: 1 PID: 89 Comm: irq/84-10830000 Not tainted 4.11.0-rc1-00001-g897ca6d0800d #559 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x78/0x8c) [] (dump_stack) from [] (do_raw_spin_lock+0x11c/0x120) [] (do_raw_spin_lock) from [] (_raw_spin_lock_irqsave+0x20/0x28) [] (_raw_spin_lock_irqsave) from [] (s5p_aes_crypt+0x2c/0xb4) [] (s5p_aes_crypt) from [] (do_encrypt+0x78/0xb0 [lrw]) [] (do_encrypt [lrw]) from [] (encrypt_done+0x24/0x54 [lrw]) [] (encrypt_done [lrw]) from [] (s5p_aes_complete+0x60/0xcc) [] (s5p_aes_complete) from [] (s5p_aes_interrupt+0x134/0x1a0) [] (s5p_aes_interrupt) from [] (irq_thread_fn+0x1c/0x54) [] (irq_thread_fn) from [] (irq_thread+0x12c/0x1e0) [] (irq_thread) from [] (kthread+0x108/0x138) [] (kthread) from [] (ret_from_fork+0x14/0x3c) Interrupt handling routine was calling req->base.complete() under spinlock. In most cases this wasn't fatal but when combined with some of the cipher modes (like LRW) this caused recursion - starting the new encryption (s5p_aes_crypt()) while still holding the spinlock from previous round (s5p_aes_complete()). Beside that, the s5p_aes_interrupt() error handling path could execute two completions in case of error for RX and TX blocks. Rewrite the interrupt handling routine and the completion by: 1. Splitting the operations on scatterlist copies from s5p_aes_complete() into separate s5p_sg_done(). This still should be done under lock. The s5p_aes_complete() now only calls req->base.complete() and it has to be called outside of lock. 2. Moving the s5p_aes_complete() out of spinlock critical sections. In interrupt service routine s5p_aes_interrupts(), it appeared in few places, including error paths inside other functions called from ISR. This code was not so obvious to read so simplify it by putting the s5p_aes_complete() only within ISR level. Reported-by: Nathan Royce Cc: # v4.10.x: 07de4bc88c crypto: s5p-sss - Fix completing Cc: # v4.10.x Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 127 +++++++++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 45 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index a668286d62cb..1b9da3dc799b 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -270,7 +270,7 @@ static void s5p_sg_copy_buf(void *buf, struct scatterlist *sg, scatterwalk_done(&walk, out, 0); } -static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) +static void s5p_sg_done(struct s5p_aes_dev *dev) { if (dev->sg_dst_cpy) { dev_dbg(dev->dev, @@ -281,8 +281,11 @@ static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) } s5p_free_sg_cpy(dev, &dev->sg_src_cpy); s5p_free_sg_cpy(dev, &dev->sg_dst_cpy); +} - /* holding a lock outside */ +/* Calls the completion. Cannot be called with dev->lock hold. */ +static void s5p_aes_complete(struct s5p_aes_dev *dev, int err) +{ dev->req->base.complete(&dev->req->base, err); dev->busy = false; } @@ -368,51 +371,44 @@ static int s5p_set_indata(struct s5p_aes_dev *dev, struct scatterlist *sg) } /* - * Returns true if new transmitting (output) data is ready and its - * address+length have to be written to device (by calling - * s5p_set_dma_outdata()). False otherwise. + * Returns -ERRNO on error (mapping of new data failed). + * On success returns: + * - 0 if there is no more data, + * - 1 if new transmitting (output) data is ready and its address+length + * have to be written to device (by calling s5p_set_dma_outdata()). */ -static bool s5p_aes_tx(struct s5p_aes_dev *dev) +static int s5p_aes_tx(struct s5p_aes_dev *dev) { - int err = 0; - bool ret = false; + int ret = 0; s5p_unset_outdata(dev); if (!sg_is_last(dev->sg_dst)) { - err = s5p_set_outdata(dev, sg_next(dev->sg_dst)); - if (err) - s5p_aes_complete(dev, err); - else - ret = true; - } else { - s5p_aes_complete(dev, err); - - dev->busy = true; - tasklet_schedule(&dev->tasklet); + ret = s5p_set_outdata(dev, sg_next(dev->sg_dst)); + if (!ret) + ret = 1; } return ret; } /* - * Returns true if new receiving (input) data is ready and its - * address+length have to be written to device (by calling - * s5p_set_dma_indata()). False otherwise. + * Returns -ERRNO on error (mapping of new data failed). + * On success returns: + * - 0 if there is no more data, + * - 1 if new receiving (input) data is ready and its address+length + * have to be written to device (by calling s5p_set_dma_indata()). */ -static bool s5p_aes_rx(struct s5p_aes_dev *dev) +static int s5p_aes_rx(struct s5p_aes_dev *dev/*, bool *set_dma*/) { - int err; - bool ret = false; + int ret = 0; s5p_unset_indata(dev); if (!sg_is_last(dev->sg_src)) { - err = s5p_set_indata(dev, sg_next(dev->sg_src)); - if (err) - s5p_aes_complete(dev, err); - else - ret = true; + ret = s5p_set_indata(dev, sg_next(dev->sg_src)); + if (!ret) + ret = 1; } return ret; @@ -422,33 +418,73 @@ static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - bool set_dma_tx = false; - bool set_dma_rx = false; + int err_dma_tx = 0; + int err_dma_rx = 0; + bool tx_end = false; unsigned long flags; uint32_t status; + int err; spin_lock_irqsave(&dev->lock, flags); + /* + * Handle rx or tx interrupt. If there is still data (scatterlist did not + * reach end), then map next scatterlist entry. + * In case of such mapping error, s5p_aes_complete() should be called. + * + * If there is no more data in tx scatter list, call s5p_aes_complete() + * and schedule new tasklet. + */ status = SSS_READ(dev, FCINTSTAT); if (status & SSS_FCINTSTAT_BRDMAINT) - set_dma_rx = s5p_aes_rx(dev); - if (status & SSS_FCINTSTAT_BTDMAINT) - set_dma_tx = s5p_aes_tx(dev); + err_dma_rx = s5p_aes_rx(dev); + + if (status & SSS_FCINTSTAT_BTDMAINT) { + if (sg_is_last(dev->sg_dst)) + tx_end = true; + err_dma_tx = s5p_aes_tx(dev); + } SSS_WRITE(dev, FCINTPEND, status); - /* - * Writing length of DMA block (either receiving or transmitting) - * will start the operation immediately, so this should be done - * at the end (even after clearing pending interrupts to not miss the - * interrupt). - */ - if (set_dma_tx) - s5p_set_dma_outdata(dev, dev->sg_dst); - if (set_dma_rx) - s5p_set_dma_indata(dev, dev->sg_src); + if (err_dma_rx < 0) { + err = err_dma_rx; + goto error; + } + if (err_dma_tx < 0) { + err = err_dma_tx; + goto error; + } + if (tx_end) { + s5p_sg_done(dev); + + spin_unlock_irqrestore(&dev->lock, flags); + + s5p_aes_complete(dev, 0); + dev->busy = true; + tasklet_schedule(&dev->tasklet); + } else { + /* + * Writing length of DMA block (either receiving or + * transmitting) will start the operation immediately, so this + * should be done at the end (even after clearing pending + * interrupts to not miss the interrupt). + */ + if (err_dma_tx == 1) + s5p_set_dma_outdata(dev, dev->sg_dst); + if (err_dma_rx == 1) + s5p_set_dma_indata(dev, dev->sg_src); + + spin_unlock_irqrestore(&dev->lock, flags); + } + + return IRQ_HANDLED; + +error: + s5p_sg_done(dev); spin_unlock_irqrestore(&dev->lock, flags); + s5p_aes_complete(dev, err); return IRQ_HANDLED; } @@ -597,8 +633,9 @@ static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) s5p_unset_indata(dev); indata_error: - s5p_aes_complete(dev, err); + s5p_sg_done(dev); spin_unlock_irqrestore(&dev->lock, flags); + s5p_aes_complete(dev, err); } static void s5p_tasklet_cb(unsigned long data) From 6022c5cadf1a43ca30f431f128daa6163909ad60 Mon Sep 17 00:00:00 2001 From: Yuantian Tang Date: Thu, 9 Mar 2017 17:13:29 +0800 Subject: [PATCH 125/384] ahci: qoriq: correct the sata ecc setting error Sata ecc is controlled by only 1 bit which is 24bit in big-endian in ecc register. So only setting 24bit to disable sata ecc prevents other bits from being overwritten in ecc register. Signed-off-by: Tang Yuantian Signed-off-by: Tejun Heo --- drivers/ata/ahci_qoriq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index 85d833289f28..4c96f3ac4976 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -177,7 +177,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS1043A: if (!qpriv->ecc_addr) return -EINVAL; - writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); + writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, + qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) @@ -194,7 +195,8 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv) case AHCI_LS1046A: if (!qpriv->ecc_addr) return -EINVAL; - writel(ECC_DIS_ARMV8_CH2, qpriv->ecc_addr); + writel(readl(qpriv->ecc_addr) | ECC_DIS_ARMV8_CH2, + qpriv->ecc_addr); writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1); writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS); if (qpriv->is_dmacoherent) From 6d399783e9d4e9bd44931501948059d24ad96ff8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Feb 2017 12:26:41 -0800 Subject: [PATCH 126/384] md/raid10: submit bio directly to replacement disk Commit 57c67df(md/raid10: submit IO from originating thread instead of md thread) submits bio directly for normal disks but not for replacement disks. There is no point we shouldn't do this for replacement disks. Cc: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 063c43d83b72..1443305613c5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1477,11 +1477,24 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio, mbio->bi_bdev = (void*)rdev; atomic_inc(&r10_bio->remaining); + + cb = blk_check_plugged(raid10_unplug, mddev, + sizeof(*plug)); + if (cb) + plug = container_of(cb, struct raid10_plug_cb, + cb); + else + plug = NULL; spin_lock_irqsave(&conf->device_lock, flags); - bio_list_add(&conf->pending_bio_list, mbio); - conf->pending_count++; + if (plug) { + bio_list_add(&plug->pending, mbio); + plug->pending_cnt++; + } else { + bio_list_add(&conf->pending_bio_list, mbio); + conf->pending_count++; + } spin_unlock_irqrestore(&conf->device_lock, flags); - if (!mddev_check_plugged(mddev)) + if (!plug) md_wakeup_thread(mddev->thread); } } From 99b3d74ec05c4a4c57766a90d65b53d78ab06404 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Feb 2017 12:31:10 -0800 Subject: [PATCH 127/384] md: delete dead code Nobody is using mddev_check_plugged(), so delete the dead code Signed-off-by: Shaohua Li --- drivers/md/md.c | 8 -------- drivers/md/md.h | 6 ------ 2 files changed, 14 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 548d1b8014f8..82bd1f3d2b19 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -440,14 +440,6 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) } EXPORT_SYMBOL(md_flush_request); -void md_unplug(struct blk_plug_cb *cb, bool from_schedule) -{ - struct mddev *mddev = cb->data; - md_wakeup_thread(mddev->thread); - kfree(cb); -} -EXPORT_SYMBOL(md_unplug); - static inline struct mddev *mddev_get(struct mddev *mddev) { atomic_inc(&mddev->active); diff --git a/drivers/md/md.h b/drivers/md/md.h index b8859cbf84b6..dde8ecb760c8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -676,16 +676,10 @@ extern void mddev_resume(struct mddev *mddev); extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, struct mddev *mddev); -extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule); extern void md_reload_sb(struct mddev *mddev, int raid_disk); extern void md_update_sb(struct mddev *mddev, int force); extern void md_kick_rdev_from_array(struct md_rdev * rdev); struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); -static inline int mddev_check_plugged(struct mddev *mddev) -{ - return !!blk_check_plugged(md_unplug, mddev, - sizeof(struct blk_plug_cb)); -} static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) { From 9c8043f337f14d1743006dfc59c03e80a42e3884 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:12 +0800 Subject: [PATCH 128/384] md-cluster: free md_cluster_info if node leave cluster To avoid memory leak, we need to free the cinfo which is allocated when node join cluster. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 2b13117fb918..ba7edcdd09ce 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -974,6 +974,7 @@ static int leave(struct mddev *mddev) lockres_free(cinfo->bitmap_lockres); unlock_all_bitmaps(mddev); dlm_release_lockspace(cinfo->lockspace, 2); + kfree(cinfo); return 0; } From 75df023f4f2188c21181996e28234fef9351ef45 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:13 +0800 Subject: [PATCH 129/384] md-cluster: remove useless memset from gather_all_resync_info This memset is not needed. The lvb is already zeroed because it was recently allocated by lockres_init, which uses kzalloc(), and read_resync_info() doesn't need it to be zero anyway. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md-cluster.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index ba7edcdd09ce..321ecac23027 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -777,7 +777,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) bm_lockres->flags |= DLM_LKF_NOQUEUE; ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); if (ret == -EAGAIN) { - memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE); s = read_resync_info(mddev, bm_lockres); if (s) { pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n", From c94836342192b05d599d6aa3397f732f7a238689 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 24 Feb 2017 11:15:23 +0800 Subject: [PATCH 130/384] md: move funcs from pers->resize to update_size raid1_resize and raid5_resize should also check the mddev->queue if run underneath dm-raid. And both set_capacity and revalidate_disk are used in pers->resize such as raid1, raid10 and raid5. So move them from personality file to common code. Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li --- drivers/md/md.c | 8 ++++++-- drivers/md/raid1.c | 2 -- drivers/md/raid10.c | 4 ---- drivers/md/raid5.c | 2 -- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 82bd1f3d2b19..bd15a18485c8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6525,8 +6525,12 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) return -ENOSPC; } rv = mddev->pers->resize(mddev, num_sectors); - if (!rv) - revalidate_disk(mddev->gendisk); + if (!rv) { + if (mddev->queue) { + set_capacity(mddev->gendisk, mddev->array_sectors); + revalidate_disk(mddev->gendisk); + } + } return rv; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fbc2d7851b49..10c3865e1186 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3246,8 +3246,6 @@ static int raid1_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 1443305613c5..c4db6d1fb6a2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3956,10 +3956,6 @@ static int raid10_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, size); - if (mddev->queue) { - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); - } if (sectors > mddev->dev_sectors && mddev->recovery_cp > oldsize) { mddev->recovery_cp = oldsize; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4fb09b3fcb41..6bfedfcf41c1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7605,8 +7605,6 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) return ret; } md_set_array_sectors(mddev, newsize); - set_capacity(mddev->gendisk, mddev->array_sectors); - revalidate_disk(mddev->gendisk); if (sectors > mddev->dev_sectors && mddev->recovery_cp > mddev->dev_sectors) { mddev->recovery_cp = mddev->dev_sectors; From 1b3bae49fba52f1ec499c36c53bc07761a9f6c4d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 1 Mar 2017 07:31:28 +1100 Subject: [PATCH 131/384] md: don't impose the MD_SB_DISKS limit on arrays without metadata. These arrays, created with "mdadm --build" don't benefit from a limit. The default will be used, which is '0' and is interpreted as "don't impose a limit". Reported-by: ian_bruce@mail.ru Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index bd15a18485c8..cd89ad3c3a0d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6450,11 +6450,10 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info) mddev->layout = info->layout; mddev->chunk_sectors = info->chunk_size >> 9; - mddev->max_disks = MD_SB_DISKS; - if (mddev->persistent) { - mddev->flags = 0; - mddev->sb_flags = 0; + mddev->max_disks = MD_SB_DISKS; + mddev->flags = 0; + mddev->sb_flags = 0; } set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); From 61eb2b43b99ebdc9bc6bc83d9792257b243e7cb3 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 28 Feb 2017 13:00:20 -0800 Subject: [PATCH 132/384] md/raid1/10: fix potential deadlock Neil Brown pointed out a potential deadlock in raid 10 code with bio_split/chain. The raid1 code could have the same issue, but recent barrier rework makes it less likely to happen. The deadlock happens in below sequence: 1. generic_make_request(bio), this will set current->bio_list 2. raid10_make_request will split bio to bio1 and bio2 3. __make_request(bio1), wait_barrer, add underlayer disk bio to current->bio_list 4. __make_request(bio2), wait_barrer If raise_barrier happens between 3 & 4, since wait_barrier runs at 3, raise_barrier waits for IO completion from 3. And since raise_barrier sets barrier, 4 waits for raise_barrier. But IO from 3 can't be dispatched because raid10_make_request() doesn't finished yet. The solution is to adjust the IO ordering. Quotes from Neil: " It is much safer to: if (need to split) { split = bio_split(bio, ...) bio_chain(...) make_request_fn(split); generic_make_request(bio); } else make_request_fn(mddev, bio); This way we first process the initial section of the bio (in 'split') which will queue some requests to the underlying devices. These requests will be queued in generic_make_request. Then we queue the remainder of the bio, which will be added to the end of the generic_make_request queue. Then we return. generic_make_request() will pop the lower-level device requests off the queue and handle them first. Then it will process the remainder of the original bio once the first section has been fully processed. " Note, this only happens in read path. In write path, the bio is flushed to underlaying disks either by blk flush (from schedule) or offladed to raid1/10d. It's queued in current->bio_list. Cc: Coly Li Cc: stable@vger.kernel.org (v3.14+, only the raid10 part) Suggested-by: NeilBrown Reviewed-by: Jack Wang Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 25 +++++++++++++++++++++++-- drivers/md/raid10.c | 18 ++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 10c3865e1186..c33e96e33b8e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1587,9 +1587,30 @@ static void raid1_make_request(struct mddev *mddev, struct bio *bio) split = bio; } - if (bio_data_dir(split) == READ) + if (bio_data_dir(split) == READ) { raid1_read_request(mddev, split); - else + + /* + * If a bio is splitted, the first part of bio will + * pass barrier but the bio is queued in + * current->bio_list (see generic_make_request). If + * there is a raise_barrier() called here, the second + * part of bio can't pass barrier. But since the first + * part bio isn't dispatched to underlaying disks yet, + * the barrier is never released, hence raise_barrier + * will alays wait. We have a deadlock. + * Note, this only happens in read path. For write + * path, the first part of bio is dispatched in a + * schedule() call (because of blk plug) or offloaded + * to raid10d. + * Quitting from the function immediately can change + * the bio order queued in bio_list and avoid the deadlock. + */ + if (split != bio) { + generic_make_request(bio); + break; + } + } else raid1_write_request(mddev, split); } while (split != bio); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c4db6d1fb6a2..b1b1f982a722 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1584,7 +1584,25 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) split = bio; } + /* + * If a bio is splitted, the first part of bio will pass + * barrier but the bio is queued in current->bio_list (see + * generic_make_request). If there is a raise_barrier() called + * here, the second part of bio can't pass barrier. But since + * the first part bio isn't dispatched to underlaying disks + * yet, the barrier is never released, hence raise_barrier will + * alays wait. We have a deadlock. + * Note, this only happens in read path. For write path, the + * first part of bio is dispatched in a schedule() call + * (because of blk plug) or offloaded to raid10d. + * Quitting from the function immediately can change the bio + * order queued in bio_list and avoid the deadlock. + */ __make_request(mddev, split); + if (split != bio && bio_data_dir(bio) == READ) { + generic_make_request(bio); + break; + } } while (split != bio); /* In case raid10d snuck in to freeze_array */ From 5be083cedc087b2d457ef2e9c2d5698c94d3d5e4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 6 Mar 2017 15:57:31 -0800 Subject: [PATCH 133/384] net: ipv6: Remove redundant RTA_OIF in multipath routes Dinesh reported that RTA_MULTIPATH nexthops are 8-bytes larger with IPv6 than IPv4. The recent refactoring for multipath support in netlink messages does discriminate between non-multipath which needs the OIF and multipath which adds a rtnexthop struct for each hop making the RTA_OIF attribute redundant. Resolve by adding a flag to the info function to skip the oif for multipath. Fixes: beb1afac518d ("net: ipv6: Add support to dump multipath routes via RTA_MULTIPATH attribute") Reported-by: Dinesh Dutt Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 229bfcc451ef..35c58b669ebd 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3299,7 +3299,6 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + nla_total_size(4) /* RTA_OIF */ + lwtunnel_get_encap_size(rt->dst.lwtstate); nexthop_len *= rt->rt6i_nsiblings; @@ -3323,7 +3322,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) } static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, - unsigned int *flags) + unsigned int *flags, bool skip_oif) { if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) { *flags |= RTNH_F_LINKDOWN; @@ -3336,7 +3335,8 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } - if (rt->dst.dev && + /* not needed for multipath encoding b/c it has a rtnexthop struct */ + if (!skip_oif && rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) goto nla_put_failure; @@ -3350,6 +3350,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, return -EMSGSIZE; } +/* add multipath next hop */ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) { struct rtnexthop *rtnh; @@ -3362,7 +3363,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) rtnh->rtnh_hops = 0; rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; - if (rt6_nexthop_info(skb, rt, &flags) < 0) + if (rt6_nexthop_info(skb, rt, &flags, true) < 0) goto nla_put_failure; rtnh->rtnh_flags = flags; @@ -3515,7 +3516,7 @@ static int rt6_fill_node(struct net *net, nla_nest_end(skb, mp); } else { - if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0) + if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) goto nla_put_failure; } From 67e303e0c7683957eb4e530453705a43a6d4f966 Mon Sep 17 00:00:00 2001 From: VSR Burru Date: Mon, 6 Mar 2017 18:45:59 -0800 Subject: [PATCH 134/384] liquidio: improve UDP TX performance Improve UDP TX performance by: * reducing the ring size from 2K to 512 * replacing the numerous streaming DMA allocations for info buffers and gather lists with one large consistent DMA allocation per ring BQL is not effective here. We reduced the ring size because there is heavy overhead with dma_map_single every so often. With iommu=on, dma_map_single in PF Tx data path was taking longer time (~700usec) for every ~250 packets. Debugged intel_iommu code, and found that PF driver is utilizing too many static IO virtual address mapping entries (for gather list entries and info buffers): about 100K entries for two PF's each using 8 rings. Also, finding an empty entry (in rbtree of device domain's iova mapping in kernel) during Tx path becomes a bottleneck every so often; the loop to find the empty entry goes through over 40K iterations; this is too costly and was the major overhead. Overhead is low when this loop quits quickly. Netperf benchmark numbers before and after patch: PF UDP TX +--------+--------+------------+------------+---------+ | | | Before | After | | | Number | | Patch | Patch | | | of | Packet | Throughput | Throughput | Percent | | Flows | Size | (Gbps) | (Gbps) | Change | +--------+--------+------------+------------+---------+ | | 360 | 0.52 | 0.93 | +78.9 | | 1 | 1024 | 1.62 | 2.84 | +75.3 | | | 1518 | 2.44 | 4.21 | +72.5 | +--------+--------+------------+------------+---------+ | | 360 | 0.45 | 1.59 | +253.3 | | 4 | 1024 | 1.34 | 5.48 | +308.9 | | | 1518 | 2.27 | 8.31 | +266.1 | +--------+--------+------------+------------+---------+ | | 360 | 0.40 | 1.61 | +302.5 | | 8 | 1024 | 1.64 | 4.24 | +158.5 | | | 1518 | 2.87 | 6.52 | +127.2 | +--------+--------+------------+------------+---------+ VF UDP TX +--------+--------+------------+------------+---------+ | | | Before | After | | | Number | | Patch | Patch | | | of | Packet | Throughput | Throughput | Percent | | Flows | Size | (Gbps) | (Gbps) | Change | +--------+--------+------------+------------+---------+ | | 360 | 1.28 | 1.49 | +16.4 | | 1 | 1024 | 4.44 | 4.39 | -1.1 | | | 1518 | 6.08 | 6.51 | +7.1 | +--------+--------+------------+------------+---------+ | | 360 | 2.35 | 2.35 | 0.0 | | 4 | 1024 | 6.41 | 8.07 | +25.9 | | | 1518 | 9.56 | 9.54 | -0.2 | +--------+--------+------------+------------+---------+ | | 360 | 3.41 | 3.65 | +7.0 | | 8 | 1024 | 9.35 | 9.34 | -0.1 | | | 1518 | 9.56 | 9.57 | +0.1 | +--------+--------+------------+------------+---------+ Signed-off-by: VSR Burru Signed-off-by: Felix Manlunas Signed-off-by: Derek Chickles Signed-off-by: Raghu Vatsavayi Signed-off-by: David S. Miller --- .../net/ethernet/cavium/liquidio/lio_main.c | 110 +++++++++--------- .../ethernet/cavium/liquidio/lio_vf_main.c | 104 +++++++++-------- .../ethernet/cavium/liquidio/octeon_config.h | 6 +- .../ethernet/cavium/liquidio/octeon_droq.c | 17 +-- .../ethernet/cavium/liquidio/octeon_droq.h | 4 +- .../ethernet/cavium/liquidio/octeon_main.h | 42 ------- .../ethernet/cavium/liquidio/octeon_network.h | 43 ++++--- 7 files changed, 144 insertions(+), 182 deletions(-) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index be9c0e3f5ade..92f46b1375c3 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -152,7 +152,7 @@ struct octnic_gather { */ struct octeon_sg_entry *sg; - u64 sg_dma_ptr; + dma_addr_t sg_dma_ptr; }; struct handshake { @@ -734,6 +734,9 @@ static void delete_glists(struct lio *lio) struct octnic_gather *g; int i; + kfree(lio->glist_lock); + lio->glist_lock = NULL; + if (!lio->glist) return; @@ -741,23 +744,26 @@ static void delete_glists(struct lio *lio) do { g = (struct octnic_gather *) list_delete_head(&lio->glist[i]); - if (g) { - if (g->sg) { - dma_unmap_single(&lio->oct_dev-> - pci_dev->dev, - g->sg_dma_ptr, - g->sg_size, - DMA_TO_DEVICE); - kfree((void *)((unsigned long)g->sg - - g->adjust)); - } + if (g) kfree(g); - } } while (g); + + if (lio->glists_virt_base && lio->glists_virt_base[i]) { + lio_dma_free(lio->oct_dev, + lio->glist_entry_size * lio->tx_qsize, + lio->glists_virt_base[i], + lio->glists_dma_base[i]); + } } - kfree((void *)lio->glist); - kfree((void *)lio->glist_lock); + kfree(lio->glists_virt_base); + lio->glists_virt_base = NULL; + + kfree(lio->glists_dma_base); + lio->glists_dma_base = NULL; + + kfree(lio->glist); + lio->glist = NULL; } /** @@ -772,13 +778,30 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) lio->glist_lock = kcalloc(num_iqs, sizeof(*lio->glist_lock), GFP_KERNEL); if (!lio->glist_lock) - return 1; + return -ENOMEM; lio->glist = kcalloc(num_iqs, sizeof(*lio->glist), GFP_KERNEL); if (!lio->glist) { - kfree((void *)lio->glist_lock); - return 1; + kfree(lio->glist_lock); + lio->glist_lock = NULL; + return -ENOMEM; + } + + lio->glist_entry_size = + ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); + + /* allocate memory to store virtual and dma base address of + * per glist consistent memory + */ + lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), + GFP_KERNEL); + lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), + GFP_KERNEL); + + if (!lio->glists_virt_base || !lio->glists_dma_base) { + delete_glists(lio); + return -ENOMEM; } for (i = 0; i < num_iqs; i++) { @@ -788,6 +811,16 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) INIT_LIST_HEAD(&lio->glist[i]); + lio->glists_virt_base[i] = + lio_dma_alloc(oct, + lio->glist_entry_size * lio->tx_qsize, + &lio->glists_dma_base[i]); + + if (!lio->glists_virt_base[i]) { + delete_glists(lio); + return -ENOMEM; + } + for (j = 0; j < lio->tx_qsize; j++) { g = kzalloc_node(sizeof(*g), GFP_KERNEL, numa_node); @@ -796,43 +829,18 @@ static int setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) if (!g) break; - g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * - OCT_SG_ENTRY_SIZE); + g->sg = lio->glists_virt_base[i] + + (j * lio->glist_entry_size); - g->sg = kmalloc_node(g->sg_size + 8, - GFP_KERNEL, numa_node); - if (!g->sg) - g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL); - if (!g->sg) { - kfree(g); - break; - } - - /* The gather component should be aligned on 64-bit - * boundary - */ - if (((unsigned long)g->sg) & 7) { - g->adjust = 8 - (((unsigned long)g->sg) & 7); - g->sg = (struct octeon_sg_entry *) - ((unsigned long)g->sg + g->adjust); - } - g->sg_dma_ptr = dma_map_single(&oct->pci_dev->dev, - g->sg, g->sg_size, - DMA_TO_DEVICE); - if (dma_mapping_error(&oct->pci_dev->dev, - g->sg_dma_ptr)) { - kfree((void *)((unsigned long)g->sg - - g->adjust)); - kfree(g); - break; - } + g->sg_dma_ptr = lio->glists_dma_base[i] + + (j * lio->glist_entry_size); list_add_tail(&g->list, &lio->glist[i]); } if (j != lio->tx_qsize) { delete_glists(lio); - return 1; + return -ENOMEM; } } @@ -1885,9 +1893,6 @@ static void free_netsgbuf(void *buf) i++; } - dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev, - g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); list_add_tail(&g->list, &lio->glist[iq]); @@ -1933,9 +1938,6 @@ static void free_netsgbuf_with_resp(void *buf) i++; } - dma_sync_single_for_cpu(&lio->oct_dev->pci_dev->dev, - g->sg_dma_ptr, g->sg_size, DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); @@ -3273,8 +3275,6 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) i++; } - dma_sync_single_for_device(&oct->pci_dev->dev, g->sg_dma_ptr, - g->sg_size, DMA_TO_DEVICE); dptr = g->sg_dma_ptr; if (OCTEON_CN23XX_PF(oct)) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 9d5e03502c76..7b83be4ce1fe 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -108,6 +108,8 @@ struct octnic_gather { * received from the IP layer. */ struct octeon_sg_entry *sg; + + dma_addr_t sg_dma_ptr; }; struct octeon_device_priv { @@ -490,6 +492,9 @@ static void delete_glists(struct lio *lio) struct octnic_gather *g; int i; + kfree(lio->glist_lock); + lio->glist_lock = NULL; + if (!lio->glist) return; @@ -497,17 +502,26 @@ static void delete_glists(struct lio *lio) do { g = (struct octnic_gather *) list_delete_head(&lio->glist[i]); - if (g) { - if (g->sg) - kfree((void *)((unsigned long)g->sg - - g->adjust)); + if (g) kfree(g); - } } while (g); + + if (lio->glists_virt_base && lio->glists_virt_base[i]) { + lio_dma_free(lio->oct_dev, + lio->glist_entry_size * lio->tx_qsize, + lio->glists_virt_base[i], + lio->glists_dma_base[i]); + } } + kfree(lio->glists_virt_base); + lio->glists_virt_base = NULL; + + kfree(lio->glists_dma_base); + lio->glists_dma_base = NULL; + kfree(lio->glist); - kfree(lio->glist_lock); + lio->glist = NULL; } /** @@ -522,13 +536,30 @@ static int setup_glists(struct lio *lio, int num_iqs) lio->glist_lock = kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL); if (!lio->glist_lock) - return 1; + return -ENOMEM; lio->glist = kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL); if (!lio->glist) { kfree(lio->glist_lock); - return 1; + lio->glist_lock = NULL; + return -ENOMEM; + } + + lio->glist_entry_size = + ROUNDUP8((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * OCT_SG_ENTRY_SIZE); + + /* allocate memory to store virtual and dma base address of + * per glist consistent memory + */ + lio->glists_virt_base = kcalloc(num_iqs, sizeof(*lio->glists_virt_base), + GFP_KERNEL); + lio->glists_dma_base = kcalloc(num_iqs, sizeof(*lio->glists_dma_base), + GFP_KERNEL); + + if (!lio->glists_virt_base || !lio->glists_dma_base) { + delete_glists(lio); + return -ENOMEM; } for (i = 0; i < num_iqs; i++) { @@ -536,34 +567,33 @@ static int setup_glists(struct lio *lio, int num_iqs) INIT_LIST_HEAD(&lio->glist[i]); + lio->glists_virt_base[i] = + lio_dma_alloc(lio->oct_dev, + lio->glist_entry_size * lio->tx_qsize, + &lio->glists_dma_base[i]); + + if (!lio->glists_virt_base[i]) { + delete_glists(lio); + return -ENOMEM; + } + for (j = 0; j < lio->tx_qsize; j++) { g = kzalloc(sizeof(*g), GFP_KERNEL); if (!g) break; - g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) * - OCT_SG_ENTRY_SIZE); + g->sg = lio->glists_virt_base[i] + + (j * lio->glist_entry_size); - g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL); - if (!g->sg) { - kfree(g); - break; - } + g->sg_dma_ptr = lio->glists_dma_base[i] + + (j * lio->glist_entry_size); - /* The gather component should be aligned on 64-bit - * boundary - */ - if (((unsigned long)g->sg) & 7) { - g->adjust = 8 - (((unsigned long)g->sg) & 7); - g->sg = (struct octeon_sg_entry *) - ((unsigned long)g->sg + g->adjust); - } list_add_tail(&g->list, &lio->glist[i]); } if (j != lio->tx_qsize) { delete_glists(lio); - return 1; + return -ENOMEM; } } @@ -1324,10 +1354,6 @@ static void free_netsgbuf(void *buf) i++; } - dma_unmap_single(&lio->oct_dev->pci_dev->dev, - finfo->dptr, g->sg_size, - DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); @@ -1374,10 +1400,6 @@ static void free_netsgbuf_with_resp(void *buf) i++; } - dma_unmap_single(&lio->oct_dev->pci_dev->dev, - finfo->dptr, g->sg_size, - DMA_TO_DEVICE); - iq = skb_iq(lio, skb); spin_lock(&lio->glist_lock[iq]); @@ -2382,23 +2404,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) i++; } - dptr = dma_map_single(&oct->pci_dev->dev, - g->sg, g->sg_size, - DMA_TO_DEVICE); - if (dma_mapping_error(&oct->pci_dev->dev, dptr)) { - dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n", - __func__); - dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0], - skb->len - skb->data_len, - DMA_TO_DEVICE); - for (j = 1; j <= frags; j++) { - frag = &skb_shinfo(skb)->frags[j - 1]; - dma_unmap_page(&oct->pci_dev->dev, - g->sg[j >> 2].ptr[j & 3], - frag->size, DMA_TO_DEVICE); - } - return NETDEV_TX_BUSY; - } + dptr = g->sg_dma_ptr; ndata.cmd.cmd3.dptr = dptr; finfo->dptr = dptr; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index b3dc2e9651a8..d29ebc531151 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -71,17 +71,17 @@ #define CN23XX_MAX_RINGS_PER_VF 8 #define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF -#define CN23XX_MAX_IQ_DESCRIPTORS 2048 +#define CN23XX_MAX_IQ_DESCRIPTORS 512 #define CN23XX_DB_MIN 1 #define CN23XX_DB_MAX 8 #define CN23XX_DB_TIMEOUT 1 #define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF -#define CN23XX_MAX_OQ_DESCRIPTORS 2048 +#define CN23XX_MAX_OQ_DESCRIPTORS 512 #define CN23XX_OQ_BUF_SIZE 1536 #define CN23XX_OQ_PKTSPER_INTR 128 /*#define CAVIUM_ONLY_CN23XX_RX_PERF*/ -#define CN23XX_OQ_REFIL_THRESHOLD 128 +#define CN23XX_OQ_REFIL_THRESHOLD 16 #define CN23XX_OQ_INTR_PKT 64 #define CN23XX_OQ_INTR_TIME 100 diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 0be87d119a97..79f809479af6 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -155,11 +155,6 @@ octeon_droq_destroy_ring_buffers(struct octeon_device *oct, recv_buffer_destroy(droq->recv_buf_list[i].buffer, pg_info); - if (droq->desc_ring && droq->desc_ring[i].info_ptr) - lio_unmap_ring_info(oct->pci_dev, - (u64)droq-> - desc_ring[i].info_ptr, - OCT_DROQ_INFO_SIZE); droq->recv_buf_list[i].buffer = NULL; } @@ -211,10 +206,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no) vfree(droq->recv_buf_list); if (droq->info_base_addr) - cnnic_free_aligned_dma(oct->pci_dev, droq->info_list, - droq->info_alloc_size, - droq->info_base_addr, - droq->info_list_dma); + lio_free_info_buffer(oct, droq); if (droq->desc_ring) lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE), @@ -294,12 +286,7 @@ int octeon_init_droq(struct octeon_device *oct, dev_dbg(&oct->pci_dev->dev, "droq[%d]: num_desc: %d\n", q_no, droq->max_count); - droq->info_list = - cnnic_numa_alloc_aligned_dma((droq->max_count * - OCT_DROQ_INFO_SIZE), - &droq->info_alloc_size, - &droq->info_base_addr, - numa_node); + droq->info_list = lio_alloc_info_buffer(oct, droq); if (!droq->info_list) { dev_err(&oct->pci_dev->dev, "Cannot allocate memory for info list.\n"); lio_dma_free(oct, (droq->max_count * OCT_DROQ_DESC_SIZE), diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index e62074090681..6982c0af5ecc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -325,10 +325,10 @@ struct octeon_droq { size_t desc_ring_dma; /** Info ptr list are allocated at this virtual address. */ - size_t info_base_addr; + void *info_base_addr; /** DMA mapped address of the info list */ - size_t info_list_dma; + dma_addr_t info_list_dma; /** Allocated size of info list. */ u32 info_alloc_size; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index aa36e9ae7676..bed9ef17bc26 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -140,48 +140,6 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct, return 1; } -static inline void * -cnnic_numa_alloc_aligned_dma(u32 size, - u32 *alloc_size, - size_t *orig_ptr, - int numa_node) -{ - int retries = 0; - void *ptr = NULL; - -#define OCTEON_MAX_ALLOC_RETRIES 1 - do { - struct page *page = NULL; - - page = alloc_pages_node(numa_node, - GFP_KERNEL, - get_order(size)); - if (!page) - page = alloc_pages(GFP_KERNEL, - get_order(size)); - ptr = (void *)page_address(page); - if ((unsigned long)ptr & 0x07) { - __free_pages(page, get_order(size)); - ptr = NULL; - /* Increment the size required if the first - * attempt failed. - */ - if (!retries) - size += 7; - } - retries++; - } while ((retries <= OCTEON_MAX_ALLOC_RETRIES) && !ptr); - - *alloc_size = size; - *orig_ptr = (unsigned long)ptr; - if ((unsigned long)ptr & 0x07) - ptr = (void *)(((unsigned long)ptr + 7) & ~(7UL)); - return ptr; -} - -#define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \ - free_pages(orig_ptr, get_order(size)) - static inline int sleep_cond(wait_queue_head_t *wait_queue, int *condition) { diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 6bb89419006e..eef2a1e8a7e3 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -62,6 +62,9 @@ struct lio { /** Array of gather component linked lists */ struct list_head *glist; + void **glists_virt_base; + dma_addr_t *glists_dma_base; + u32 glist_entry_size; /** Pointer to the NIC properties for the Octeon device this network * interface is associated with. @@ -344,6 +347,29 @@ static inline void tx_buffer_free(void *buffer) #define lio_dma_free(oct, size, virt_addr, dma_addr) \ dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr) +static inline void * +lio_alloc_info_buffer(struct octeon_device *oct, + struct octeon_droq *droq) +{ + void *virt_ptr; + + virt_ptr = lio_dma_alloc(oct, (droq->max_count * OCT_DROQ_INFO_SIZE), + &droq->info_list_dma); + if (virt_ptr) { + droq->info_alloc_size = droq->max_count * OCT_DROQ_INFO_SIZE; + droq->info_base_addr = virt_ptr; + } + + return virt_ptr; +} + +static inline void lio_free_info_buffer(struct octeon_device *oct, + struct octeon_droq *droq) +{ + lio_dma_free(oct, droq->info_alloc_size, droq->info_base_addr, + droq->info_list_dma); +} + static inline void *get_rbd(struct sk_buff *skb) { @@ -359,22 +385,7 @@ void *get_rbd(struct sk_buff *skb) static inline u64 lio_map_ring_info(struct octeon_droq *droq, u32 i) { - dma_addr_t dma_addr; - struct octeon_device *oct = droq->oct_dev; - - dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i], - OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE); - - WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr)); - - return (u64)dma_addr; -} - -static inline void -lio_unmap_ring_info(struct pci_dev *pci_dev, - u64 info_ptr, u32 size) -{ - dma_unmap_single(&pci_dev->dev, info_ptr, size, DMA_FROM_DEVICE); + return droq->info_list_dma + (i * sizeof(struct octeon_droq_info)); } static inline u64 From 3b12f73a5c2977153f28a224392fd4729b50d1dc Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 7 Mar 2017 02:48:36 -0500 Subject: [PATCH 135/384] rds: ib: add error handle In the function rds_ib_setup_qp, the error handle is missing. When some error occurs, it is possible that memory leak occurs. As such, error handle is added. Cc: Joe Jin Reviewed-by: Junxiao Bi Reviewed-by: Guanglei Li Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index ce3775abc6e7..1c38d2c7caa8 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -442,7 +442,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_send_cq = NULL; ibdev_put_vector(rds_ibdev, ic->i_scq_vector); rdsdebug("ib_create_cq send failed: %d\n", ret); - goto out; + goto rds_ibdev_out; } ic->i_rcq_vector = ibdev_get_unused_vector(rds_ibdev); @@ -456,19 +456,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_recv_cq = NULL; ibdev_put_vector(rds_ibdev, ic->i_rcq_vector); rdsdebug("ib_create_cq recv failed: %d\n", ret); - goto out; + goto send_cq_out; } ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); if (ret) { rdsdebug("ib_req_notify_cq send failed: %d\n", ret); - goto out; + goto recv_cq_out; } ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); if (ret) { rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); - goto out; + goto recv_cq_out; } /* XXX negotiate max send/recv with remote? */ @@ -494,7 +494,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); if (ret) { rdsdebug("rdma_create_qp failed: %d\n", ret); - goto out; + goto recv_cq_out; } ic->i_send_hdrs = ib_dma_alloc_coherent(dev, @@ -504,7 +504,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent send failed\n"); - goto out; + goto qp_out; } ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, @@ -514,7 +514,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent recv failed\n"); - goto out; + goto send_hdrs_dma_out; } ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), @@ -522,7 +522,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("ib_dma_alloc_coherent ack failed\n"); - goto out; + goto recv_hdrs_dma_out; } ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work), @@ -530,7 +530,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_sends) { ret = -ENOMEM; rdsdebug("send allocation failed\n"); - goto out; + goto ack_dma_out; } ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work), @@ -538,7 +538,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) if (!ic->i_recvs) { ret = -ENOMEM; rdsdebug("recv allocation failed\n"); - goto out; + goto sends_out; } rds_ib_recv_init_ack(ic); @@ -546,8 +546,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); -out: + return ret; + +sends_out: + vfree(ic->i_sends); +ack_dma_out: + ib_dma_free_coherent(dev, sizeof(struct rds_header), + ic->i_ack, ic->i_ack_dma); +recv_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + ic->i_recv_hdrs, ic->i_recv_hdrs_dma); +send_hdrs_dma_out: + ib_dma_free_coherent(dev, ic->i_send_ring.w_nr * + sizeof(struct rds_header), + ic->i_send_hdrs, ic->i_send_hdrs_dma); +qp_out: + rdma_destroy_qp(ic->i_cm_id); +recv_cq_out: + if (!ib_destroy_cq(ic->i_recv_cq)) + ic->i_recv_cq = NULL; +send_cq_out: + if (!ib_destroy_cq(ic->i_send_cq)) + ic->i_send_cq = NULL; +rds_ibdev_out: + rds_ib_remove_conn(rds_ibdev, conn); rds_ib_dev_put(rds_ibdev); + return ret; } From 83abb7d7c91f4ac20e47c3089a10bb93b2ea8994 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 7 Mar 2017 18:09:08 +0530 Subject: [PATCH 136/384] net: thunderx: Fix IOMMU translation faults ACPI support has been added to ARM IOMMU driver in 4.10 kernel and that has resulted in VNIC interfaces throwing translation faults when kernel is booted with ACPI as driver was not using DMA API. This patch fixes the issue by using DMA API which inturn will create translation tables when IOMMU is enabled. Also VNIC doesn't have a seperate receive buffer ring per receive queue, so there is no 1:1 descriptor index matching between CQE_RX and the index in buffer ring from where a buffer has been used for DMA'ing. Unlike other NICs, here it's not possible to maintain dma address to virt address mappings within the driver. This leaves us no other choice but to use IOMMU's IOVA address conversion API to get buffer's virtual address which can be given to network stack for processing. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 1 + .../net/ethernet/cavium/thunder/nicvf_main.c | 12 +- .../ethernet/cavium/thunder/nicvf_queues.c | 178 ++++++++++++++---- .../ethernet/cavium/thunder/nicvf_queues.h | 4 +- 4 files changed, 156 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index e739c7153562..2269ff562d95 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -269,6 +269,7 @@ struct nicvf { #define MAX_QUEUES_PER_QSET 8 struct queue_set *qs; struct nicvf_cq_poll *napi[8]; + void *iommu_domain; u8 vf_id; u8 sqs_id; bool sqs_mode; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 6feaa24bcfd4..24017588f531 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "nic_reg.h" #include "nic.h" @@ -525,7 +526,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, /* Get actual TSO descriptors and free them */ tso_sqe = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); + nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, + tso_sqe->subdesc_cnt); nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1); + } else { + nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, + hdr->subdesc_cnt); } nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); @@ -576,6 +582,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, { struct sk_buff *skb; struct nicvf *nic = netdev_priv(netdev); + struct nicvf *snic = nic; int err = 0; int rq_idx; @@ -592,7 +599,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, if (err && !cqe_rx->rb_cnt) return; - skb = nicvf_get_rcv_skb(nic, cqe_rx); + skb = nicvf_get_rcv_skb(snic, cqe_rx); if (!skb) { netdev_dbg(nic->netdev, "Packet not received\n"); return; @@ -1643,6 +1650,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!pass1_silicon(nic->pdev)) nic->hw_tso = true; + /* Get iommu domain for iova to physical addr conversion */ + nic->iommu_domain = iommu_get_domain_for_dev(dev); + pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); if (sdevid == 0xA134) nic->t88 = true; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index ac0390be3b12..b1962dbd0409 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -18,6 +19,16 @@ #include "q_struct.h" #include "nicvf_queues.h" +#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0) + +static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) +{ + /* Translation is installed only when IOMMU is present */ + if (nic->iommu_domain) + return iommu_iova_to_phys(nic->iommu_domain, dma_addr); + return dma_addr; +} + static void nicvf_get_page(struct nicvf *nic) { if (!nic->rb_pageref || !nic->rb_page) @@ -87,7 +98,7 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, u32 buf_len, u64 **rbuf) { - int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0; + int order = NICVF_PAGE_ORDER; /* Check if request can be accomodated in previous allocated page */ if (nic->rb_page && @@ -97,22 +108,27 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, } nicvf_get_page(nic); - nic->rb_page = NULL; /* Allocate a new page */ + nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, + order); if (!nic->rb_page) { - nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, - order); - if (!nic->rb_page) { - this_cpu_inc(nic->pnicvf->drv_stats-> - rcv_buffer_alloc_failures); - return -ENOMEM; - } - nic->rb_page_offset = 0; + this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures); + return -ENOMEM; } - + nic->rb_page_offset = 0; ret: - *rbuf = (u64 *)((u64)page_address(nic->rb_page) + nic->rb_page_offset); + /* HW will ensure data coherency, CPU sync not required */ + *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, + nic->rb_page_offset, buf_len, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC)); + if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { + if (!nic->rb_page_offset) + __free_pages(nic->rb_page, order); + nic->rb_page = NULL; + return -ENOMEM; + } nic->rb_page_offset += buf_len; return 0; @@ -158,16 +174,21 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, rbdr->dma_size = buf_size; rbdr->enable = true; rbdr->thresh = RBDR_THRESH; + rbdr->head = 0; + rbdr->tail = 0; nic->rb_page = NULL; for (idx = 0; idx < ring_len; idx++) { err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, &rbuf); - if (err) + if (err) { + /* To free already allocated and mapped ones */ + rbdr->tail = idx - 1; return err; + } desc = GET_RBDR_DESC(rbdr, idx); - desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; } nicvf_get_page(nic); @@ -179,7 +200,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) { int head, tail; - u64 buf_addr; + u64 buf_addr, phys_addr; struct rbdr_entry_t *desc; if (!rbdr) @@ -192,18 +213,26 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) head = rbdr->head; tail = rbdr->tail; - /* Free SKBs */ + /* Release page references */ while (head != tail) { desc = GET_RBDR_DESC(rbdr, head); - buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - put_page(virt_to_page(phys_to_virt(buf_addr))); + buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + phys_addr = nicvf_iova_to_phys(nic, buf_addr); + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (phys_addr) + put_page(virt_to_page(phys_to_virt(phys_addr))); head++; head &= (rbdr->dmem.q_len - 1); } - /* Free SKB of tail desc */ + /* Release buffer of tail desc */ desc = GET_RBDR_DESC(rbdr, tail); - buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; - put_page(virt_to_page(phys_to_virt(buf_addr))); + buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + phys_addr = nicvf_iova_to_phys(nic, buf_addr); + dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (phys_addr) + put_page(virt_to_page(phys_to_virt(phys_addr))); /* Free RBDR ring */ nicvf_free_q_desc_mem(nic, &rbdr->dmem); @@ -250,7 +279,7 @@ static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) break; desc = GET_RBDR_DESC(rbdr, tail); - desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; refill_rb_cnt--; new_rb++; } @@ -361,9 +390,29 @@ static int nicvf_init_snd_queue(struct nicvf *nic, return 0; } +void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, + int hdr_sqe, u8 subdesc_cnt) +{ + u8 idx; + struct sq_gather_subdesc *gather; + + /* Unmap DMA mapped skb data buffers */ + for (idx = 0; idx < subdesc_cnt; idx++) { + hdr_sqe++; + hdr_sqe &= (sq->dmem.q_len - 1); + gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, hdr_sqe); + /* HW will ensure data coherency, CPU sync not required */ + dma_unmap_page_attrs(&nic->pdev->dev, gather->addr, + gather->size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } +} + static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) { struct sk_buff *skb; + struct sq_hdr_subdesc *hdr; + struct sq_hdr_subdesc *tso_sqe; if (!sq) return; @@ -379,8 +428,22 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) smp_rmb(); while (sq->head != sq->tail) { skb = (struct sk_buff *)sq->skbuff[sq->head]; - if (skb) - dev_kfree_skb_any(skb); + if (!skb) + goto next; + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); + /* Check for dummy descriptor used for HW TSO offload on 88xx */ + if (hdr->dont_send) { + /* Get actual TSO descriptors and unmap them */ + tso_sqe = + (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); + nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, + tso_sqe->subdesc_cnt); + } else { + nicvf_unmap_sndq_buffers(nic, sq, sq->head, + hdr->subdesc_cnt); + } + dev_kfree_skb_any(skb); +next: sq->head++; sq->head &= (sq->dmem.q_len - 1); } @@ -882,6 +945,14 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) return qentry; } +/* Rollback to previous tail pointer when descriptors not used */ +static inline void nicvf_rollback_sq_desc(struct snd_queue *sq, + int qentry, int desc_cnt) +{ + sq->tail = qentry; + atomic_add(desc_cnt, &sq->free_cnt); +} + /* Free descriptor back to SQ for future use */ void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) { @@ -1207,8 +1278,9 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, struct sk_buff *skb, u8 sq_num) { int i, size; - int subdesc_cnt, tso_sqe = 0; + int subdesc_cnt, hdr_sqe = 0; int qentry; + u64 dma_addr; subdesc_cnt = nicvf_sq_subdesc_required(nic, skb); if (subdesc_cnt > atomic_read(&sq->free_cnt)) @@ -1223,12 +1295,21 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, /* Add SQ header subdesc */ nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1, skb, skb->len); - tso_sqe = qentry; + hdr_sqe = qentry; /* Add SQ gather subdescs */ qentry = nicvf_get_nxt_sqentry(sq, qentry); size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len; - nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data)); + /* HW will ensure data coherency, CPU sync not required */ + dma_addr = dma_map_page_attrs(&nic->pdev->dev, virt_to_page(skb->data), + offset_in_page(skb->data), size, + DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); + return 0; + } + + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); /* Check for scattered buffer */ if (!skb_is_nonlinear(skb)) @@ -1241,15 +1322,26 @@ int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, qentry = nicvf_get_nxt_sqentry(sq, qentry); size = skb_frag_size(frag); - nicvf_sq_add_gather_subdesc(sq, qentry, size, - virt_to_phys( - skb_frag_address(frag))); + dma_addr = dma_map_page_attrs(&nic->pdev->dev, + skb_frag_page(frag), + frag->page_offset, size, + DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, dma_addr)) { + /* Free entire chain of mapped buffers + * here 'i' = frags mapped + above mapped skb->data + */ + nicvf_unmap_sndq_buffers(nic, sq, hdr_sqe, i); + nicvf_rollback_sq_desc(sq, qentry, subdesc_cnt); + return 0; + } + nicvf_sq_add_gather_subdesc(sq, qentry, size, dma_addr); } doorbell: if (nic->t88 && skb_shinfo(skb)->gso_size) { qentry = nicvf_get_nxt_sqentry(sq, qentry); - nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); + nicvf_sq_add_cqe_subdesc(sq, qentry, hdr_sqe, skb); } nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); @@ -1282,6 +1374,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) int offset; u16 *rb_lens = NULL; u64 *rb_ptrs = NULL; + u64 phys_addr; rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); /* Except 88xx pass1 on all other chips CQE_RX2_S is added to @@ -1296,15 +1389,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) else rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64)); - netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n", - __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz); - for (frag = 0; frag < cqe_rx->rb_cnt; frag++) { payload_len = rb_lens[frag_num(frag)]; + phys_addr = nicvf_iova_to_phys(nic, *rb_ptrs); + if (!phys_addr) { + if (skb) + dev_kfree_skb_any(skb); + return NULL; + } + if (!frag) { /* First fragment */ + dma_unmap_page_attrs(&nic->pdev->dev, + *rb_ptrs - cqe_rx->align_pad, + RCV_FRAG_LEN, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); skb = nicvf_rb_ptr_to_skb(nic, - *rb_ptrs - cqe_rx->align_pad, + phys_addr - cqe_rx->align_pad, payload_len); if (!skb) return NULL; @@ -1312,8 +1413,11 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) skb_put(skb, payload_len); } else { /* Add fragments */ - page = virt_to_page(phys_to_virt(*rb_ptrs)); - offset = phys_to_virt(*rb_ptrs) - page_address(page); + dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs, + RCV_FRAG_LEN, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + page = virt_to_page(phys_to_virt(phys_addr)); + offset = phys_to_virt(phys_addr) - page_address(page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset, payload_len, RCV_FRAG_LEN); } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 5cb84da99a2d..10cb4b84625b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -87,7 +87,7 @@ #define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13)) #define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13)) #define RBDR_THRESH (RCV_BUF_COUNT / 2) -#define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */ +#define DMA_BUFFER_LEN 1536 /* In multiples of 128bytes */ #define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -301,6 +301,8 @@ struct queue_set { #define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT) +void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, + int hdr_sqe, u8 subdesc_cnt); void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features); int nicvf_set_qset_resources(struct nicvf *nic); From 18de7ba95f6e5ab150e482618123d92ee2240dc0 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 7 Mar 2017 18:09:09 +0530 Subject: [PATCH 137/384] net: thunderx: Fix LMAC mode debug prints for QSGMII mode When BGX/LMACs are in QSGMII mode, for some LMACs, mode info is not being printed. This patch will fix that. With changes already done to not do any sort of serdes 2 lane mapping config calculation in kernel driver, we can get rid of this logic. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 4c8e8cf730bb..9b8a53e138e7 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1011,12 +1011,6 @@ static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) dev_info(dev, "%s: 40G_KR4\n", (char *)str); break; case BGX_MODE_QSGMII: - if ((lmacid == 0) && - (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid)) - return; - if ((lmacid == 2) && - (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid)) - return; dev_info(dev, "%s: QSGMII\n", (char *)str); break; case BGX_MODE_RGMII: From 78aacb6f6eeea3c581a29b4a50438d0bdf85ad0b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 7 Mar 2017 18:09:10 +0530 Subject: [PATCH 138/384] net: thunderx: Fix invalid mac addresses for node1 interfaces When booted with ACPI, random mac addresses are being assigned to node1 interfaces due to mismatch of bgx_id in BGX driver and ACPI tables. This patch fixes this issue by setting maximum BGX devices per node based on platform/soc instead of a macro. This change will set the bgx_id appropriately. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/cavium/thunder/thunder_bgx.c | 58 ++++++++++++++----- .../net/ethernet/cavium/thunder/thunder_bgx.h | 1 - 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 9b8a53e138e7..64a1095e4d14 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -123,14 +123,44 @@ static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero) return 1; } +static int max_bgx_per_node; +static void set_max_bgx_per_node(struct pci_dev *pdev) +{ + u16 sdevid; + + if (max_bgx_per_node) + return; + + pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); + switch (sdevid) { + case PCI_SUBSYS_DEVID_81XX_BGX: + max_bgx_per_node = MAX_BGX_PER_CN81XX; + break; + case PCI_SUBSYS_DEVID_83XX_BGX: + max_bgx_per_node = MAX_BGX_PER_CN83XX; + break; + case PCI_SUBSYS_DEVID_88XX_BGX: + default: + max_bgx_per_node = MAX_BGX_PER_CN88XX; + break; + } +} + +static struct bgx *get_bgx(int node, int bgx_idx) +{ + int idx = (node * max_bgx_per_node) + bgx_idx; + + return bgx_vnic[idx]; +} + /* Return number of BGX present in HW */ unsigned bgx_get_map(int node) { int i; unsigned map = 0; - for (i = 0; i < MAX_BGX_PER_NODE; i++) { - if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i]) + for (i = 0; i < max_bgx_per_node; i++) { + if (bgx_vnic[(node * max_bgx_per_node) + i]) map |= (1 << i); } @@ -143,7 +173,7 @@ int bgx_get_lmac_count(int node, int bgx_idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (bgx) return bgx->lmac_count; @@ -158,7 +188,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status) struct bgx *bgx; struct lmac *lmac; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) return; @@ -172,7 +202,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state); const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); if (bgx) return bgx->lmac[lmacid].mac; @@ -183,7 +213,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac); void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); if (!bgx) return; @@ -194,7 +224,7 @@ EXPORT_SYMBOL(bgx_set_lmac_mac); void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); struct lmac *lmac; u64 cfg; @@ -217,7 +247,7 @@ EXPORT_SYMBOL(bgx_lmac_rx_tx_enable); void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause) { struct pfc *pfc = (struct pfc *)pause; - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); struct lmac *lmac; u64 cfg; @@ -237,7 +267,7 @@ EXPORT_SYMBOL(bgx_lmac_get_pfc); void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause) { struct pfc *pfc = (struct pfc *)pause; - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = get_bgx(node, bgx_idx); struct lmac *lmac; u64 cfg; @@ -369,7 +399,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) return 0; @@ -383,7 +413,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) return 0; @@ -411,7 +441,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx, struct lmac *lmac; u64 cfg; - bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + bgx = get_bgx(node, bgx_idx); if (!bgx) return; @@ -1328,11 +1358,13 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_release_regions; } + set_max_bgx_per_node(pdev); + pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid); if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) { bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK; - bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE; + bgx->bgx_id += nic_get_node_id(pdev) * max_bgx_per_node; bgx->max_lmac = MAX_LMAC_PER_BGX; bgx_vnic[bgx->bgx_id] = bgx; } else { diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index a60f189429bb..c5080f2cead5 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -22,7 +22,6 @@ #define MAX_BGX_PER_CN88XX 2 #define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */ #define MAX_BGX_PER_CN83XX 4 -#define MAX_BGX_PER_NODE 4 #define MAX_LMAC_PER_BGX 4 #define MAX_BGX_CHANS_PER_LMAC 16 #define MAX_DMAC_PER_LMAC 8 From 36fa35d22bffc78c85b5e68adbdd99e914bec764 Mon Sep 17 00:00:00 2001 From: Thanneeru Srinivasulu Date: Tue, 7 Mar 2017 18:09:11 +0530 Subject: [PATCH 139/384] net: thunderx: Allow IPv6 frames with zero UDP checksum Do not consider IPv6 frames with zero UDP checksum as frames with bad checksum and drop them. Signed-off-by: Thanneeru Srinivasulu Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index b1962dbd0409..f13289f0d238 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -622,9 +622,11 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, nicvf_send_msg_to_pf(nic, &mbx); if (!nic->sqs_mode && (qidx == 0)) { - /* Enable checking L3/L4 length and TCP/UDP checksums */ + /* Enable checking L3/L4 length and TCP/UDP checksums + * Also allow IPv6 pkts with zero UDP checksum. + */ nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, - (BIT(24) | BIT(23) | BIT(21))); + (BIT(24) | BIT(23) | BIT(21) | BIT(20))); nicvf_config_vlan_stripping(nic, nic->netdev->features); } From 8aad6f14c09d78862fc04e47214d398add9bb8ce Mon Sep 17 00:00:00 2001 From: "robert.foss@collabora.com" Date: Tue, 7 Mar 2017 11:46:25 -0500 Subject: [PATCH 140/384] qed: Fix copy of uninitialized memory In qed_ll2_start_ooo() the ll2_info variable is uninitialized and then passed to qed_ll2_acquire_connection() where it is copied into a new memory space. This shouldn't cause any issue as long as non of the copied memory is every read. But the potential for a bug being introduced by reading this memory is real. Detected by CoverityScan, CID#1399632 ("Uninitialized scalar variable") Signed-off-by: Robert Foss Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 9a0b9af10a57..5fb34db377c8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -968,7 +968,7 @@ static int qed_ll2_start_ooo(struct qed_dev *cdev, { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; - struct qed_ll2_conn ll2_info; + struct qed_ll2_conn ll2_info = { 0 }; int rc; ll2_info.conn_type = QED_LL2_TYPE_ISCSI_OOO; From 294acf1c01bace5cea5d30b510504238bf5f7c25 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 7 Mar 2017 18:33:31 +0100 Subject: [PATCH 141/384] net/tunnel: set inner protocol in network gro hooks The gso code of several tunnels type (gre and udp tunnels) takes for granted that the skb->inner_protocol is properly initialized and drops the packet elsewhere. On the forwarding path no one is initializing such field, so gro encapsulated packets are dropped on forward. Since commit 38720352412a ("gre: Use inner_proto to obtain inner header protocol"), this can be reproduced when the encapsulated packets use gre as the tunneling protocol. The issue happens also with vxlan and geneve tunnels since commit 8bce6d7d0d1e ("udp: Generalize skb_udp_segment"), if the forwarding host's ingress nic has h/w offload for such tunnel and a vxlan/geneve device is configured on top of it, regardless of the configured peer address and vni. To address the issue, this change initialize the inner_protocol field for encapsulated packets in both ipv4 and ipv6 gro complete callbacks. Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol") Fixes: 8bce6d7d0d1e ("udp: Generalize skb_udp_segment") Signed-off-by: Paolo Abeni Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 4 +++- net/ipv6/ip6_offload.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 602d40f43687..5091f46826fa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1487,8 +1487,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff) int proto = iph->protocol; int err = -ENOSYS; - if (skb->encapsulation) + if (skb->encapsulation) { + skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP)); skb_set_inner_network_header(skb, nhoff); + } csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 0838e6d01d2e..93e58a5e1837 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -294,8 +294,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; - if (skb->encapsulation) + if (skb->encapsulation) { + skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6)); skb_set_inner_network_header(skb, nhoff); + } iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); From 745cb7f8a5de0805cade3de3991b7a95317c7c73 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 7 Mar 2017 23:50:50 +0300 Subject: [PATCH 142/384] uapi: fix linux/packet_diag.h userspace compilation error Replace MAX_ADDR_LEN with its numeric value to fix the following linux/packet_diag.h userspace compilation error: /usr/include/linux/packet_diag.h:67:17: error: 'MAX_ADDR_LEN' undeclared here (not in a function) __u8 pdmc_addr[MAX_ADDR_LEN]; This is not the first case in the UAPI where the numeric value of MAX_ADDR_LEN is used instead of symbolic one, uapi/linux/if_link.h already does the same: $ grep MAX_ADDR_LEN include/uapi/linux/if_link.h __u8 mac[32]; /* MAX_ADDR_LEN */ There are no UAPI headers besides these two that use MAX_ADDR_LEN. Signed-off-by: Dmitry V. Levin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/uapi/linux/packet_diag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h index d08c63f3dd6f..0c5d5dd61b6a 100644 --- a/include/uapi/linux/packet_diag.h +++ b/include/uapi/linux/packet_diag.h @@ -64,7 +64,7 @@ struct packet_diag_mclist { __u32 pdmc_count; __u16 pdmc_type; __u16 pdmc_alen; - __u8 pdmc_addr[MAX_ADDR_LEN]; + __u8 pdmc_addr[32]; /* MAX_ADDR_LEN */ }; struct packet_diag_ring { From 9f691549f76d488a0c74397b3e51e943865ea01f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 7 Mar 2017 20:00:12 -0800 Subject: [PATCH 143/384] bpf: fix struct htab_elem layout when htab_elem is removed from the bucket list the htab_elem.hash_node.next field should not be overridden too early otherwise we have a tiny race window between lookup and delete. The bug was discovered by manual code analysis and reproducible only with explicit udelay() in lookup_elem_raw(). Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Reported-by: Jonathan Perry Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/hashtab.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3ea87fb19a94..63c86a7be2a1 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -45,8 +45,13 @@ enum extra_elem_state { struct htab_elem { union { struct hlist_node hash_node; - struct bpf_htab *htab; - struct pcpu_freelist_node fnode; + struct { + void *padding; + union { + struct bpf_htab *htab; + struct pcpu_freelist_node fnode; + }; + }; }; union { struct rcu_head rcu; @@ -162,7 +167,8 @@ static int prealloc_init(struct bpf_htab *htab) offsetof(struct htab_elem, lru_node), htab->elem_size, htab->map.max_entries); else - pcpu_freelist_populate(&htab->freelist, htab->elems, + pcpu_freelist_populate(&htab->freelist, + htab->elems + offsetof(struct htab_elem, fnode), htab->elem_size, htab->map.max_entries); return 0; @@ -217,6 +223,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) int err, i; u64 cost; + BUILD_BUG_ON(offsetof(struct htab_elem, htab) != + offsetof(struct htab_elem, hash_node.pprev)); + BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != + offsetof(struct htab_elem, hash_node.pprev)); + if (lru && !capable(CAP_SYS_ADMIN)) /* LRU implementation is much complicated than other * maps. Hence, limit to CAP_SYS_ADMIN for now. @@ -582,9 +593,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, int err = 0; if (prealloc) { - l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); - if (!l_new) + struct pcpu_freelist_node *l; + + l = pcpu_freelist_pop(&htab->freelist); + if (!l) err = -E2BIG; + else + l_new = container_of(l, struct htab_elem, fnode); } else { if (atomic_inc_return(&htab->count) > htab->map.max_entries) { atomic_dec(&htab->count); From 4fe8435909fddc97b81472026aa954e06dd192a5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 7 Mar 2017 20:00:13 -0800 Subject: [PATCH 144/384] bpf: convert htab map to hlist_nulls when all map elements are pre-allocated one cpu can delete and reuse htab_elem while another cpu is still walking the hlist. In such case the lookup may miss the element. Convert hlist to hlist_nulls to avoid such scenario. When bucket lock is taken there is no need to take such precautions, so only convert map_lookup and map_get_next to nulls. The race window is extremely small and only reproducible with explicit udelay() inside lookup_nulls_elem_raw() Similar to hlist add hlist_nulls_for_each_entry_safe() and hlist_nulls_entry_safe() helpers. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Reported-by: Jonathan Perry Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 5 ++ include/linux/rculist_nulls.h | 14 ++++++ kernel/bpf/hashtab.c | 94 ++++++++++++++++++++++------------- 3 files changed, 79 insertions(+), 34 deletions(-) diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index b01fe1009084..87ff4f58a2f0 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -29,6 +29,11 @@ struct hlist_nulls_node { ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_nulls_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \ + }) /** * ptr_is_a_nulls - Test if a ptr is a nulls * @ptr: ptr to be tested diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 4ae95f7e8597..a23a33153180 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -156,5 +156,19 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) +/** + * hlist_nulls_for_each_entry_safe - + * iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. + */ +#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \ + for (({barrier();}), \ + pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \ + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });) #endif #endif diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 63c86a7be2a1..afe5bab376c9 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -13,11 +13,12 @@ #include #include #include +#include #include "percpu_freelist.h" #include "bpf_lru_list.h" struct bucket { - struct hlist_head head; + struct hlist_nulls_head head; raw_spinlock_t lock; }; @@ -44,7 +45,7 @@ enum extra_elem_state { /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { - struct hlist_node hash_node; + struct hlist_nulls_node hash_node; struct { void *padding; union { @@ -337,7 +338,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) goto free_htab; for (i = 0; i < htab->n_buckets; i++) { - INIT_HLIST_HEAD(&htab->buckets[i].head); + INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); raw_spin_lock_init(&htab->buckets[i].lock); } @@ -377,28 +378,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) return &htab->buckets[hash & (htab->n_buckets - 1)]; } -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) { return &__select_bucket(htab, hash)->head; } -static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, +/* this lookup function can only be called with bucket lock taken */ +static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, void *key, u32 key_size) { + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_rcu(l, head, hash_node) + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l->hash == hash && !memcmp(&l->key, key, key_size)) return l; return NULL; } +/* can be called without bucket lock. it will repeat the loop in + * the unlikely event when elements moved from one bucket into another + * while link list is being walked + */ +static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, + u32 hash, void *key, + u32 key_size, u32 n_buckets) +{ + struct hlist_nulls_node *n; + struct htab_elem *l; + +again: + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) + if (l->hash == hash && !memcmp(&l->key, key, key_size)) + return l; + + if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) + goto again; + + return NULL; +} + /* Called from syscall or from eBPF program */ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l; u32 hash, key_size; @@ -411,7 +436,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) head = select_bucket(htab, hash); - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); return l; } @@ -444,8 +469,9 @@ static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) { struct bpf_htab *htab = (struct bpf_htab *)arg; - struct htab_elem *l, *tgt_l; - struct hlist_head *head; + struct htab_elem *l = NULL, *tgt_l; + struct hlist_nulls_head *head; + struct hlist_nulls_node *n; unsigned long flags; struct bucket *b; @@ -455,9 +481,9 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) raw_spin_lock_irqsave(&b->lock, flags); - hlist_for_each_entry_rcu(l, head, hash_node) + hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) if (l == tgt_l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); break; } @@ -470,7 +496,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct htab_elem *l, *next_l; u32 hash, key_size; int i; @@ -484,7 +510,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) head = select_bucket(htab, hash); /* lookup the key */ - l = lookup_elem_raw(head, hash, key, key_size); + l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); if (!l) { i = 0; @@ -492,7 +518,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) } /* key was found, get next key in the same bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), struct htab_elem, hash_node); if (next_l) { @@ -511,7 +537,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) head = select_bucket(htab, i); /* pick first element in the bucket */ - next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), struct htab_elem, hash_node); if (next_l) { /* if it's not empty, just return it */ @@ -676,7 +702,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -715,9 +741,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); free_htab_elem(htab, l_old); } ret = 0; @@ -731,7 +757,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new, *l_old = NULL; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -772,10 +798,10 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, /* add new element to the head of the list, so that * concurrent search will find it before old elem */ - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); if (l_old) { bpf_lru_node_set_ref(&l_new->lru_node); - hlist_del_rcu(&l_old->hash_node); + hlist_nulls_del_rcu(&l_old->hash_node); } ret = 0; @@ -796,7 +822,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -835,7 +861,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, ret = PTR_ERR(l_new); goto err; } - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); } ret = 0; err: @@ -849,7 +875,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct htab_elem *l_new = NULL, *l_old; - struct hlist_head *head; + struct hlist_nulls_head *head; unsigned long flags; struct bucket *b; u32 key_size, hash; @@ -897,7 +923,7 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, } else { pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size), value, onallcpus); - hlist_add_head_rcu(&l_new->hash_node, head); + hlist_nulls_add_head_rcu(&l_new->hash_node, head); l_new = NULL; } ret = 0; @@ -925,7 +951,7 @@ static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, static int htab_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -945,7 +971,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); free_htab_elem(htab, l); ret = 0; } @@ -957,7 +983,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key) static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); - struct hlist_head *head; + struct hlist_nulls_head *head; struct bucket *b; struct htab_elem *l; unsigned long flags; @@ -977,7 +1003,7 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key) l = lookup_elem_raw(head, hash, key, key_size); if (l) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_del_rcu(&l->hash_node); ret = 0; } @@ -992,12 +1018,12 @@ static void delete_all_elements(struct bpf_htab *htab) int i; for (i = 0; i < htab->n_buckets; i++) { - struct hlist_head *head = select_bucket(htab, i); - struct hlist_node *n; + struct hlist_nulls_head *head = select_bucket(htab, i); + struct hlist_nulls_node *n; struct htab_elem *l; - hlist_for_each_entry_safe(l, n, head, hash_node) { - hlist_del_rcu(&l->hash_node); + hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { + hlist_nulls_del_rcu(&l->hash_node); if (l->state != HTAB_EXTRA_ELEM_USED) htab_elem_free(htab, l); } From 9f138fa609c47403374a862a08a41394be53d461 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 8 Mar 2017 18:08:16 +0100 Subject: [PATCH 145/384] net: initialize msg.msg_flags in recvfrom KMSAN reports a use of uninitialized memory in put_cmsg() because msg.msg_flags in recvfrom haven't been initialized properly. The flag values don't affect the result on this path, but it's still a good idea to initialize them explicitly. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller --- net/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/socket.c b/net/socket.c index 2c1e8677ff2d..e0757e648c0c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1731,6 +1731,7 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, /* We assume all kernel code knows the size of sockaddr_storage */ msg.msg_namelen = 0; msg.msg_iocb = NULL; + msg.msg_flags = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); From 6cbac9828627b89487144c1e4448d7e0a6ab22ca Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 8 Mar 2017 16:46:57 +0100 Subject: [PATCH 146/384] tun: remove copyright printing Printing copyright does not give any useful information on the boot process. Furthermore, the email address printed is obsolete since commit ba57b6f20429 ("MAINTAINERS: fix bouncing tun/tap entries") Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index dc1b1dd9157c..f58b7d850114 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2570,7 +2570,6 @@ static int __init tun_init(void) int ret = 0; pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION); - pr_info("%s\n", DRV_COPYRIGHT); ret = rtnl_link_register(&tun_link_ops); if (ret) { From 3c2217a675bac22afb149166e0de71809189850d Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:32 -0500 Subject: [PATCH 147/384] bnxt_en: Perform function reset earlier during probe. The firmware call to do function reset is done too late. It is causing the rings that have been reserved to be freed. In NPAR mode, this bug is causing us to run out of rings. Fixes: 391be5c27364 ("bnxt_en: Implement new scheme to reserve tx rings.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 235733e91c79..33293ac32779 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7444,6 +7444,10 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; + rc = bnxt_hwrm_func_reset(bp); + if (rc) + goto init_err_pci_clean; + bnxt_hwrm_fw_set_time(bp); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | @@ -7554,10 +7558,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err_pci_clean; - rc = bnxt_hwrm_func_reset(bp); - if (rc) - goto init_err_pci_clean; - rc = bnxt_init_int_mode(bp); if (rc) goto init_err_pci_clean; From b386cd362ffea09d05c56bfa85d104562e860647 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:33 -0500 Subject: [PATCH 148/384] bnxt_en: Call bnxt_ulp_stop() during tx timeout. If we call bnxt_reset_task() due to tx timeout, we should call bnxt_ulp_stop() to inform the RDMA driver about the error and the impending reset. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 33293ac32779..bbe93713e226 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6495,8 +6495,14 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) if (!silent) bnxt_dbg_dump_states(bp); if (netif_running(bp->dev)) { + int rc; + + if (!silent) + bnxt_ulp_stop(bp); bnxt_close_nic(bp, false, false); - bnxt_open_nic(bp, false, false); + rc = bnxt_open_nic(bp, false, false); + if (!silent && !rc) + bnxt_ulp_start(bp); } } From bc39f885a9c3bdbff0a96ecaf07b162a78eff6e4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:34 -0500 Subject: [PATCH 149/384] bnxt_en: Check if firmware LLDP agent is running. Set DCB_CAP_DCBX_HOST capability flag only if the firmware LLDP agent is not running. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bbe93713e226..869d4c97a9e8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4465,6 +4465,10 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK; } #endif + if (BNXT_PF(bp) && (le16_to_cpu(resp->flags) & + FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED)) + bp->flags |= BNXT_FLAG_FW_LLDP_AGENT; + switch (resp->port_partition_type) { case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0: case FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index faf26a2f726b..c7a5b84a5cb2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -993,6 +993,7 @@ struct bnxt { BNXT_FLAG_ROCEV2_CAP) #define BNXT_FLAG_NO_AGG_RINGS 0x20000 #define BNXT_FLAG_RX_PAGE_MODE 0x40000 + #define BNXT_FLAG_FW_LLDP_AGENT 0x80000 #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000 #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index fdf2d8caf7bf..03532061d211 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -474,7 +474,7 @@ void bnxt_dcb_init(struct bnxt *bp) return; bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE; - if (BNXT_PF(bp)) + if (BNXT_PF(bp) && !(bp->flags & BNXT_FLAG_FW_LLDP_AGENT)) bp->dcbx_cap |= DCB_CAP_DCBX_HOST; else bp->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED; From 520ad89a54edea84496695d528f73ddcf4a52ea4 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 8 Mar 2017 18:44:35 -0500 Subject: [PATCH 150/384] bnxt_en: Ignore 0 value in autoneg supported speed from firmware. In some situations, the firmware will return 0 for autoneg supported speed. This may happen if the firmware detects no SFP module, for example. The driver should ignore this so that we don't end up with an invalid autoneg setting with nothing advertised. When SFP module is inserted, we'll get the updated settings from firmware at that time. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 869d4c97a9e8..32de4589d16a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5511,8 +5511,9 @@ static int bnxt_hwrm_phy_qcaps(struct bnxt *bp) bp->lpi_tmr_hi = le32_to_cpu(resp->valid_tx_lpi_timer_high) & PORT_PHY_QCAPS_RESP_TX_LPI_TIMER_HIGH_MASK; } - link_info->support_auto_speeds = - le16_to_cpu(resp->supported_speeds_auto_mode); + if (resp->supported_speeds_auto_mode) + link_info->support_auto_speeds = + le16_to_cpu(resp->supported_speeds_auto_mode); hwrm_phy_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); From cdfbabfb2f0ce983fdaa42f20e5f7842178fc01e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Mar 2017 08:09:05 +0000 Subject: [PATCH 151/384] net: Work around lockdep limitation in sockets that use sockets Lockdep issues a circular dependency warning when AFS issues an operation through AF_RXRPC from a context in which the VFS/VM holds the mmap_sem. The theory lockdep comes up with is as follows: (1) If the pagefault handler decides it needs to read pages from AFS, it calls AFS with mmap_sem held and AFS begins an AF_RXRPC call, but creating a call requires the socket lock: mmap_sem must be taken before sk_lock-AF_RXRPC (2) afs_open_socket() opens an AF_RXRPC socket and binds it. rxrpc_bind() binds the underlying UDP socket whilst holding its socket lock. inet_bind() takes its own socket lock: sk_lock-AF_RXRPC must be taken before sk_lock-AF_INET (3) Reading from a TCP socket into a userspace buffer might cause a fault and thus cause the kernel to take the mmap_sem, but the TCP socket is locked whilst doing this: sk_lock-AF_INET must be taken before mmap_sem However, lockdep's theory is wrong in this instance because it deals only with lock classes and not individual locks. The AF_INET lock in (2) isn't really equivalent to the AF_INET lock in (3) as the former deals with a socket entirely internal to the kernel that never sees userspace. This is a limitation in the design of lockdep. Fix the general case by: (1) Double up all the locking keys used in sockets so that one set are used if the socket is created by userspace and the other set is used if the socket is created by the kernel. (2) Store the kern parameter passed to sk_alloc() in a variable in the sock struct (sk_kern_sock). This informs sock_lock_init(), sock_init_data() and sk_clone_lock() as to the lock keys to be used. Note that the child created by sk_clone_lock() inherits the parent's kern setting. (3) Add a 'kern' parameter to ->accept() that is analogous to the one passed in to ->create() that distinguishes whether kernel_accept() or sys_accept4() was the caller and can be passed to sk_alloc(). Note that a lot of accept functions merely dequeue an already allocated socket. I haven't touched these as the new socket already exists before we get the parameter. Note also that there are a couple of places where I've made the accepted socket unconditionally kernel-based: irda_accept() rds_rcp_accept_one() tcp_accept_from_sock() because they follow a sock_create_kern() and accept off of that. Whilst creating this, I noticed that lustre and ocfs don't create sockets through sock_create_kern() and thus they aren't marked as for-kernel, though they appear to be internal. I wonder if these should do that so that they use the new set of lock keys. Signed-off-by: David Howells Signed-off-by: David S. Miller --- crypto/af_alg.c | 9 +- crypto/algif_hash.c | 9 +- drivers/staging/lustre/lnet/lnet/lib-socket.c | 4 +- fs/dlm/lowcomms.c | 2 +- fs/ocfs2/cluster/tcp.c | 2 +- include/crypto/if_alg.h | 2 +- include/linux/net.h | 2 +- include/net/inet_common.h | 3 +- include/net/inet_connection_sock.h | 2 +- include/net/sctp/structs.h | 3 +- include/net/sock.h | 9 +- net/atm/svc.c | 5 +- net/ax25/af_ax25.c | 3 +- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 3 +- net/bluetooth/sco.c | 2 +- net/core/sock.c | 106 ++++++++++-------- net/decnet/af_decnet.c | 5 +- net/ipv4/af_inet.c | 5 +- net/ipv4/inet_connection_sock.c | 2 +- net/irda/af_irda.c | 5 +- net/iucv/af_iucv.c | 2 +- net/llc/af_llc.c | 4 +- net/netrom/af_netrom.c | 3 +- net/nfc/llcp_sock.c | 2 +- net/phonet/pep.c | 6 +- net/phonet/socket.c | 4 +- net/rds/tcp_listen.c | 2 +- net/rose/af_rose.c | 3 +- net/sctp/ipv6.c | 5 +- net/sctp/protocol.c | 5 +- net/sctp/socket.c | 4 +- net/smc/af_smc.c | 2 +- net/socket.c | 4 +- net/tipc/socket.c | 8 +- net/unix/af_unix.c | 5 +- net/vmw_vsock/af_vsock.c | 3 +- net/x25/af_x25.c | 3 +- 38 files changed, 142 insertions(+), 108 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f5e18c2a4852..690deca17c35 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -266,7 +266,7 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, return err; } -int af_alg_accept(struct sock *sk, struct socket *newsock) +int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) { struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; @@ -281,7 +281,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, 0); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern); err = -ENOMEM; if (!sk2) goto unlock; @@ -323,9 +323,10 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) } EXPORT_SYMBOL_GPL(af_alg_accept); -static int alg_accept(struct socket *sock, struct socket *newsock, int flags) +static int alg_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { - return af_alg_accept(sock->sk, newsock); + return af_alg_accept(sock->sk, newsock, kern); } static const struct proto_ops alg_proto_ops = { diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 54fc90e8339c..5e92bd275ef3 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -239,7 +239,8 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, return err ?: len; } -static int hash_accept(struct socket *sock, struct socket *newsock, int flags) +static int hash_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -260,7 +261,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags) if (err) return err; - err = af_alg_accept(ask->parent, newsock); + err = af_alg_accept(ask->parent, newsock, kern); if (err) return err; @@ -378,7 +379,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg, } static int hash_accept_nokey(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { int err; @@ -386,7 +387,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock, if (err) return err; - return hash_accept(sock, newsock, flags); + return hash_accept(sock, newsock, flags, kern); } static struct proto_ops algif_hash_ops_nokey = { diff --git a/drivers/staging/lustre/lnet/lnet/lib-socket.c b/drivers/staging/lustre/lnet/lnet/lib-socket.c index b7b87ecefcdf..9fca8d225ee0 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-socket.c +++ b/drivers/staging/lustre/lnet/lnet/lib-socket.c @@ -532,7 +532,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) newsock->ops = sock->ops; - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); if (rc == -EAGAIN) { /* Nothing ready, so wait for activity */ init_waitqueue_entry(&wait, current); @@ -540,7 +540,7 @@ lnet_sock_accept(struct socket **newsockp, struct socket *sock) set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(sk_sleep(sock->sk), &wait); - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK, false); } if (rc) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 7d398d300e97..9382db998ec9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con) newsock->type = con->sock->type; newsock->ops = con->sock->ops; - result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true); if (result < 0) goto accept_err; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 4348027384f5..d0ab7e56d0b4 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); if (ret < 0) goto out; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index a2bfd7843f18..e2b9c6fe2714 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -73,7 +73,7 @@ int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); void af_alg_release_parent(struct sock *sk); -int af_alg_accept(struct sock *sk, struct socket *newsock); +int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern); int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); void af_alg_free_sg(struct af_alg_sgl *sgl); diff --git a/include/linux/net.h b/include/linux/net.h index cd0c8bd0a1de..0620f5e18c96 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -146,7 +146,7 @@ struct proto_ops { int (*socketpair)(struct socket *sock1, struct socket *sock2); int (*accept) (struct socket *sock, - struct socket *newsock, int flags); + struct socket *newsock, int flags, bool kern); int (*getname) (struct socket *sock, struct sockaddr *addr, int *sockaddr_len, int peer); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index b7952d55b9c0..f39ae697347f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -20,7 +20,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); -int inet_accept(struct socket *sock, struct socket *newsock, int flags); +int inet_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 826f198374f8..c7a577976bec 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -258,7 +258,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, return (unsigned long)min_t(u64, when, max_when); } -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); int inet_csk_get_port(struct sock *sk, unsigned short snum); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a244db5e5ff7..07a0b128625a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -476,7 +476,8 @@ struct sctp_pf { int (*send_verify) (struct sctp_sock *, union sctp_addr *); int (*supported_addrs)(const struct sctp_sock *, __be16 *); struct sock *(*create_accept_sk) (struct sock *sk, - struct sctp_association *asoc); + struct sctp_association *asoc, + bool kern); int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr); void (*to_sk_saddr)(union sctp_addr *, struct sock *sk); void (*to_sk_daddr)(union sctp_addr *, struct sock *sk); diff --git a/include/net/sock.h b/include/net/sock.h index 5e5997654db6..03252d53975d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -236,6 +236,7 @@ struct sock_common { * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer + * @sk_kern_sock: True if sock is using kernel lock classes * @sk_rcvbuf: size of receive buffer in bytes * @sk_wq: sock wait queue and async head * @sk_rx_dst: receive input route used by early demux @@ -430,7 +431,8 @@ struct sock { #endif kmemcheck_bitfield_begin(flags); - unsigned int sk_padding : 2, + unsigned int sk_padding : 1, + sk_kern_sock : 1, sk_no_check_tx : 1, sk_no_check_rx : 1, sk_userlocks : 4, @@ -1015,7 +1017,8 @@ struct proto { int addr_len); int (*disconnect)(struct sock *sk, int flags); - struct sock * (*accept)(struct sock *sk, int flags, int *err); + struct sock * (*accept)(struct sock *sk, int flags, int *err, + bool kern); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); @@ -1573,7 +1576,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, int sock_no_bind(struct socket *, struct sockaddr *, int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); -int sock_no_accept(struct socket *, struct socket *, int); +int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int *, int); unsigned int sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); diff --git a/net/atm/svc.c b/net/atm/svc.c index db9794ec61d8..5589de7086af 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -318,7 +318,8 @@ static int svc_listen(struct socket *sock, int backlog) return error; } -static int svc_accept(struct socket *sock, struct socket *newsock, int flags) +static int svc_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sk_buff *skb; @@ -329,7 +330,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags) lock_sock(sk); - error = svc_create(sock_net(sk), newsock, 0, 0); + error = svc_create(sock_net(sk), newsock, 0, kern); if (error) goto out; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a8e42cedf1db..b7c486752b3a 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1320,7 +1320,8 @@ static int __must_check ax25_connect(struct socket *sock, return err; } -static int ax25_accept(struct socket *sock, struct socket *newsock, int flags) +static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index f307b145ea54..507b80d59dec 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -301,7 +301,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) } static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index aa1a814ceddc..ac3c650cb234 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -471,7 +471,8 @@ static int rfcomm_sock_listen(struct socket *sock, int backlog) return err; } -static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags) +static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e4e9a2da1e7e..728e0c8dc8e7 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -627,7 +627,7 @@ static int sco_sock_listen(struct socket *sock, int backlog) } static int sco_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; diff --git a/net/core/sock.c b/net/core/sock.c index f6fd79f33097..a96d5f7a5734 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -197,66 +197,55 @@ EXPORT_SYMBOL(sk_net_capable); /* * Each address family might have different locking rules, so we have - * one slock key per address family: + * one slock key per address family and separate keys for internal and + * userspace sockets. */ static struct lock_class_key af_family_keys[AF_MAX]; +static struct lock_class_key af_family_kern_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; +static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket * locks is fast): */ + +#define _sock_locks(x) \ + x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \ + x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \ + x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \ + x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \ + x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \ + x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \ + x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \ + x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \ + x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \ + x "27" , x "28" , x "AF_CAN" , \ + x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \ + x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ + x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ + x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ + x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX" + static const char *const af_family_key_strings[AF_MAX+1] = { - "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , - "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", - "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , - "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" , - "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , - "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , - "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , - "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , - "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , - "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , - "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , - "sk_lock-AF_QIPCRTR", "sk_lock-AF_SMC" , "sk_lock-AF_MAX" + _sock_locks("sk_lock-") }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { - "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , - "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", - "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , - "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" , - "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , - "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , - "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , - "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" , - "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , - "slock-27" , "slock-28" , "slock-AF_CAN" , - "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , - "slock-AF_QIPCRTR", "slock-AF_SMC" , "slock-AF_MAX" + _sock_locks("slock-") }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { - "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , - "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", - "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , - "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" , - "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , - "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , - "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , - "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" , - "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , - "clock-27" , "clock-28" , "clock-AF_CAN" , - "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , - "clock-AF_QIPCRTR", "clock-AF_SMC" , "clock-AF_MAX" + _sock_locks("clock-") +}; + +static const char *const af_family_kern_key_strings[AF_MAX+1] = { + _sock_locks("k-sk_lock-") +}; +static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { + _sock_locks("k-slock-") +}; +static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { + _sock_locks("k-clock-") }; /* @@ -264,6 +253,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { * so split the lock classes by using a per-AF key: */ static struct lock_class_key af_callback_keys[AF_MAX]; +static struct lock_class_key af_kern_callback_keys[AF_MAX]; /* Take into consideration the size of the struct sk_buff overhead in the * determination of these values, since that is non-constant across @@ -1293,7 +1283,16 @@ int sock_getsockopt(struct socket *sock, int level, int optname, */ static inline void sock_lock_init(struct sock *sk) { - sock_lock_init_class_and_name(sk, + if (sk->sk_kern_sock) + sock_lock_init_class_and_name( + sk, + af_family_kern_slock_key_strings[sk->sk_family], + af_family_kern_slock_keys + sk->sk_family, + af_family_kern_key_strings[sk->sk_family], + af_family_kern_keys + sk->sk_family); + else + sock_lock_init_class_and_name( + sk, af_family_slock_key_strings[sk->sk_family], af_family_slock_keys + sk->sk_family, af_family_key_strings[sk->sk_family], @@ -1399,6 +1398,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, * why we need sk_prot_creator -acme */ sk->sk_prot = sk->sk_prot_creator = prot; + sk->sk_kern_sock = kern; sock_lock_init(sk); sk->sk_net_refcnt = kern ? 0 : 1; if (likely(sk->sk_net_refcnt)) @@ -2277,7 +2277,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2) } EXPORT_SYMBOL(sock_no_socketpair); -int sock_no_accept(struct socket *sock, struct socket *newsock, int flags) +int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { return -EOPNOTSUPP; } @@ -2481,7 +2482,14 @@ void sock_init_data(struct socket *sock, struct sock *sk) } rwlock_init(&sk->sk_callback_lock); - lockdep_set_class_and_name(&sk->sk_callback_lock, + if (sk->sk_kern_sock) + lockdep_set_class_and_name( + &sk->sk_callback_lock, + af_kern_callback_keys + sk->sk_family, + af_family_kern_clock_key_strings[sk->sk_family]); + else + lockdep_set_class_and_name( + &sk->sk_callback_lock, af_callback_keys + sk->sk_family, af_family_clock_key_strings[sk->sk_family]); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index e6e79eda9763..7de5b40a5d0d 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1070,7 +1070,8 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo) return skb == NULL ? ERR_PTR(err) : skb; } -static int dn_accept(struct socket *sock, struct socket *newsock, int flags) +static int dn_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk, *newsk; struct sk_buff *skb = NULL; @@ -1099,7 +1100,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags) cb = DN_SKB_CB(skb); sk->sk_ack_backlog--; - newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0); + newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern); if (newsk == NULL) { release_sock(sk); kfree_skb(skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5091f46826fa..6b1fc6e4278e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -689,11 +689,12 @@ EXPORT_SYMBOL(inet_stream_connect); * Accept a pending connection. The TCP layer now gives BSD semantics. */ -int inet_accept(struct socket *sock, struct socket *newsock, int flags) +int inet_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk1 = sock->sk; int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err); + struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); if (!sk2) goto do_err; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b4d5980ade3b..5e313c1ac94f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -424,7 +424,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) /* * This will accept the next outstanding connection. */ -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 81adc29a448d..8d77ad5cadaf 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -828,7 +828,8 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * Wait for incoming connection * */ -static int irda_accept(struct socket *sock, struct socket *newsock, int flags) +static int irda_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct irda_sock *new, *self = irda_sk(sk); @@ -836,7 +837,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) struct sk_buff *skb = NULL; int err; - err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); + err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern); if (err) return err; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 89bbde1081ce..84de7b6326dc 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -938,7 +938,7 @@ static int iucv_sock_listen(struct socket *sock, int backlog) /* Accept a pending connection */ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *nsk; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 06186d608a27..cb4fff785cbf 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -641,11 +641,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) * @sock: Socket which connections arrive on. * @newsock: Socket to move incoming connection to. * @flags: User specified operational flags. + * @kern: If the socket is kernel internal * * Accept a new incoming connection. * Returns 0 upon success, negative otherwise. */ -static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags) +static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk, *newsk; struct llc_sock *llc, *newllc; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4bbf4526b885..ebf16f7f9089 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -765,7 +765,8 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr, return err; } -static int nr_accept(struct socket *sock, struct socket *newsock, int flags) +static int nr_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 879885b31cce..2ffb18e73df6 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -441,7 +441,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } static int llcp_sock_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *new_sk; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 222bedcd9575..e81537991ddf 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -772,7 +772,8 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, + bool kern) { struct pep_sock *pn = pep_sk(sk), *newpn; struct sock *newsk = NULL; @@ -846,7 +847,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) } /* Create a new to-be-accepted sock */ - newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0); + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, + kern); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; diff --git a/net/phonet/socket.c b/net/phonet/socket.c index a6c8da3ee893..64634e3ec2fc 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -305,7 +305,7 @@ static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, } static int pn_socket_accept(struct socket *sock, struct socket *newsock, - int flags) + int flags, bool kern) { struct sock *sk = sock->sk; struct sock *newsk; @@ -314,7 +314,7 @@ static int pn_socket_accept(struct socket *sock, struct socket *newsock, if (unlikely(sk->sk_state != TCP_LISTEN)) return -EINVAL; - newsk = sk->sk_prot->accept(sk, flags, &err); + newsk = sk->sk_prot->accept(sk, flags, &err, kern); if (!newsk) return err; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 2c69a508a693..507678853e6c 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -133,7 +133,7 @@ int rds_tcp_accept_one(struct socket *sock) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); if (ret < 0) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index b8a1df2c9785..4a9729257023 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -871,7 +871,8 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le return err; } -static int rose_accept(struct socket *sock, struct socket *newsock, int flags) +static int rose_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sk_buff *skb; struct sock *newsk; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 063baac5b9fe..961ee59f696a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -640,14 +640,15 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) /* Create and initialize a new sk for the socket to be returned by accept(). */ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, - struct sctp_association *asoc) + struct sctp_association *asoc, + bool kern) { struct sock *newsk; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; struct ipv6_txoptions *opt; - newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0); + newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern); if (!newsk) goto out; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1b6d4574d2b0..989a900383b5 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -575,10 +575,11 @@ static int sctp_v4_is_ce(const struct sk_buff *skb) /* Create and initialize a new sk for the socket returned by accept(). */ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, - struct sctp_association *asoc) + struct sctp_association *asoc, + bool kern) { struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, - sk->sk_prot, 0); + sk->sk_prot, kern); struct inet_sock *newinet; if (!newsk) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6f0a9be50f50..0f378ea2ae38 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4116,7 +4116,7 @@ static int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -static struct sock *sctp_accept(struct sock *sk, int flags, int *err) +static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -4151,7 +4151,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err) */ asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - newsk = sp->pf->create_accept_sk(sk, asoc); + newsk = sp->pf->create_accept_sk(sk, asoc, kern); if (!newsk) { error = -ENOMEM; goto out; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 85837ab90e89..093803786eac 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -944,7 +944,7 @@ static int smc_listen(struct socket *sock, int backlog) } static int smc_accept(struct socket *sock, struct socket *new_sock, - int flags) + int flags, bool kern) { struct sock *sk = sock->sk, *nsk; DECLARE_WAITQUEUE(wait, current); diff --git a/net/socket.c b/net/socket.c index e0757e648c0c..e034fe4164be 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1506,7 +1506,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, if (err) goto out_fd; - err = sock->ops->accept(sock, newsock, sock->file->f_flags); + err = sock->ops->accept(sock, newsock, sock->file->f_flags, false); if (err < 0) goto out_fd; @@ -3239,7 +3239,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) if (err < 0) goto done; - err = sock->ops->accept(sock, *newsock, flags); + err = sock->ops->accept(sock, *newsock, flags, true); if (err < 0) { sock_release(*newsock); *newsock = NULL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 43e4045e72bc..7130e73bd42c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -115,7 +115,8 @@ static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags); +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + bool kern); static void tipc_sk_timeout(unsigned long data); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); @@ -2029,7 +2030,8 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * * Returns 0 on success, errno otherwise */ -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) +static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, + bool kern) { struct sock *new_sk, *sk = sock->sk; struct sk_buff *buf; @@ -2051,7 +2053,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ee37b390260a..928691c43408 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -636,7 +636,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); -static int unix_accept(struct socket *, struct socket *, int); +static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); static unsigned int unix_dgram_poll(struct file *, struct socket *, @@ -1402,7 +1402,8 @@ static void unix_sock_inherit_flags(const struct socket *old, set_bit(SOCK_PASSSEC, &new->flags); } -static int unix_accept(struct socket *sock, struct socket *newsock, int flags) +static int unix_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sock *tsk; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 9192ead66751..9f770f33c100 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1250,7 +1250,8 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, return err; } -static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) +static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *listener; int err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index fd28a49dbe8f..8b911c29860e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -852,7 +852,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout) return rc; } -static int x25_accept(struct socket *sock, struct socket *newsock, int flags) +static int x25_accept(struct socket *sock, struct socket *newsock, int flags, + bool kern) { struct sock *sk = sock->sk; struct sock *newsk; From 4b3b45edba9222e518a1ec72df841eba3609fe34 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 9 Mar 2017 13:56:46 +0300 Subject: [PATCH 152/384] udp: avoid ufo handling on IP payload compression packets commit c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets") and commit f89c56ce710a ("ipv6: Don't use ufo handling on later transformed packets") added a check that 'rt->dst.header_len' isn't zero in order to skip UFO, but it doesn't include IPcomp in transport mode where it equals zero. Packets, after payload compression, may not require further fragmentation, and if original length exceeds MTU, later compressed packets will be transmitted incorrectly. This can be reproduced with LTP udp_ipsec.sh test on veth device with enabled UFO, MTU is 1500 and UDP payload is 2000: * IPv4 case, offset is wrong + unnecessary fragmentation udp_ipsec.sh -p comp -m transport -s 2000 & tcpdump -ni ltp_ns_veth2 ... IP (tos 0x0, ttl 64, id 45203, offset 0, flags [+], proto Compressed IP (108), length 49) 10.0.0.2 > 10.0.0.1: IPComp(cpi=0x1000) IP (tos 0x0, ttl 64, id 45203, offset 1480, flags [none], proto UDP (17), length 21) 10.0.0.2 > 10.0.0.1: ip-proto-17 * IPv6 case, sending small fragments udp_ipsec.sh -6 -p comp -m transport -s 2000 & tcpdump -ni ltp_ns_veth2 ... IP6 (flowlabel 0x6b9ba, hlim 64, next-header Compressed IP (108) payload length: 37) fd00::2 > fd00::1: IPComp(cpi=0x1000) IP6 (flowlabel 0x6b9ba, hlim 64, next-header Compressed IP (108) payload length: 21) fd00::2 > fd00::1: IPComp(cpi=0x1000) Fix it by checking 'rt->dst.xfrm' pointer to 'xfrm_state' struct, skip UFO if xfrm is set. So the new check will include both cases: IPcomp and IPsec. Fixes: c146066ab802 ("ipv4: Don't use ufo handling on later transformed packets") Fixes: f89c56ce710a ("ipv6: Don't use ufo handling on later transformed packets") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 737ce826d7ec..7a3fd25e8913 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -966,7 +966,7 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 528b3c1f3fde..df42096e1f04 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1385,7 +1385,7 @@ static int __ip6_append_data(struct sock *sk, if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, From 6fc166d62c6f9f6eda5ea300f671d34e89bdd3c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Mar 2017 08:10:32 +0000 Subject: [PATCH 153/384] rxrpc: rxrpc_kernel_send_data() needs to handle failed call better If rxrpc_kernel_send_data() is asked to send data through a call that has already failed (due to a remote abort, received protocol error or network error), then return the associated error code saved in the call rather than ESHUTDOWN. This allows the caller to work out whether to ask for the abort code or not based on this. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/sendmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 47ccfeacc1c6..97ab214ca411 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -618,8 +618,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); break; case RXRPC_CALL_COMPLETE: - /* It's too late for this call */ - ret = -ESHUTDOWN; + read_lock_bh(&call->state_lock); + ret = -call->error; + read_unlock_bh(&call->state_lock); break; default: /* Request phase complete for this client call */ From d7aba644ffdebf756e51e26a2229055211838e89 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Thu, 9 Mar 2017 17:48:23 -0600 Subject: [PATCH 154/384] amd-xgbe: Enable IRQs only if napi_complete_done() is true Depending on the hardware, the amd-xgbe driver may use disable_irq_nosync() and enable_irq() when an interrupt is received to process Rx packets. If the napi_complete_done() return value isn't checked an unbalanced enable for the IRQ could result, generating a warning stack trace. Update the driver to only enable interrupts if napi_complete_done() returns true. Reported-by: Jeremy Linton Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 248f60d171a5..ffea9859f5a7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -2272,10 +2272,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget) processed = xgbe_rx_poll(channel, budget); /* If we processed everything, we are done */ - if (processed < budget) { - /* Turn off polling */ - napi_complete_done(napi, processed); - + if ((processed < budget) && napi_complete_done(napi, processed)) { /* Enable Tx and Rx interrupts */ if (pdata->channel_irq_mode) xgbe_enable_rx_tx_int(pdata, channel); @@ -2317,10 +2314,7 @@ static int xgbe_all_poll(struct napi_struct *napi, int budget) } while ((processed < budget) && (processed != last_processed)); /* If we processed everything, we are done */ - if (processed < budget) { - /* Turn off polling */ - napi_complete_done(napi, processed); - + if ((processed < budget) && napi_complete_done(napi, processed)) { /* Enable Tx and Rx interrupts */ xgbe_enable_rx_tx_ints(pdata); } From ffff71328a3c321f7c14cc1edd33577717037744 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:43 -0800 Subject: [PATCH 155/384] net: bcmgenet: correct the RBUF_OVFL_CNT and RBUF_ERR_CNT MIB values The location of the RBUF overflow and error counters has moved between different version of the GENET MAC. This commit corrects the driver to read from the correct locations depending on the version of the GENET MAC. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 60 ++++++++++++++++--- .../net/ethernet/broadcom/genet/bcmgenet.h | 10 +++- 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f92896835d2a..f0fb7eca8eb4 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1,7 +1,7 @@ /* * Broadcom GENET (Gigabit Ethernet) controller driver * - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -778,8 +778,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { STAT_GENET_RUNT("rx_runt_bytes", mib.rx_runt_bytes), /* Misc UniMAC counters */ STAT_GENET_MISC("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, - UMAC_RBUF_OVFL_CNT), - STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT), + UMAC_RBUF_OVFL_CNT_V1), + STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, + UMAC_RBUF_ERR_CNT_V1), STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT), STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), @@ -821,6 +822,45 @@ static void bcmgenet_get_strings(struct net_device *dev, u32 stringset, } } +static u32 bcmgenet_update_stat_misc(struct bcmgenet_priv *priv, u16 offset) +{ + u16 new_offset; + u32 val; + + switch (offset) { + case UMAC_RBUF_OVFL_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_OVFL_CNT_V2; + else + new_offset = RBUF_OVFL_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + case UMAC_RBUF_ERR_CNT_V1: + if (GENET_IS_V2(priv)) + new_offset = RBUF_ERR_CNT_V2; + else + new_offset = RBUF_ERR_CNT_V3PLUS; + + val = bcmgenet_rbuf_readl(priv, new_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_rbuf_writel(priv, 0, new_offset); + break; + default: + val = bcmgenet_umac_readl(priv, offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, offset); + break; + } + + return val; +} + static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) { int i, j = 0; @@ -845,10 +885,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) UMAC_MIB_START + j + offset); break; case BCMGENET_STAT_MISC: - val = bcmgenet_umac_readl(priv, s->reg_offset); - /* clear if overflowed */ - if (val == ~0) - bcmgenet_umac_writel(priv, 0, s->reg_offset); + if (GENET_IS_V1(priv)) { + val = bcmgenet_umac_readl(priv, s->reg_offset); + /* clear if overflowed */ + if (val == ~0) + bcmgenet_umac_writel(priv, 0, + s->reg_offset); + } else { + val = bcmgenet_update_stat_misc(priv, + s->reg_offset); + } break; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 1e2dc34d331a..d5fb0d772dcd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Broadcom Corporation + * Copyright (c) 2014-2017 Broadcom * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -214,7 +214,9 @@ struct bcmgenet_mib_counters { #define MDIO_REG_SHIFT 16 #define MDIO_REG_MASK 0x1F -#define UMAC_RBUF_OVFL_CNT 0x61C +#define UMAC_RBUF_OVFL_CNT_V1 0x61C +#define RBUF_OVFL_CNT_V2 0x80 +#define RBUF_OVFL_CNT_V3PLUS 0x94 #define UMAC_MPD_CTRL 0x620 #define MPD_EN (1 << 0) @@ -224,7 +226,9 @@ struct bcmgenet_mib_counters { #define UMAC_MPD_PW_MS 0x624 #define UMAC_MPD_PW_LS 0x628 -#define UMAC_RBUF_ERR_CNT 0x634 +#define UMAC_RBUF_ERR_CNT_V1 0x634 +#define RBUF_ERR_CNT_V2 0x84 +#define RBUF_ERR_CNT_V3PLUS 0x98 #define UMAC_MDF_ERR_CNT 0x638 #define UMAC_MDF_CTRL 0x650 #define UMAC_MDF_ADDR 0x654 From 1ad3d225e5a40ca6c586989b4baaca710544c15a Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:44 -0800 Subject: [PATCH 156/384] net: bcmgenet: correct MIB access of UniMAC RUNT counters The gap between the Tx status counters and the Rx RUNT counters is now being added to allow correct reporting of the registers. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f0fb7eca8eb4..01a172d95328 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -876,13 +876,16 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) case BCMGENET_STAT_NETDEV: case BCMGENET_STAT_SOFT: continue; - case BCMGENET_STAT_MIB_RX: - case BCMGENET_STAT_MIB_TX: case BCMGENET_STAT_RUNT: - if (s->type != BCMGENET_STAT_MIB_RX) - offset = BCMGENET_STAT_OFFSET; + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_TX: + offset += BCMGENET_STAT_OFFSET; + /* fall through */ + case BCMGENET_STAT_MIB_RX: val = bcmgenet_umac_readl(priv, UMAC_MIB_START + j + offset); + offset = 0; /* Reset Offset */ break; case BCMGENET_STAT_MISC: if (GENET_IS_V1(priv)) { From eca4bad73409aedc6ff22f823c18b67a4f08c851 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:45 -0800 Subject: [PATCH 157/384] net: bcmgenet: reserved phy revisions must be checked first The reserved gphy_rev value of 0x01ff must be tested before the old or new scheme for GPHY major versioning are tested, otherwise it will be treated as 0xff00 according to the old scheme. Fixes: b04a2f5b9ff5 ("net: bcmgenet: add support for new GENET PHY revision scheme") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 01a172d95328..99f8d9024633 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3226,6 +3226,12 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) */ gphy_rev = reg & 0xffff; + /* This is reserved so should require special treatment */ + if (gphy_rev == 0 || gphy_rev == 0x01ff) { + pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); + return; + } + /* This is the good old scheme, just GPHY major, no minor nor patch */ if ((gphy_rev & 0xf0) != 0) priv->gphy_rev = gphy_rev << 8; @@ -3234,12 +3240,6 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv) else if ((gphy_rev & 0xff00) != 0) priv->gphy_rev = gphy_rev; - /* This is reserved so should require special treatment */ - else if (gphy_rev == 0 || gphy_rev == 0x01ff) { - pr_warn("Invalid GPHY revision detected: 0x%04x\n", gphy_rev); - return; - } - #ifdef CONFIG_PHYS_ADDR_T_64BIT if (!(params->flags & GENET_HAS_40BITS)) pr_warn("GENET does not support 40-bits PA\n"); From 7627409cc4970e8c8b9de6945ad86a575290a94e Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:46 -0800 Subject: [PATCH 158/384] net: bcmgenet: power down internal phy if open or resume fails Since the internal PHY is powered up during the open and resume functions it should be powered back down if the functions fail. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 99f8d9024633..475dc14931af 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2850,6 +2850,8 @@ static int bcmgenet_open(struct net_device *dev) err_fini_dma: bcmgenet_fini_dma(priv); err_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } @@ -3551,6 +3553,8 @@ static int bcmgenet_resume(struct device *d) return 0; out_clk_disable: + if (priv->internal_phy) + bcmgenet_power_down(priv, GENET_POWER_PASSIVE); clk_disable_unprepare(priv->clk); return ret; } From 07c52d6a0b955a8a28834f9354793cfc4b81d0e9 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:47 -0800 Subject: [PATCH 159/384] net: bcmgenet: synchronize irq0 status between the isr and task Add a spinlock to ensure that irq0_stat is not unintentionally altered as the result of preemption. Also removed unserviced irq0 interrupts and removed irq1_stat since there is no bottom half service for those interrupts. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 73 ++++++++++--------- .../net/ethernet/broadcom/genet/bcmgenet.h | 6 +- 2 files changed, 44 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 475dc14931af..527cecaf12c1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2506,24 +2506,28 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) /* Interrupt bottom half */ static void bcmgenet_irq_task(struct work_struct *work) { + unsigned long flags; + unsigned int status; struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); netif_dbg(priv, intr, priv->dev, "%s\n", __func__); - if (priv->irq0_stat & UMAC_IRQ_MPD_R) { - priv->irq0_stat &= ~UMAC_IRQ_MPD_R; + spin_lock_irqsave(&priv->lock, flags); + status = priv->irq0_stat; + priv->irq0_stat = 0; + spin_unlock_irqrestore(&priv->lock, flags); + + if (status & UMAC_IRQ_MPD_R) { netif_dbg(priv, wol, priv->dev, "magic packet detected, waking up\n"); bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); } /* Link UP/DOWN event */ - if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { + if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->phydev, - !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); - priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; - } + !!(status & UMAC_IRQ_LINK_UP)); } /* bcmgenet_isr1: handle Rx and Tx priority queues */ @@ -2532,22 +2536,21 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; - unsigned int index; + unsigned int index, status; - /* Save irq status for bottom-half processing. */ - priv->irq1_stat = - bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_1_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); + "%s: IRQ=0x%x\n", __func__, status); /* Check Rx priority queue interrupts */ for (index = 0; index < priv->hw_params->rx_queues; index++) { - if (!(priv->irq1_stat & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) + if (!(status & BIT(UMAC_IRQ1_RX_INTR_SHIFT + index))) continue; rx_ring = &priv->rx_rings[index]; @@ -2560,7 +2563,7 @@ static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) /* Check Tx priority queue interrupts */ for (index = 0; index < priv->hw_params->tx_queues; index++) { - if (!(priv->irq1_stat & BIT(index))) + if (!(status & BIT(index))) continue; tx_ring = &priv->tx_rings[index]; @@ -2580,19 +2583,20 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) struct bcmgenet_priv *priv = dev_id; struct bcmgenet_rx_ring *rx_ring; struct bcmgenet_tx_ring *tx_ring; + unsigned int status; + unsigned long flags; - /* Save irq status for bottom-half processing. */ - priv->irq0_stat = - bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & + /* Read irq status */ + status = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT) & ~bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ - bcmgenet_intrl2_0_writel(priv, priv->irq0_stat, INTRL2_CPU_CLEAR); + bcmgenet_intrl2_0_writel(priv, status, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, - "IRQ=0x%x\n", priv->irq0_stat); + "IRQ=0x%x\n", status); - if (priv->irq0_stat & UMAC_IRQ_RXDMA_DONE) { + if (status & UMAC_IRQ_RXDMA_DONE) { rx_ring = &priv->rx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&rx_ring->napi))) { @@ -2601,7 +2605,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & UMAC_IRQ_TXDMA_DONE) { + if (status & UMAC_IRQ_TXDMA_DONE) { tx_ring = &priv->tx_rings[DESC_INDEX]; if (likely(napi_schedule_prep(&tx_ring->napi))) { @@ -2610,20 +2614,21 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } } - if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | - UMAC_IRQ_PHY_DET_F | - UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_HFB_SM | - UMAC_IRQ_HFB_MM | - UMAC_IRQ_MPD_R)) { - /* all other interested interrupts handled in bottom half */ - schedule_work(&priv->bcmgenet_irq_work); + if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && + status & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { + wake_up(&priv->wq); } - if ((priv->hw_params->flags & GENET_HAS_MDIO_INTR) && - priv->irq0_stat & (UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR)) { - priv->irq0_stat &= ~(UMAC_IRQ_MDIO_DONE | UMAC_IRQ_MDIO_ERROR); - wake_up(&priv->wq); + /* all other interested interrupts handled in bottom half */ + status &= (UMAC_IRQ_LINK_EVENT | + UMAC_IRQ_MPD_R); + if (status) { + /* Save irq status for bottom-half processing. */ + spin_lock_irqsave(&priv->lock, flags); + priv->irq0_stat |= status; + spin_unlock_irqrestore(&priv->lock, flags); + + schedule_work(&priv->bcmgenet_irq_work); } return IRQ_HANDLED; @@ -3327,6 +3332,8 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } + spin_lock_init(&priv->lock); + SET_NETDEV_DEV(dev, &pdev->dev); dev_set_drvdata(&pdev->dev, dev); ether_addr_copy(dev->dev_addr, macaddr); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index d5fb0d772dcd..db7f289d65ae 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -623,11 +623,13 @@ struct bcmgenet_priv { struct work_struct bcmgenet_irq_work; int irq0; int irq1; - unsigned int irq0_stat; - unsigned int irq1_stat; int wol_irq; bool wol_irq_disabled; + /* shared status */ + spinlock_t lock; + unsigned int irq0_stat; + /* HW descriptors/checksum variables */ bool desc_64b_en; bool desc_rxchk_en; From 6be371b053dc86f11465cc1abce2e99bda0a0574 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:48 -0800 Subject: [PATCH 160/384] net: bcmgenet: Power up the internal PHY before probing the MII When using the internal PHY it must be powered up when the MII is probed or the PHY will not be detected. Since the PHY is powered up at reset this has not been a problem. However, when the kernel is restarted with kexec the PHY will likely be powered down when the kernel starts so it will not be detected and the Ethernet link will not be established. This commit explicitly powers up the internal PHY when the GENET driver is probed to correct this behavior. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 527cecaf12c1..f93098840775 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3289,6 +3289,7 @@ static int bcmgenet_probe(struct platform_device *pdev) const void *macaddr; struct resource *r; int err = -EIO; + const char *phy_mode_str; /* Up to GENET_MAX_MQ_CNT + 1 TX queues and RX queues */ dev = alloc_etherdev_mqs(sizeof(*priv), GENET_MAX_MQ_CNT + 1, @@ -3396,6 +3397,13 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->clk_eee = NULL; } + /* If this is an internal GPHY, power it on now, before UniMAC is + * brought out of reset as absolutely no UniMAC activity is allowed + */ + if (dn && !of_property_read_string(dn, "phy-mode", &phy_mode_str) && + !strcasecmp(phy_mode_str, "internal")) + bcmgenet_power_up(priv, GENET_POWER_PASSIVE); + err = reset_umac(priv); if (err) goto err_clk_disable; From 89316fa34ab8afac8d693f41a5bc268673f1da15 Mon Sep 17 00:00:00 2001 From: Edwin Chan Date: Thu, 9 Mar 2017 16:58:49 -0800 Subject: [PATCH 161/384] net: bcmgenet: add begin/complete ethtool ops Make sure clock is enabled for ethtool ops. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Edwin Chan Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f93098840775..ec1f3014e410 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,6 +450,22 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } +static int bcmgenet_begin(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + + /* Turn on the clock */ + return clk_prepare_enable(priv->clk); +} + +static void bcmgenet_complete(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + + /* Turn off the clock */ + clk_disable_unprepare(priv->clk); +} + static int bcmgenet_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { @@ -1022,6 +1038,8 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) /* standard ethtool support functions. */ static const struct ethtool_ops bcmgenet_ethtool_ops = { + .begin = bcmgenet_begin, + .complete = bcmgenet_complete, .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, From 6d22fe14005ce66d1a120495ac16499b944feb95 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 9 Mar 2017 16:58:50 -0800 Subject: [PATCH 162/384] net: bcmgenet: decouple flow control from bcmgenet_tx_reclaim The bcmgenet_tx_reclaim() function is used to reclaim transmit resources in different places within the driver. Most of them should not affect the state of the transmit flow control. This commit relocates the logic for waking tx queues based on freed resources to the napi polling function where it is more appropriate. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ec1f3014e410..69015fa50f20 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1234,7 +1234,6 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; struct enet_cb *tx_cb_ptr; - struct netdev_queue *txq; unsigned int pkts_compl = 0; unsigned int bytes_compl = 0; unsigned int c_index; @@ -1286,13 +1285,8 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, dev->stats.tx_packets += pkts_compl; dev->stats.tx_bytes += bytes_compl; - txq = netdev_get_tx_queue(dev, ring->queue); - netdev_tx_completed_queue(txq, pkts_compl, bytes_compl); - - if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { - if (netif_tx_queue_stopped(txq)) - netif_tx_wake_queue(txq); - } + netdev_tx_completed_queue(netdev_get_tx_queue(dev, ring->queue), + pkts_compl, bytes_compl); return pkts_compl; } @@ -1315,8 +1309,16 @@ static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) struct bcmgenet_tx_ring *ring = container_of(napi, struct bcmgenet_tx_ring, napi); unsigned int work_done = 0; + struct netdev_queue *txq; + unsigned long flags; - work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring); + spin_lock_irqsave(&ring->lock, flags); + work_done = __bcmgenet_tx_reclaim(ring->priv->dev, ring); + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { + txq = netdev_get_tx_queue(ring->priv->dev, ring->queue); + netif_tx_wake_queue(txq); + } + spin_unlock_irqrestore(&ring->lock, flags); if (work_done == 0) { napi_complete(napi); From 46f401c4297a2232a037ad8801b6c83c90414cf7 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 9 Mar 2017 20:33:51 -0600 Subject: [PATCH 163/384] powerpc/pmac: Fix crash in dma-mapping.h with NULL dma_ops Commit 5657933dbb6e ("treewide: Move dma_ops from struct dev_archdata into struct device") introduced a crash for macio devices, an example backtrace being: kernel BUG at ./include/linux/dma-mapping.h:465! Oops: Exception in kernel mode, sig: 5 [#1] ... NIP [c031ddb0] dmam_alloc_coherent+0x74/0x140 LR [c031de70] dmam_alloc_coherent+0x134/0x140 Call Trace: dmam_alloc_coherent+0x134/0x140 (unreliable) pata_macio_port_start+0x3c/0x8c ata_host_start.part.5+0xfc/0x208 ata_host_activate+0x128/0x154 pata_macio_common_init+0x2f0/0x538 pata_macio_attach+0xd8/0x180 macio_device_probe+0x5c/0xec driver_probe_device+0x21c/0x314 __driver_attach+0xcc/0xd0 bus_for_each_dev+0x68/0xb4 bus_add_driver+0x1dc/0x244 driver_register+0x88/0x130 pata_macio_init+0x5c/0x88 do_one_initcall+0x40/0x170 kernel_init_freeable+0x134/0x1d0 kernel_init+0x18/0x110 ret_from_kernel_thread+0x5c/0x64 This was caused by the device having NULL dma_ops, triggering the BUG_ON(). Previously the device inherited its dma_ops via the assignment to dev->ofdev.dev.archdata. However after commit 5657933dbb6e the dma_ops are moved into dev->ofdev.dev, and so they need to be explicitly copied. Fixes: 5657933dbb6e ("treewide: Move dma_ops from struct dev_archdata into struct device") Signed-off-by: Larry Finger Suggested-by: Benjamin Herrenschmidt [mpe: Rewrite change log, add backtrace] Signed-off-by: Michael Ellerman --- drivers/macintosh/macio_asic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index 3f041b187033..f757cef293f8 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -392,6 +392,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, * To get all the fields, copy all archdata */ dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata; + dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops; #endif /* CONFIG_PCI */ #ifdef DEBUG From 1363875bdb6317a2d0798284d7aaf320f0782f6d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 28 Feb 2017 12:00:46 +1000 Subject: [PATCH 164/384] powerpc/64s: fix handling of non-synchronous machine checks A synchronous machine check is an exception raised by the attempt to execute the current instruction. If the error can't be corrected, it can make sense to SIGBUS the currently running process. In other cases, the error condition is not related to the current instruction, so killing the current process is not the right thing to do. Today, all machine checks are MCE_SEV_ERROR_SYNC, so this has no practical change. It will be used to handle POWER9 asynchronous machine checks. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 86d9fde93c17..e0f856bfbfe8 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -395,7 +395,6 @@ static int opal_recover_mce(struct pt_regs *regs, struct machine_check_event *evt) { int recovered = 0; - uint64_t ea = get_mce_fault_addr(evt); if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ @@ -404,26 +403,18 @@ static int opal_recover_mce(struct pt_regs *regs, } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ recovered = 1; - } else if (ea && !is_kernel_addr(ea)) { + } else if (evt->severity == MCE_SEV_FATAL) { + /* Fatal machine check */ + pr_err("Machine check interrupt is fatal\n"); + recovered = 0; + } else if ((evt->severity == MCE_SEV_ERROR_SYNC) && + (user_mode(regs) && !is_global_init(current))) { /* - * Faulting address is not in kernel text. We should be fine. - * We need to find which process uses this address. * For now, kill the task if we have received exception when * in userspace. * * TODO: Queue up this address for hwpoisioning later. */ - if (user_mode(regs) && !is_global_init(current)) { - _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); - recovered = 1; - } else - recovered = 0; - } else if (user_mode(regs) && !is_global_init(current) && - evt->severity == MCE_SEV_ERROR_SYNC) { - /* - * If we have received a synchronous error when in userspace - * kill the task. - */ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); recovered = 1; } From c1bbf387d6191e6e18f3adc4db45b922822c2ba4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 28 Feb 2017 12:00:47 +1000 Subject: [PATCH 165/384] powerpc/64s: allow machine check handler to set severity and initiator Currently severity and initiator are always set to MCE_SEV_ERROR_SYNC and MCE_INITIATOR_CPU in the core mce code. Allow them to be set by the machine specific mce handlers. No functional change for existing handlers. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mce.h | 3 ++- arch/powerpc/kernel/mce.c | 5 +++-- arch/powerpc/kernel/mce_power.c | 6 ++++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index f97d8cb6bdf6..b2a5865ccd87 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -177,7 +177,8 @@ struct mce_error_info { enum MCE_EratErrorType erat_error_type:8; enum MCE_TlbErrorType tlb_error_type:8; } u; - uint8_t reserved[2]; + enum MCE_Severity severity:8; + enum MCE_Initiator initiator:8; }; #define MAX_MC_EVT 100 diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index c6923ff45131..949507277436 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -90,13 +90,14 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->gpr3 = regs->gpr[3]; mce->in_use = 1; - mce->initiator = MCE_INITIATOR_CPU; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; - mce->severity = MCE_SEV_ERROR_SYNC; + + mce->initiator = mce_err->initiator; + mce->severity = mce_err->severity; /* * Populate the mce error_type and type-specific error_type. diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 7353991c4ece..c37fc5fdd433 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -281,6 +281,9 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; @@ -352,6 +355,9 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) long handled = 1; struct mce_error_info mce_error_info = { 0 }; + mce_error_info.severity = MCE_SEV_ERROR_SYNC; + mce_error_info.initiator = MCE_INITIATOR_CPU; + srr1 = regs->msr; nip = regs->nip; From 7b9f71f974a12740e79e918cfd58c2fce0b5b580 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 28 Feb 2017 12:00:48 +1000 Subject: [PATCH 166/384] powerpc/64s: POWER9 machine check handler Add POWER9 machine check handler. There are several new types of errors added, so logging messages for those are also added. This doesn't attempt to reuse any of the P7/8 defines or functions, because that becomes too complex. The better option in future is to use a table driven approach. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/bitops.h | 4 + arch/powerpc/include/asm/mce.h | 105 ++++++++++++++ arch/powerpc/kernel/cputable.c | 3 + arch/powerpc/kernel/mce.c | 83 +++++++++++ arch/powerpc/kernel/mce_power.c | 231 ++++++++++++++++++++++++++++++ 5 files changed, 426 insertions(+) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 73eb794d6163..bc5fdfd22788 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -51,6 +51,10 @@ #define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit)) #define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs)) +/* Put a PPC bit into a "normal" bit position */ +#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \ + ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit)) + #include /* Macro for generating the ***_bits() functions */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index b2a5865ccd87..ed62efe01e49 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -66,6 +66,55 @@ #define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ P8_DSISR_MC_ERAT_MULTIHIT_SEC) + +/* + * Machine Check bits on power9 + */ +#define P9_SRR1_MC_LOADSTORE(srr1) (((srr1) >> PPC_BITLSHIFT(42)) & 1) + +#define P9_SRR1_MC_IFETCH(srr1) ( \ + PPC_BITEXTRACT(srr1, 45, 0) | \ + PPC_BITEXTRACT(srr1, 44, 1) | \ + PPC_BITEXTRACT(srr1, 43, 2) | \ + PPC_BITEXTRACT(srr1, 36, 3) ) + +/* 0 is reserved */ +#define P9_SRR1_MC_IFETCH_UE 1 +#define P9_SRR1_MC_IFETCH_SLB_PARITY 2 +#define P9_SRR1_MC_IFETCH_SLB_MULTIHIT 3 +#define P9_SRR1_MC_IFETCH_ERAT_MULTIHIT 4 +#define P9_SRR1_MC_IFETCH_TLB_MULTIHIT 5 +#define P9_SRR1_MC_IFETCH_UE_TLB_RELOAD 6 +/* 7 is reserved */ +#define P9_SRR1_MC_IFETCH_LINK_TIMEOUT 8 +#define P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT 9 +/* 10 ? */ +#define P9_SRR1_MC_IFETCH_RA 11 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK 12 +#define P9_SRR1_MC_IFETCH_RA_ASYNC_STORE 13 +#define P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT 14 +#define P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN 15 + +/* DSISR bits for machine check (On Power9) */ +#define P9_DSISR_MC_UE (PPC_BIT(48)) +#define P9_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) +#define P9_DSISR_MC_LINK_LOAD_TIMEOUT (PPC_BIT(50)) +#define P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT (PPC_BIT(51)) +#define P9_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) +#define P9_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) +#define P9_DSISR_MC_USER_TLBIE (PPC_BIT(54)) +#define P9_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) +#define P9_DSISR_MC_SLB_MULTIHIT_MFSLB (PPC_BIT(56)) +#define P9_DSISR_MC_RA_LOAD (PPC_BIT(57)) +#define P9_DSISR_MC_RA_TABLEWALK (PPC_BIT(58)) +#define P9_DSISR_MC_RA_TABLEWALK_FOREIGN (PPC_BIT(59)) +#define P9_DSISR_MC_RA_FOREIGN (PPC_BIT(60)) + +/* SLB error bits */ +#define P9_DSISR_MC_SLB_ERRORS (P9_DSISR_MC_ERAT_MULTIHIT | \ + P9_DSISR_MC_SLB_PARITY_MFSLB | \ + P9_DSISR_MC_SLB_MULTIHIT_MFSLB) + enum MCE_Version { MCE_V1 = 1, }; @@ -93,6 +142,9 @@ enum MCE_ErrorType { MCE_ERROR_TYPE_SLB = 2, MCE_ERROR_TYPE_ERAT = 3, MCE_ERROR_TYPE_TLB = 4, + MCE_ERROR_TYPE_USER = 5, + MCE_ERROR_TYPE_RA = 6, + MCE_ERROR_TYPE_LINK = 7, }; enum MCE_UeErrorType { @@ -121,6 +173,32 @@ enum MCE_TlbErrorType { MCE_TLB_ERROR_MULTIHIT = 2, }; +enum MCE_UserErrorType { + MCE_USER_ERROR_INDETERMINATE = 0, + MCE_USER_ERROR_TLBIE = 1, +}; + +enum MCE_RaErrorType { + MCE_RA_ERROR_INDETERMINATE = 0, + MCE_RA_ERROR_IFETCH = 1, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2, + MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3, + MCE_RA_ERROR_LOAD = 4, + MCE_RA_ERROR_STORE = 5, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6, + MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7, + MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8, +}; + +enum MCE_LinkErrorType { + MCE_LINK_ERROR_INDETERMINATE = 0, + MCE_LINK_ERROR_IFETCH_TIMEOUT = 1, + MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2, + MCE_LINK_ERROR_LOAD_TIMEOUT = 3, + MCE_LINK_ERROR_STORE_TIMEOUT = 4, + MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5, +}; + struct machine_check_event { enum MCE_Version version:8; /* 0x00 */ uint8_t in_use; /* 0x01 */ @@ -166,6 +244,30 @@ struct machine_check_event { uint64_t effective_address; uint8_t reserved_2[16]; } tlb_error; + + struct { + enum MCE_UserErrorType user_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } user_error; + + struct { + enum MCE_RaErrorType ra_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } ra_error; + + struct { + enum MCE_LinkErrorType link_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } link_error; } u; }; @@ -176,6 +278,9 @@ struct mce_error_info { enum MCE_SlbErrorType slb_error_type:8; enum MCE_EratErrorType erat_error_type:8; enum MCE_TlbErrorType tlb_error_type:8; + enum MCE_UserErrorType user_error_type:8; + enum MCE_RaErrorType ra_error_type:8; + enum MCE_LinkErrorType link_error_type:8; } u; enum MCE_Severity severity:8; enum MCE_Initiator initiator:8; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index bb7a1890aeb7..e79b9daa873c 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -77,6 +77,7 @@ extern void __flush_tlb_power8(unsigned int action); extern void __flush_tlb_power9(unsigned int action); extern long __machine_check_early_realmode_p7(struct pt_regs *regs); extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); @@ -540,6 +541,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Power9 */ @@ -559,6 +561,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_power9, .cpu_restore = __restore_cpu_power9, .flush_tlb = __flush_tlb_power9, + .machine_check_early = __machine_check_early_realmode_p9, .platform = "power9", }, { /* Cell Broadband Engine */ diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 949507277436..a1475e6aef3a 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -58,6 +58,15 @@ static void mce_set_error_info(struct machine_check_event *mce, case MCE_ERROR_TYPE_TLB: mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type; break; + case MCE_ERROR_TYPE_USER: + mce->u.user_error.user_error_type = mce_err->u.user_error_type; + break; + case MCE_ERROR_TYPE_RA: + mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type; + break; + case MCE_ERROR_TYPE_LINK: + mce->u.link_error.link_error_type = mce_err->u.link_error_type; + break; case MCE_ERROR_TYPE_UNKNOWN: default: break; @@ -116,6 +125,15 @@ void save_mce_event(struct pt_regs *regs, long handled, } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) { mce->u.erat_error.effective_address_provided = true; mce->u.erat_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_USER) { + mce->u.user_error.effective_address_provided = true; + mce->u.user_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_RA) { + mce->u.ra_error.effective_address_provided = true; + mce->u.ra_error.effective_address = addr; + } else if (mce->error_type == MCE_ERROR_TYPE_LINK) { + mce->u.link_error.effective_address_provided = true; + mce->u.link_error.effective_address = addr; } else if (mce->error_type == MCE_ERROR_TYPE_UE) { mce->u.ue_error.effective_address_provided = true; mce->u.ue_error.effective_address = addr; @@ -240,6 +258,29 @@ void machine_check_print_event_info(struct machine_check_event *evt) "Parity", "Multihit", }; + static const char *mc_user_types[] = { + "Indeterminate", + "tlbie(l) invalid", + }; + static const char *mc_ra_types[] = { + "Indeterminate", + "Instruction fetch (bad)", + "Page table walk ifetch (bad)", + "Page table walk ifetch (foreign)", + "Load (bad)", + "Store (bad)", + "Page table walk Load/Store (bad)", + "Page table walk Load/Store (foreign)", + "Load/Store (foreign)", + }; + static const char *mc_link_types[] = { + "Indeterminate", + "Instruction fetch (timeout)", + "Page table walk ifetch (timeout)", + "Load (timeout)", + "Store (timeout)", + "Page table walk Load/Store (timeout)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -316,6 +357,36 @@ void machine_check_print_event_info(struct machine_check_event *evt) printk("%s Effective address: %016llx\n", level, evt->u.tlb_error.effective_address); break; + case MCE_ERROR_TYPE_USER: + subtype = evt->u.user_error.user_error_type < + ARRAY_SIZE(mc_user_types) ? + mc_user_types[evt->u.user_error.user_error_type] + : "Unknown"; + printk("%s Error type: User [%s]\n", level, subtype); + if (evt->u.user_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.user_error.effective_address); + break; + case MCE_ERROR_TYPE_RA: + subtype = evt->u.ra_error.ra_error_type < + ARRAY_SIZE(mc_ra_types) ? + mc_ra_types[evt->u.ra_error.ra_error_type] + : "Unknown"; + printk("%s Error type: Real address [%s]\n", level, subtype); + if (evt->u.ra_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.ra_error.effective_address); + break; + case MCE_ERROR_TYPE_LINK: + subtype = evt->u.link_error.link_error_type < + ARRAY_SIZE(mc_link_types) ? + mc_link_types[evt->u.link_error.link_error_type] + : "Unknown"; + printk("%s Error type: Link [%s]\n", level, subtype); + if (evt->u.link_error.effective_address_provided) + printk("%s Effective address: %016llx\n", + level, evt->u.link_error.effective_address); + break; default: case MCE_ERROR_TYPE_UNKNOWN: printk("%s Error type: Unknown\n", level); @@ -342,6 +413,18 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt) if (evt->u.tlb_error.effective_address_provided) return evt->u.tlb_error.effective_address; break; + case MCE_ERROR_TYPE_USER: + if (evt->u.user_error.effective_address_provided) + return evt->u.user_error.effective_address; + break; + case MCE_ERROR_TYPE_RA: + if (evt->u.ra_error.effective_address_provided) + return evt->u.ra_error.effective_address; + break; + case MCE_ERROR_TYPE_LINK: + if (evt->u.link_error.effective_address_provided) + return evt->u.link_error.effective_address; + break; default: case MCE_ERROR_TYPE_UNKNOWN: break; diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c37fc5fdd433..763d6f58caa8 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -116,6 +116,51 @@ static void flush_and_reload_slb(void) } #endif +static void flush_erat(void) +{ + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); +} + +#define MCE_FLUSH_SLB 1 +#define MCE_FLUSH_TLB 2 +#define MCE_FLUSH_ERAT 3 + +static int mce_flush(int what) +{ +#ifdef CONFIG_PPC_STD_MMU_64 + if (what == MCE_FLUSH_SLB) { + flush_and_reload_slb(); + return 1; + } +#endif + if (what == MCE_FLUSH_ERAT) { + flush_erat(); + return 1; + } + if (what == MCE_FLUSH_TLB) { + if (cur_cpu_spec && cur_cpu_spec->flush_tlb) { + cur_cpu_spec->flush_tlb(TLB_INVAL_SCOPE_GLOBAL); + return 1; + } + } + + return 0; +} + +static int mce_handle_flush_derrors(uint64_t dsisr, uint64_t slb, uint64_t tlb, uint64_t erat) +{ + if ((dsisr & slb) && mce_flush(MCE_FLUSH_SLB)) + dsisr &= ~slb; + if ((dsisr & erat) && mce_flush(MCE_FLUSH_ERAT)) + dsisr &= ~erat; + if ((dsisr & tlb) && mce_flush(MCE_FLUSH_TLB)) + dsisr &= ~tlb; + /* Any other errors we don't understand? */ + if (dsisr) + return 0; + return 1; +} + static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits) { long handled = 1; @@ -378,3 +423,189 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs) save_mce_event(regs, handled, &mce_error_info, nip, addr); return handled; } + +static int mce_handle_derror_p9(struct pt_regs *regs) +{ + uint64_t dsisr = regs->dsisr; + + return mce_handle_flush_derrors(dsisr, + P9_DSISR_MC_SLB_PARITY_MFSLB | + P9_DSISR_MC_SLB_MULTIHIT_MFSLB, + + P9_DSISR_MC_TLB_MULTIHIT_MFTLB, + + P9_DSISR_MC_ERAT_MULTIHIT); +} + +static int mce_handle_ierror_p9(struct pt_regs *regs) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_SLB_PARITY: + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + return mce_flush(MCE_FLUSH_SLB); + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + return mce_flush(MCE_FLUSH_TLB); + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + return mce_flush(MCE_FLUSH_ERAT); + default: + return 0; + } +} + +static void mce_get_derror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t dsisr = regs->dsisr; + + mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->initiator = MCE_INITIATOR_CPU; + + if (dsisr & P9_DSISR_MC_USER_TLBIE) + *addr = regs->nip; + else + *addr = regs->dar; + + if (dsisr & P9_DSISR_MC_UE) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_UE_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_LINK_LOAD_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_LOAD_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_LINK_TABLEWALK_TIMEOUT) { + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT; + } else if (dsisr & P9_DSISR_MC_ERAT_MULTIHIT) { + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_TLB_MULTIHIT_MFTLB) { + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_USER_TLBIE) { + mce_err->error_type = MCE_ERROR_TYPE_USER; + mce_err->u.user_error_type = MCE_USER_ERROR_TLBIE; + } else if (dsisr & P9_DSISR_MC_SLB_PARITY_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + } else if (dsisr & P9_DSISR_MC_SLB_MULTIHIT_MFSLB) { + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + } else if (dsisr & P9_DSISR_MC_RA_LOAD) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + } else if (dsisr & P9_DSISR_MC_RA_TABLEWALK_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN; + } else if (dsisr & P9_DSISR_MC_RA_FOREIGN) { + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_LOAD_STORE_FOREIGN; + } +} + +static void mce_get_ierror_p9(struct pt_regs *regs, + struct mce_error_info *mce_err, uint64_t *addr) +{ + uint64_t srr1 = regs->msr; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->severity = MCE_SEV_FATAL; + break; + default: + mce_err->severity = MCE_SEV_ERROR_SYNC; + break; + } + + mce_err->initiator = MCE_INITIATOR_CPU; + + *addr = regs->nip; + + switch (P9_SRR1_MC_IFETCH(srr1)) { + case P9_SRR1_MC_IFETCH_UE: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_SLB_PARITY: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY; + break; + case P9_SRR1_MC_IFETCH_SLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_SLB; + mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_ERAT_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_ERAT; + mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_TLB_MULTIHIT: + mce_err->error_type = MCE_ERROR_TYPE_TLB; + mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT; + break; + case P9_SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce_err->error_type = MCE_ERROR_TYPE_UE; + mce_err->u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_LINK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_LINK_TABLEWALK_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + case P9_SRR1_MC_IFETCH_RA_ASYNC_STORE: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_STORE; + break; + case P9_SRR1_MC_IFETCH_LINK_ASYNC_STORE_TIMEOUT: + mce_err->error_type = MCE_ERROR_TYPE_LINK; + mce_err->u.link_error_type = MCE_LINK_ERROR_STORE_TIMEOUT; + break; + case P9_SRR1_MC_IFETCH_RA_TABLEWALK_FOREIGN: + mce_err->error_type = MCE_ERROR_TYPE_RA; + mce_err->u.ra_error_type = MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN; + break; + default: + break; + } +} + +long __machine_check_early_realmode_p9(struct pt_regs *regs) +{ + uint64_t nip, addr; + long handled; + struct mce_error_info mce_error_info = { 0 }; + + nip = regs->nip; + + if (P9_SRR1_MC_LOADSTORE(regs->msr)) { + handled = mce_handle_derror_p9(regs); + mce_get_derror_p9(regs, &mce_error_info, &addr); + } else { + handled = mce_handle_ierror_p9(regs); + mce_get_ierror_p9(regs, &mce_error_info, &addr); + } + + /* Handle UE error. */ + if (mce_error_info.error_type == MCE_ERROR_TYPE_UE) + handled = mce_handle_ue_error(regs); + + save_mce_event(regs, handled, &mce_error_info, nip, addr); + return handled; +} From 296739839fa2851e6badc77dcfc45050094cb102 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 9 Mar 2017 20:53:31 +0100 Subject: [PATCH 167/384] net: phy: marvell: Fix double free of hwmon device The hwmon temperature sensor devices is registered using a devm_hwmon API call. The marvell_release() would then manually free the device, not using a devm_hmon API, resulting in the device being removed twice, leading to a crash in kernfs_find_ns() during the second removal. Remove the manual removal, which makes marvell_release() empty, so remove it as well. Signed-off-by: Andrew Lunn Fixes: 0b04680fdae4 ("phy: marvell: Add support for temperature sensor") Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index f9d0fa315a47..272b051a0199 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1883,17 +1883,6 @@ static int m88e1510_probe(struct phy_device *phydev) return m88e1510_hwmon_probe(phydev); } -static void marvell_remove(struct phy_device *phydev) -{ -#ifdef CONFIG_HWMON - - struct marvell_priv *priv = phydev->priv; - - if (priv && priv->hwmon_dev) - hwmon_device_unregister(priv->hwmon_dev); -#endif -} - static struct phy_driver marvell_drivers[] = { { .phy_id = MARVELL_PHY_ID_88E1101, @@ -1974,7 +1963,6 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = &m88e1121_probe, - .remove = &marvell_remove, .config_init = &m88e1121_config_init, .config_aneg = &m88e1121_config_aneg, .read_status = &marvell_read_status, @@ -2087,7 +2075,6 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE, .flags = PHY_HAS_INTERRUPT, .probe = &m88e1510_probe, - .remove = &marvell_remove, .config_init = &m88e1510_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, @@ -2109,7 +2096,6 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = m88e1510_probe, - .remove = &marvell_remove, .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, @@ -2127,7 +2113,6 @@ static struct phy_driver marvell_drivers[] = { .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1545", .probe = m88e1510_probe, - .remove = &marvell_remove, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .config_init = &marvell_config_init, From a1016e94cce9fb6ea56d7602263783e2d95d6e92 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Mar 2017 17:14:32 +0000 Subject: [PATCH 168/384] ARM: wire up statx syscall Wire up the new statx syscall for ARM. Signed-off-by: Russell King --- arch/arm/tools/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 3c2cb5d5adfa..0bb0e9c6376c 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -411,3 +411,4 @@ 394 common pkey_mprotect sys_pkey_mprotect 395 common pkey_alloc sys_pkey_alloc 396 common pkey_free sys_pkey_free +397 common statx sys_statx From 9a8b0a230aca55ee142fd76f4765f1da1799da93 Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Wed, 15 Feb 2017 14:00:15 +0000 Subject: [PATCH 169/384] drm: mali-dp: Remove mclk rate management The rate of mclk depends on the use-case. If no downscaling is required, then mclk == pxlclk is a valid option; with downscaling however, the rate at which mclk runs determines how much a plane can be downscaled before composition. This is a system integration + power management issue that is more suited to firmware rather than this driver. Signed-off-by: Mihail Atanassov Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_crtc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 08e6a71f5d05..294b53697334 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -63,8 +63,7 @@ static void malidp_crtc_enable(struct drm_crtc *crtc) clk_prepare_enable(hwdev->pxlclk); - /* mclk needs to be set to the same or higher rate than pxlclk */ - clk_set_rate(hwdev->mclk, crtc->state->adjusted_mode.crtc_clock * 1000); + /* We rely on firmware to set mclk to a sensible level. */ clk_set_rate(hwdev->pxlclk, crtc->state->adjusted_mode.crtc_clock * 1000); hwdev->modeset(hwdev, &vm); From d1479f6108006555fe33d7cfe8db4f95ad614b9a Mon Sep 17 00:00:00 2001 From: Mihail Atanassov Date: Thu, 9 Feb 2017 11:32:00 +0000 Subject: [PATCH 170/384] drm: mali-dp: Fix smart layer not going to composition Use rectangle 1 as a generic plane. Existing code already sets the smart layer bounding box size + offset. The rectangles' offsets are relative to the bounding box, so there is no need to set R1's offset (reset value is 0), just its size which is the same as the bounding box. Signed-off-by: Mihail Atanassov Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_hw.c | 2 +- drivers/gpu/drm/arm/malidp_planes.c | 18 ++++++++++++++++-- drivers/gpu/drm/arm/malidp_regs.h | 1 + 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index 488aedf5b58d..9f5513006eee 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -83,7 +83,7 @@ static const struct malidp_layer malidp550_layers[] = { { DE_VIDEO1, MALIDP550_DE_LV1_BASE, MALIDP550_DE_LV1_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, { DE_GRAPHICS1, MALIDP550_DE_LG_BASE, MALIDP550_DE_LG_PTR_BASE, MALIDP_DE_LG_STRIDE }, { DE_VIDEO2, MALIDP550_DE_LV2_BASE, MALIDP550_DE_LV2_PTR_BASE, MALIDP_DE_LV_STRIDE0 }, - { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, 0 }, + { DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE }, }; #define MALIDP_DE_DEFAULT_PREFETCH_START 5 diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 414aada10fe5..d5aec082294c 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -37,6 +37,8 @@ #define LAYER_V_VAL(x) (((x) & 0x1fff) << 16) #define MALIDP_LAYER_COMP_SIZE 0x010 #define MALIDP_LAYER_OFFSET 0x014 +#define MALIDP550_LS_ENABLE 0x01c +#define MALIDP550_LS_R1_IN_SIZE 0x020 /* * This 4-entry look-up-table is used to determine the full 8-bit alpha value @@ -242,6 +244,11 @@ static void malidp_de_plane_update(struct drm_plane *plane, LAYER_V_VAL(plane->state->crtc_y), mp->layer->base + MALIDP_LAYER_OFFSET); + if (mp->layer->id == DE_SMART) + malidp_hw_write(mp->hwdev, + LAYER_H_VAL(src_w) | LAYER_V_VAL(src_h), + mp->layer->base + MALIDP550_LS_R1_IN_SIZE); + /* first clear the rotation bits */ val = malidp_hw_read(mp->hwdev, mp->layer->base + MALIDP_LAYER_CONTROL); val &= ~LAYER_ROT_MASK; @@ -330,9 +337,16 @@ int malidp_de_planes_init(struct drm_device *drm) plane->hwdev = malidp->dev; plane->layer = &map->layers[i]; - /* Skip the features which the SMART layer doesn't have */ - if (id == DE_SMART) + if (id == DE_SMART) { + /* + * Enable the first rectangle in the SMART layer to be + * able to use it as a drm plane. + */ + malidp_hw_write(malidp->dev, 1, + plane->layer->base + MALIDP550_LS_ENABLE); + /* Skip the features which the SMART layer doesn't have. */ continue; + } drm_plane_create_rotation_property(&plane->base, DRM_ROTATE_0, flags); malidp_hw_write(malidp->dev, MALIDP_ALPHA_LUT, diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h index aff6d4a84e99..b816067a65c5 100644 --- a/drivers/gpu/drm/arm/malidp_regs.h +++ b/drivers/gpu/drm/arm/malidp_regs.h @@ -84,6 +84,7 @@ /* Stride register offsets relative to Lx_BASE */ #define MALIDP_DE_LG_STRIDE 0x18 #define MALIDP_DE_LV_STRIDE0 0x18 +#define MALIDP550_DE_LS_R1_STRIDE 0x28 /* macros to set values into registers */ #define MALIDP_DE_H_FRONTPORCH(x) (((x) & 0xfff) << 0) From 702f2ac87a9a8da23bf8506466bc70175fc970b2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 10 Mar 2017 07:48:49 +0000 Subject: [PATCH 171/384] rxrpc: Wake up the transmitter if Rx window size increases on the peer The RxRPC ACK packet may contain an extension that includes the peer's current Rx window size for this call. We adjust the local Tx window size to match. However, the transmitter can stall if the receive window is reduced to 0 by the peer and then reopened. This is because the normal way that the transmitter is re-energised is by dropping something out of our Tx queue and thus making space. When a single gap is made, the transmitter is woken up. However, because there's nothing in the Tx queue at this point, this doesn't happen. To fix this, perform a wake_up() any time we see the peer's Rx window size increasing. The observable symptom is that calls start failing on ETIMEDOUT and the following: kAFS: SERVER DEAD state=-62 appears in dmesg. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/input.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d74921c4969b..18b2ad8be8e2 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -652,6 +652,7 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; unsigned int mtu; + bool wake = false; u32 rwind = ntohl(ackinfo->rwind); _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", @@ -659,9 +660,14 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), rwind, ntohl(ackinfo->jumbo_max)); - if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) - rwind = RXRPC_RXTX_BUFF_SIZE - 1; - call->tx_winsize = rwind; + if (call->tx_winsize != rwind) { + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + if (rwind > call->tx_winsize) + wake = true; + call->tx_winsize = rwind; + } + if (call->cong_ssthresh > rwind) call->cong_ssthresh = rwind; @@ -675,6 +681,9 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, spin_unlock_bh(&peer->lock); _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } + + if (wake) + wake_up(&call->waitq); } /* From cb6950b7152fb3760942f9cb16bd2a35e5a1bfd1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 7 Mar 2017 00:34:57 +0530 Subject: [PATCH 172/384] arm64: kprobes: remove kprobe_exceptions_notify Commit fc62d0207ae0 ("kprobes: Introduce weak variant of kprobe_exceptions_notify()") introduces a generic empty version of the function for architectures that don't need special handling, like arm64. As such, remove the arch/arm64/ specific handler. Signed-off-by: Naveen N. Rao Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 2a07aae5b8a2..c5c45942fb6e 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -372,12 +372,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) return 0; } -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return NOTIFY_DONE; -} - static void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur_kprobe; From b0de0ccc8b9edd8846828e0ecdc35deacdf186b0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Mar 2017 19:06:40 +0000 Subject: [PATCH 173/384] arm64: kasan: avoid bad virt_to_pfn() Booting a v4.11-rc1 kernel with DEBUG_VIRTUAL and KASAN enabled produces the following splat (trimmed for brevity): [ 0.000000] virt_to_phys used for non-linear address: ffff200008080000 (0xffff200008080000) [ 0.000000] WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:14 __virt_to_phys+0x48/0x70 [ 0.000000] PC is at __virt_to_phys+0x48/0x70 [ 0.000000] LR is at __virt_to_phys+0x48/0x70 [ 0.000000] Call trace: [ 0.000000] [] __virt_to_phys+0x48/0x70 [ 0.000000] [] kasan_init+0x1c0/0x498 [ 0.000000] [] setup_arch+0x2fc/0x948 [ 0.000000] [] start_kernel+0xb8/0x570 [ 0.000000] [] __primary_switched+0x6c/0x74 This is because we use virt_to_pfn() on a kernel image address when trying to figure out its nid, so that we can allocate its shadow from the same node. As with other recent changes, this patch uses lm_alias() to solve this. We could instead use NUMA_NO_NODE, as x86 does for all shadow allocations, though we'll likely want the "real" memory shadow to be backed from its corresponding nid anyway, so we may as well be consistent and find the nid for the image shadow. Cc: Catalin Marinas Cc: Will Deacon Acked-by: Laura Abbott Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/kasan_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 55d1e9205543..687a358a3733 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -162,7 +162,7 @@ void __init kasan_init(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); vmemmap_populate(kimg_shadow_start, kimg_shadow_end, - pfn_to_nid(virt_to_pfn(_text))); + pfn_to_nid(virt_to_pfn(lm_alias(_text)))); /* * vmemmap_populate() has populated the shadow region that covers the From 5c2a625937ba49bc691089370638223d310cda9a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 8 Mar 2017 16:27:04 -0800 Subject: [PATCH 174/384] arm64: support keyctl() system call in 32-bit mode As is the case for a number of other architectures that have a 32-bit compat mode, enable KEYS_COMPAT if both COMPAT and KEYS are enabled. This allows AArch32 programs to use the keyctl() system call when running on an AArch64 kernel. Signed-off-by: Eric Biggers Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a39029b5414e..f21e9a76ff67 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1063,6 +1063,10 @@ config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC +config KEYS_COMPAT + def_bool y + depends on COMPAT && KEYS + endmenu menu "Power management options" From 14088540ad63c648e5cdf490412033f792d16b6b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Mar 2017 17:44:18 +0000 Subject: [PATCH 175/384] arm64: use const cap for system_uses_ttbr0_pan() Since commit 4b65a5db362783ab ("arm64: Introduce uaccess_{disable,enable} functionality based on TTBR0_EL1"), system_uses_ttbr0_pan() has used cpus_have_cap() to determine whether PAN is present. Since commit a4023f682739439b ("arm64: Add hypervisor safe helper for checking constant capabilities"), which was introduced around the same time, cpus_have_cap() doesn't try to use a static key, and must always perform a load, test, and consitional branch (likely a tbnz for the latter two). Elsewhere, we moved to using cpus_have_const_cap(), which can use a static key (i.e. a non-conditional branch), which is patched at runtime when the feature is detected. This patch makes system_uses_ttbr0_pan() use cpus_have_const_cap(). The static key is likely a win for hot-paths like the uacccess primitives, and this makes our usage consistent regardless. Signed-off-by: Mark Rutland Reviewed-by: Suzuki K Poulose Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 05310ad8c5ab..f31c48d0cd68 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -251,7 +251,7 @@ static inline bool system_supports_fpsimd(void) static inline bool system_uses_ttbr0_pan(void) { return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_cap(ARM64_HAS_PAN); + !cpus_have_const_cap(ARM64_HAS_PAN); } #endif /* __ASSEMBLY__ */ From af36370569eb37420e1e78a2e60c277b781fcd00 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Fri, 10 Mar 2017 14:33:01 +0200 Subject: [PATCH 176/384] net/mlx5: Fix create autogroup prev initializer The autogroups list is a list of non overlapping group boundaries sorted by their start index. If the autogroups list wasn't empty and an empty group slot was found at the start of the list, the new group was added to the end of the list instead of the beginning, as the prev initializer was incorrect. When this was repeated, it caused multiple groups to have overlapping boundaries. Fixed that by correctly initializing the prev pointer to the start of the list. Fixes: eccec8da3b4e ('net/mlx5: Keep autogroups list ordered') Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2478516a61e2..ded27bb9a3b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1136,7 +1136,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, u32 *match_criteria) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct list_head *prev = ft->node.children.prev; + struct list_head *prev = &ft->node.children; unsigned int candidate_index = 0; struct mlx5_flow_group *fg; void *match_criteria_addr; From 5d47f6c89d568ab61712d8c40676fbb020b68752 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 10 Mar 2017 14:33:02 +0200 Subject: [PATCH 177/384] net/mlx5: Don't save PCI state when PCI error is detected When a PCI error is detected the PCI state could be corrupt, don't save it in that flow. Save the state after initialization. After restoring the PCI state during slot reset save it again, restoring the state destroys the previously saved state info. Fixes: 05ac2c0b7438 ('net/mlx5: Fix race between PCI error handlers and health work') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c4242a4e8130..e2bd600d19de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1352,6 +1352,7 @@ static int init_one(struct pci_dev *pdev, if (err) goto clean_load; + pci_save_state(pdev); return 0; clean_load: @@ -1407,9 +1408,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state and drain the health wq */ + /* In case of kernel call drain the health wq */ if (state) { - pci_save_state(pdev); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } @@ -1461,6 +1461,7 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) pci_set_master(pdev); pci_restore_state(pdev); + pci_save_state(pdev); if (wait_vital(pdev)) { dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); From 33e21c59526e9147d7c68913995298f10c35cd6f Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 10 Mar 2017 14:33:03 +0200 Subject: [PATCH 178/384] net/mlx5e: remove IEEE/CEE mode check when setting DCBX mode Currently, the function setdcbx fails if the request dcbx mode is either IEEE or CEE. We remove the IEEE/CEE mode check because we support both IEEE and CEE interfaces. Fixes: 3a6a931dfb8e ("net/mlx5e: Support DCBX CEE API") Signed-off-by: Huy Nguyen Reviewed-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 0523ed47f597..8fa23f6a1f67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -302,6 +302,9 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_dcbx *dcbx = &priv->dcbx; + if (mode & DCB_CAP_DCBX_LLD_MANAGED) + return 1; + if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) { if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO) return 0; @@ -315,13 +318,10 @@ static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode) return 1; } - if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) + if (!(mode & DCB_CAP_DCBX_HOST)) return 1; - if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || - !(mode & DCB_CAP_DCBX_VER_CEE) || - !(mode & DCB_CAP_DCBX_VER_IEEE) || - !(mode & DCB_CAP_DCBX_HOST)) + if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev))) return 1; return 0; From 65ba8fb7d5c6803ec236bb8d6650465fed7f9769 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 10 Mar 2017 14:33:04 +0200 Subject: [PATCH 179/384] net/mlx5e: Avoid wrong identification of rules on deletion When deleting offloaded TC flows, we must correctly identify E-switch rules. The current check could get us wrong w.r.t to rules set on the PF. Since it's possible to set NIC rules on the PF, switch to SRIOV offloads mode and then attempt to delete a NIC rule. To solve that, we add a flags field to offloaded rules, set it on creation time and use that over the code where currently needed. Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 44406a5ec15d..79481f4cf264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -48,9 +48,14 @@ #include "eswitch.h" #include "vxlan.h" +enum { + MLX5E_TC_FLOW_ESWITCH = BIT(0), +}; + struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; + u8 flags; struct mlx5_flow_handle *rule; struct list_head encap; /* flows sharing the same encap */ struct mlx5_esw_flow_attr *attr; @@ -177,7 +182,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5_fc_destroy(priv->mdev, counter); } - if (esw && esw->mode == SRIOV_OFFLOADS) { + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { mlx5_eswitch_del_vlan_action(esw, flow->attr); if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) mlx5e_detach_encap(priv, flow); @@ -598,6 +603,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, } static int parse_cls_flower(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { @@ -609,7 +615,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, err = __parse_cls_flower(priv, spec, f, &min_inline); - if (!err && esw->mode == SRIOV_OFFLOADS && + if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && rep->vport != FDB_UPLINK_VPORT) { if (min_inline > esw->offloads.inline_mode) { netdev_warn(priv->netdev, @@ -1132,23 +1138,19 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { struct mlx5e_tc_table *tc = &priv->fs.tc; - int err = 0; - bool fdb_flow = false; + int err, attr_size = 0; u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u8 flow_flags = 0; - if (esw && esw->mode == SRIOV_OFFLOADS) - fdb_flow = true; - - if (fdb_flow) - flow = kzalloc(sizeof(*flow) + - sizeof(struct mlx5_esw_flow_attr), - GFP_KERNEL); - else - flow = kzalloc(sizeof(*flow), GFP_KERNEL); + if (esw && esw->mode == SRIOV_OFFLOADS) { + flow_flags = MLX5E_TC_FLOW_ESWITCH; + attr_size = sizeof(struct mlx5_esw_flow_attr); + } + flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec || !flow) { err = -ENOMEM; @@ -1156,12 +1158,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, } flow->cookie = f->cookie; + flow->flags = flow_flags; - err = parse_cls_flower(priv, spec, f); + err = parse_cls_flower(priv, flow, spec, f); if (err < 0) goto err_free; - if (fdb_flow) { + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); err = parse_tc_fdb_actions(priv, f->exts, flow); if (err < 0) From ea29bd304d7b522f6162a36f394e690c579b5a63 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Fri, 10 Mar 2017 14:33:05 +0200 Subject: [PATCH 180/384] net/mlx5e: Fix loopback selftest Change packet type handler to ETH_P_IP instead of ETH_P_ALL since we are already expecting an IP packet. Also, using ETH_P_ALL will cause the loopback test packet type handler to be called on all outgoing packets, especially our own self loopback test SKB, which will be validated on xmit as well, and we don't want that. Tested with: ethtool -t ethX validated that the loopback test passes. Fixes: 0952da791c97 ('net/mlx5e: Add support for loopback selftest') Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 31e3cb7ee5fe..5621dcfda4f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -204,9 +204,6 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, struct iphdr *iph; /* We are only going to peek, no need to clone the SKB */ - if (skb->protocol != htons(ETH_P_IP)) - goto out; - if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) goto out; @@ -249,7 +246,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, lbtp->loopback_ok = false; init_completion(&lbtp->comp); - lbtp->pt.type = htons(ETH_P_ALL); + lbtp->pt.type = htons(ETH_P_IP); lbtp->pt.func = mlx5e_test_loopback_validate; lbtp->pt.dev = priv->netdev; lbtp->pt.af_packet_priv = lbtp; From 0e4c0e6ea7d4a988a5ae2791c7cb5769b5256dad Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 17 Feb 2017 15:25:08 +0100 Subject: [PATCH 181/384] arm64: kernel: Update kerneldoc for cpu_suspend() rename Commit af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm") renamed cpu_suspend() to arm_cpuidle_suspend(), but forgot to update the kerneldoc header. Fixes: af391b15f7b56ce1 ("arm64: kernel: rename __cpu_suspend to keep it aligned with arm") Signed-off-by: Geert Uytterhoeven Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuidle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 75a0f8acef66..fd691087dc9a 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -30,7 +30,7 @@ int arm_cpuidle_init(unsigned int cpu) } /** - * cpu_suspend() - function to enter a low-power idle state + * arm_cpuidle_suspend() - function to enter a low-power idle state * @arg: argument to pass to CPU suspend operations * * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU From d8a8ed9758241e138933c67e40db2db2790eca19 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 9 Mar 2017 13:21:07 -0500 Subject: [PATCH 182/384] drm/amd/amdgpu: Disable GFX_PG on Carrizo until compute issues solved Currently compute jobs will stall if GFX_PG is enabled. Until this is resolved we'll disable GFX_PG. Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 50bdb24ef8d6..4a785d6acfb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1051,7 +1051,7 @@ static int vi_common_early_init(void *handle) /* rev0 hardware requires workarounds to support PG */ adev->pg_flags = 0; if (adev->rev_id != 0x00) { - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | AMD_PG_SUPPORT_CP | From 607523d19c9d67ba4cf7bdaced644f11ed04992c Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Mar 2017 12:13:04 +1000 Subject: [PATCH 183/384] drm/amdgpu: fix parser init error path to avoid crash in parser fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we don't reset the chunk info in the error path, the subsequent fini path will double free. Reviewed-by: Christian König Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d2d0f60ff36d..99424cb8020b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -240,6 +240,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) for (; i >= 0; i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); + p->chunks = NULL; + p->nchunks = 0; put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: From 3fb632e40d7667d8bedfabc28850ac06d5493f54 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:27:23 +0800 Subject: [PATCH 184/384] md: fix super_offset endianness in super_1_rdev_size_change The sb->super_offset should be big-endian, but the rdev->sb_start is in host byte order, so fix this by adding cpu_to_le64. Signed-off-by: Jason Yan Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index cd89ad3c3a0d..6e76d97a8fc3 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1879,7 +1879,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); - sb->super_offset = rdev->sb_start; + sb->super_offset = cpu_to_le64(rdev->sb_start); sb->sb_csum = calc_sb_1_csum(sb); do { md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, From 1345921393ba23b60d3fcf15933e699232ad25ae Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 11:49:12 +0800 Subject: [PATCH 185/384] md: fix incorrect use of lexx_to_cpu in does_sb_need_changing The sb->layout is of type __le32, so we shoud use le32_to_cpu. Signed-off-by: Jason Yan Signed-off-by: Shaohua Li --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 6e76d97a8fc3..f6ae1d67bcd0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2287,7 +2287,7 @@ static bool does_sb_need_changing(struct mddev *mddev) /* Check if any mddev parameters have changed */ if ((mddev->dev_sectors != le64_to_cpu(sb->size)) || (mddev->reshape_position != le64_to_cpu(sb->reshape_position)) || - (mddev->layout != le64_to_cpu(sb->layout)) || + (mddev->layout != le32_to_cpu(sb->layout)) || (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) || (mddev->chunk_sectors != le32_to_cpu(sb->chunksize))) return true; From 0134ed4fb9e78672ee9f7b18007114404c81e63f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Mar 2017 13:24:22 -0700 Subject: [PATCH 186/384] device-dax: fix pmd/pte fault fallback handling Jeff Moyer reports: With a device dax alignment of 4KB or 2MB, I get sigbus when running the attached fio job file for the current kernel (4.11.0-rc1+). If I specify an alignment of 1GB, it works. I turned on debug output, and saw that it was failing in the huge fault code. dax dax1.0: dax_open dax dax1.0: dax_mmap dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 - dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60 dax dax1.0: dax_release fio config for reproduce: [global] ioengine=dev-dax direct=0 filename=/dev/dax0.0 bs=2m [write] rw=write [read] stonewall rw=read The driver fails to fallback when taking a fault that is larger than the device alignment, or handling a larger fault when a smaller mapping is already established. While we could support larger mappings for a device with a smaller alignment, that change is too large for the immediate fix. The simplest change is to force fallback until the fault size matches the alignment. Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Cc: Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 8d9829ff2a78..a284dc532e46 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -427,6 +427,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) int rc = VM_FAULT_SIGBUS; phys_addr_t phys; pfn_t pfn; + unsigned int fault_size = PAGE_SIZE; if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -437,6 +438,9 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size != dax_region->align) + return VM_FAULT_SIGBUS; + phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); if (phys == -1) { dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, @@ -464,6 +468,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys_addr_t phys; pgoff_t pgoff; pfn_t pfn; + unsigned int fault_size = PMD_SIZE; if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -480,6 +485,16 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size < dax_region->align) + return VM_FAULT_SIGBUS; + else if (fault_size > dax_region->align) + return VM_FAULT_FALLBACK; + + /* if we are outside of the VMA */ + if (pmd_addr < vmf->vma->vm_start || + (pmd_addr + PMD_SIZE) > vmf->vma->vm_end) + return VM_FAULT_SIGBUS; + pgoff = linear_page_index(vmf->vma, pmd_addr); phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { From 70b085b06c4560a69e95607f77bb4c2b2e41943c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Mar 2017 13:24:27 -0700 Subject: [PATCH 187/384] device-dax: fix pud fault fallback handling Jeff Moyer reports: With a device dax alignment of 4KB or 2MB, I get sigbus when running the attached fio job file for the current kernel (4.11.0-rc1+). If I specify an alignment of 1GB, it works. I turned on debug output, and saw that it was failing in the huge fault code. dax dax1.0: dax_open dax dax1.0: dax_mmap dax dax1.0: dax_dev_huge_fault: fio: write (0x7f08f0a00000 - dax dax1.0: __dax_dev_pud_fault: phys_to_pgoff(0xffffffffcf60) dax dax1.0: dax_release fio config for reproduce: [global] ioengine=dev-dax direct=0 filename=/dev/dax0.0 bs=2m [write] rw=write [read] stonewall rw=read The driver fails to fallback when taking a fault that is larger than the device alignment, or handling a larger fault when a smaller mapping is already established. While we could support larger mappings for a device with a smaller alignment, that change is too large for the immediate fix. The simplest change is to force fallback until the fault size matches the alignment. Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index a284dc532e46..523fecec7bda 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -518,6 +518,8 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys_addr_t phys; pgoff_t pgoff; pfn_t pfn; + unsigned int fault_size = PUD_SIZE; + if (check_vma(dax_dev, vmf->vma, __func__)) return VM_FAULT_SIGBUS; @@ -534,6 +536,16 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } + if (fault_size < dax_region->align) + return VM_FAULT_SIGBUS; + else if (fault_size > dax_region->align) + return VM_FAULT_FALLBACK; + + /* if we are outside of the VMA */ + if (pud_addr < vmf->vma->vm_start || + (pud_addr + PUD_SIZE) > vmf->vma->vm_end) + return VM_FAULT_SIGBUS; + pgoff = linear_page_index(vmf->vma, pud_addr); phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE); if (phys == -1) { From 52084f89b38cdd896b59627c629915ef1a7bf615 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 9 Mar 2017 16:56:01 -0700 Subject: [PATCH 188/384] device-dax: fix debug output typo The debug output for return the return data of pgoff_to_phys() in the fault handlers has 'phys' and 'pgoff' incorrectly swapped. Reported-by: Jeff Moyer Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/dax/dax.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c index 523fecec7bda..80c6db279ae1 100644 --- a/drivers/dax/dax.c +++ b/drivers/dax/dax.c @@ -443,7 +443,7 @@ static int __dax_dev_pte_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) phys = pgoff_to_phys(dax_dev, vmf->pgoff, PAGE_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, vmf->pgoff); return VM_FAULT_SIGBUS; } @@ -498,7 +498,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pmd_addr); phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, pgoff); return VM_FAULT_SIGBUS; } @@ -549,7 +549,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf) pgoff = linear_page_index(vmf->vma, pud_addr); phys = pgoff_to_phys(dax_dev, pgoff, PUD_SIZE); if (phys == -1) { - dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__, + dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__, pgoff); return VM_FAULT_SIGBUS; } From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:23 +0800 Subject: [PATCH 189/384] Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when booting" Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting") The mapping of "cpuid <-> nodeid" is established at boot time via ACPI tables to keep associations of workqueues and other node related items consistent across cpu hotplug. But, ACPI tables are unreliable and failures with that boot time mapping have been reported on machines where the ACPI table and the physical information which is retrieved at actual hotplug is inconsistent. Revert the mapping implementation so it can be replaced with a less error prone approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 5 --- drivers/acpi/bus.c | 1 - drivers/acpi/processor_core.c | 73 ----------------------------------- include/linux/acpi.h | 3 -- 5 files changed, 1 insertion(+), 83 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ae32838cac5f..f6b0e87d2388 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -710,7 +710,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 4467a8089ab8..5d208a99d0c9 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu) void __weak arch_unregister_cpu(int cpu) {} -int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ - return -ENODEV; -} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 80cb5eb75b63..34fbe027e73a 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1249,7 +1249,6 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); - acpi_set_processor_mapping(); return 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 611a5585a902..a84386204659 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -278,79 +278,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static bool __init -map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid) -{ - int type, id; - u32 acpi_id; - acpi_status status; - acpi_object_type acpi_type; - unsigned long long tmp; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - - status = acpi_get_type(handle, &acpi_type); - if (ACPI_FAILURE(status)) - return false; - - switch (acpi_type) { - case ACPI_TYPE_PROCESSOR: - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) - return false; - acpi_id = object.processor.proc_id; - - /* validate the acpi_id */ - if(acpi_processor_validate_proc_id(acpi_id)) - return false; - break; - case ACPI_TYPE_DEVICE: - status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); - if (ACPI_FAILURE(status)) - return false; - acpi_id = tmp; - break; - default: - return false; - } - - type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; - - *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false); - id = acpi_map_cpuid(*phys_id, acpi_id); - - if (id < 0) - return false; - *cpuid = id; - return true; -} - -static acpi_status __init -set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context, - void **rv) -{ - phys_cpuid_t phys_id; - int cpu_id; - - if (!map_processor(handle, &phys_id, &cpu_id)) - return AE_ERROR; - - acpi_map_cpu2node(handle, cpu_id, phys_id); - return AE_OK; -} - -void __init acpi_set_processor_mapping(void) -{ - /* Set persistent cpu <-> node mapping for all processors. */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, set_processor_node_mapping, - NULL, NULL, NULL); -} -#else -void __init acpi_set_processor_mapping(void) {} -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base, u64 *phys_addr, int *ioapic_id) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 673acda012af..63a7519b00cc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id); int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif From 09c3f2bd5c7e5f18687663acb6adc6b167484ca5 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:24 +0800 Subject: [PATCH 190/384] Revert"x86/acpi: Enable MADT APIs to return disabled apicids" Revert: 8ad893faf2ea ("x86/acpi: Enable MADT APIs to return disabled apicids") Remove the leftovers of the boot time 'cpuid <-> nodeid' mapping approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-3-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/processor_core.c | 60 +++++++++++++---------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index a84386204659..b933061b6b60 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -32,12 +32,12 @@ static struct acpi_table_madt *get_madt_table(void) } static int map_lapic_id(struct acpi_subtable_header *entry, - u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled) + u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_apic *lapic = container_of(entry, struct acpi_madt_local_apic, header); - if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(lapic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (lapic->processor_id != acpi_id) @@ -48,13 +48,12 @@ static int map_lapic_id(struct acpi_subtable_header *entry, } static int map_x2apic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_x2apic *apic = container_of(entry, struct acpi_madt_local_x2apic, header); - if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(apic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (device_declaration && (apic->uid == acpi_id)) { @@ -66,13 +65,12 @@ static int map_x2apic_id(struct acpi_subtable_header *entry, } static int map_lsapic_id(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id) { struct acpi_madt_local_sapic *lsapic = container_of(entry, struct acpi_madt_local_sapic, header); - if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED)) + if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED)) return -ENODEV; if (device_declaration) { @@ -89,13 +87,12 @@ static int map_lsapic_id(struct acpi_subtable_header *entry, * Retrieve the ARM CPU physical identifier (MPIDR) */ static int map_gicc_mpidr(struct acpi_subtable_header *entry, - int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr, - bool ignore_disabled) + int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr) { struct acpi_madt_generic_interrupt *gicc = container_of(entry, struct acpi_madt_generic_interrupt, header); - if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED)) + if (!(gicc->flags & ACPI_MADT_ENABLED)) return -ENODEV; /* device_declaration means Device object in DSDT, in the @@ -112,7 +109,7 @@ static int map_gicc_mpidr(struct acpi_subtable_header *entry, } static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, - int type, u32 acpi_id, bool ignore_disabled) + int type, u32 acpi_id) { unsigned long madt_end, entry; phys_cpuid_t phys_id = PHYS_CPUID_INVALID; /* CPU hardware ID */ @@ -130,20 +127,16 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt, struct acpi_subtable_header *header = (struct acpi_subtable_header *)entry; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) { - if (!map_lapic_id(header, acpi_id, &phys_id, - ignore_disabled)) + if (!map_lapic_id(header, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) { - if (!map_x2apic_id(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_x2apic_id(header, type, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) { - if (!map_lsapic_id(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_lsapic_id(header, type, acpi_id, &phys_id)) break; } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) { - if (!map_gicc_mpidr(header, type, acpi_id, &phys_id, - ignore_disabled)) + if (!map_gicc_mpidr(header, type, acpi_id, &phys_id)) break; } entry += header->length; @@ -161,15 +154,14 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id) if (!madt) return PHYS_CPUID_INVALID; - rv = map_madt_entry(madt, 1, acpi_id, true); + rv = map_madt_entry(madt, 1, acpi_id); acpi_put_table((struct acpi_table_header *)madt); return rv; } -static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id, - bool ignore_disabled) +static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; @@ -190,38 +182,30 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id, header = (struct acpi_subtable_header *)obj->buffer.pointer; if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) - map_lapic_id(header, acpi_id, &phys_id, ignore_disabled); + map_lapic_id(header, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) - map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled); + map_lsapic_id(header, type, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) - map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled); + map_x2apic_id(header, type, acpi_id, &phys_id); else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) - map_gicc_mpidr(header, type, acpi_id, &phys_id, - ignore_disabled); + map_gicc_mpidr(header, type, acpi_id, &phys_id); exit: kfree(buffer.pointer); return phys_id; } -static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type, - u32 acpi_id, bool ignore_disabled) +phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) { phys_cpuid_t phys_id; - phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled); + phys_id = map_mat_entry(handle, type, acpi_id); if (invalid_phys_cpuid(phys_id)) - phys_id = map_madt_entry(get_madt_table(), type, acpi_id, - ignore_disabled); + phys_id = map_madt_entry(get_madt_table(), type, acpi_id); return phys_id; } -phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id) -{ - return __acpi_get_phys_id(handle, type, acpi_id, true); -} - int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id) { #ifdef CONFIG_SMP From 2b85b3d22920db7473e5fed5719e7955c0ec323e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:25 +0800 Subject: [PATCH 191/384] x86/acpi: Restore the order of CPU IDs The following commits: f7c28833c2 ("x86/acpi: Enable acpi to register all possible cpus at boot time") and 8f54969dc8 ("x86/acpi: Introduce persistent storage for cpuid <-> apicid mapping") ... registered all the possible CPUs at boot time via ACPI tables to make the mapping of cpuid <-> apicid fixed. Both enabled and disabled CPUs could have a logical CPU ID after boot time. But, ACPI tables are unreliable. the number amd order of Local APIC entries which depends on the firmware is often inconsistent with the physical devices. Even if they are consistent, The disabled CPUs which take up some logical CPU IDs will also make the order discontinuous. Revert the part of disabled CPUs registration, keep the allocation logic of logical CPU IDs and also keep some code location changes. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-4-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 7 ++++++- arch/x86/kernel/apic/apic.c | 26 +++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f6b0e87d2388..b2879cc23db4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) return -EINVAL; } + if (!enabled) { + ++disabled_cpus; + return -EINVAL; + } + if (boot_cpu_physical_apicid != -1U) ver = boot_cpu_apic_version; - cpu = __generic_processor_info(id, ver, enabled); + cpu = generic_processor_info(id, ver); if (cpu >= 0) early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aee7deddabd0..8ccb7ef512e0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid) return nr_logical_cpuids++; } -int __generic_processor_info(int apicid, int version, bool enabled) +int generic_processor_info(int apicid, int version) { int cpu, max = nr_cpu_ids; bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, @@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - if (enabled) { - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); - } + pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " + "reached. Processor %d/0x%x ignored.\n", + max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled) apic->x86_32_early_logical_apicid(cpu); #endif set_cpu_possible(cpu, true); - - if (enabled) { - num_processors++; - physid_set(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - } else { - disabled_cpus++; - } + physid_set(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; return cpu; } -int generic_processor_info(int apicid, int version) -{ - return __generic_processor_info(apicid, version, true); -} - int hard_smp_processor_id(void) { return read_apic_id(); From 8c8cb30f49b86333d8e036e1945cf1a78c03577e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:26 +0800 Subject: [PATCH 192/384] acpi/processor: Implement DEVICE operator for processor enumeration ACPI allows to declare processors either with the PROCESSOR or with the DEVICE operator. The current implementation handles only the PROCESSOR operator. On a system which uses the DEVICE operator for processor enumeration the evaluation fails. Check for the ACPI type of the ACPI handle and evaluate PROCESSOR and DEVICE types separately. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-5-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/acpi_processor.c | 39 ++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 5d208a99d0c9..9a98d7e00200 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -633,25 +633,50 @@ static acpi_status __init acpi_processor_ids_walk(acpi_handle handle, void **rv) { acpi_status status; + acpi_object_type acpi_type; + unsigned long long uid; union acpi_object object = { 0 }; struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); + status = acpi_get_type(handle, &acpi_type); if (ACPI_FAILURE(status)) - acpi_handle_info(handle, "Not get the processor object\n"); - else - processor_validated_ids_update(object.processor.proc_id); + return false; + + switch (acpi_type) { + case ACPI_TYPE_PROCESSOR: + status = acpi_evaluate_object(handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) + goto err; + uid = object.processor.proc_id; + break; + + case ACPI_TYPE_DEVICE: + status = acpi_evaluate_integer(handle, "_UID", NULL, &uid); + if (ACPI_FAILURE(status)) + goto err; + break; + default: + goto err; + } + + processor_validated_ids_update(uid); + return true; + +err: + acpi_handle_info(handle, "Invalid processor object\n"); + return false; - return AE_OK; } -static void __init acpi_processor_check_duplicates(void) +void __init acpi_processor_check_duplicates(void) { - /* Search all processor nodes in ACPI namespace */ + /* check the correctness for all processors in ACPI namespace */ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, acpi_processor_ids_walk, NULL, NULL, NULL); + acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_ids_walk, + NULL, NULL); } bool __init acpi_processor_validate_proc_id(int proc_id) From a77d6cd968497792e072b74dff45b891ba778ddb Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:27 +0800 Subject: [PATCH 193/384] acpi/processor: Check for duplicate processor ids at hotplug time The check for duplicate processor ids happens at boot time based on the ACPI table contents, but the final sanity checks for a processor happen at hotplug time. At hotplug time, where the physical information is available, which might differ from the ACPI table information, a check for duplicate processor ids is missing. Add it to the hotplug checks and rename the function so it better reflects its purpose. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-6-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- drivers/acpi/acpi_processor.c | 13 ++++++++++--- include/linux/acpi.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 9a98d7e00200..0143135b3abe 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -280,6 +280,13 @@ static int acpi_processor_get_info(struct acpi_device *device) pr->acpi_id = value; } + if (acpi_duplicate_processor_id(pr->acpi_id)) { + dev_err(&device->dev, + "Failed to get unique processor _UID (0x%x)\n", + pr->acpi_id); + return -ENODEV; + } + pr->phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id); if (invalid_phys_cpuid(pr->phys_id)) @@ -580,7 +587,7 @@ static struct acpi_scan_handler processor_container_handler = { static int nr_unique_ids __initdata; /* The number of the duplicate processor IDs */ -static int nr_duplicate_ids __initdata; +static int nr_duplicate_ids; /* Used to store the unique processor IDs */ static int unique_processor_ids[] __initdata = { @@ -588,7 +595,7 @@ static int unique_processor_ids[] __initdata = { }; /* Used to store the duplicate processor IDs */ -static int duplicate_processor_ids[] __initdata = { +static int duplicate_processor_ids[] = { [0 ... NR_CPUS - 1] = -1, }; @@ -679,7 +686,7 @@ void __init acpi_processor_check_duplicates(void) NULL, NULL); } -bool __init acpi_processor_validate_proc_id(int proc_id) +bool acpi_duplicate_processor_id(int proc_id) { int i; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 63a7519b00cc..9b05886f9773 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -287,7 +287,7 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) } /* Validate the processor object's proc_id */ -bool acpi_processor_validate_proc_id(int proc_id); +bool acpi_duplicate_processor_id(int proc_id); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ From 33d8c15559df4f0bce25d7e16ebb5879e249f2e7 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Thu, 9 Mar 2017 17:58:38 +0100 Subject: [PATCH 194/384] Revert "clocksource/drivers/tcb_clksrc: Use 32 bit tcb as sched_clock" This reverts commit 7b9f1d16e6d1 ("clocksource/drivers/tcb_clksrc: Use 32 bit tcb as sched_clock"). In the current state, the kernel warns against a late registration of the new sched_clock, the printk clock resets after only a few minutes, and it seems that scheduling can be affected as well. Signed-off-by: Romain Izard Signed-off-by: Daniel Lezcano --- drivers/clocksource/tcb_clksrc.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 745844ee973e..d4ca9962a759 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -10,7 +10,6 @@ #include #include #include -#include /* @@ -57,14 +56,9 @@ static u64 tc_get_cycles(struct clocksource *cs) return (upper << 16) | lower; } -static u32 tc_get_cv32(void) -{ - return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); -} - static u64 tc_get_cycles32(struct clocksource *cs) { - return tc_get_cv32(); + return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); } static struct clocksource clksrc = { @@ -75,11 +69,6 @@ static struct clocksource clksrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static u64 notrace tc_read_sched_clock(void) -{ - return tc_get_cv32(); -} - #ifdef CONFIG_GENERIC_CLOCKEVENTS struct tc_clkevt_device { @@ -350,9 +339,6 @@ static int __init tcb_clksrc_init(void) clksrc.read = tc_get_cycles32; /* setup ony channel 0 */ tcb_setup_single_chan(tc, best_divisor_idx); - - /* register sched_clock on chips with single 32 bit counter */ - sched_clock_register(tc_read_sched_clock, 32, divided_rate); } else { /* tclib will give us three clocks no matter what the * underlying platform supports. From f5fe1b51905df7cfe4fdfd85c5fb7bc5b71a094f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 10 Mar 2017 17:00:47 +1100 Subject: [PATCH 195/384] blk: Ensure users for current->bio_list can see the full list. Commit 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()") changed current->bio_list so that it did not contain *all* of the queued bios, but only those submitted by the currently running make_request_fn. There are two places which walk the list and requeue selected bios, and others that check if the list is empty. These are no longer correct. So redefine current->bio_list to point to an array of two lists, which contain all queued bios, and adjust various code to test or walk both lists. Signed-off-by: NeilBrown Fixes: 79bd99596b73 ("blk: improve order of bio handling in generic_make_request()") Signed-off-by: Jens Axboe --- block/bio.c | 12 +++++++++--- block/blk-core.c | 30 ++++++++++++++++++------------ drivers/md/dm.c | 27 +++++++++++++++------------ drivers/md/raid10.c | 3 ++- 4 files changed, 44 insertions(+), 28 deletions(-) diff --git a/block/bio.c b/block/bio.c index 5eec5e08417f..e75878f8b14a 100644 --- a/block/bio.c +++ b/block/bio.c @@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs) bio_list_init(&punt); bio_list_init(&nopunt); - while ((bio = bio_list_pop(current->bio_list))) + while ((bio = bio_list_pop(¤t->bio_list[0]))) bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[0] = nopunt; - *current->bio_list = nopunt; + bio_list_init(&nopunt); + while ((bio = bio_list_pop(¤t->bio_list[1]))) + bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio); + current->bio_list[1] = nopunt; spin_lock(&bs->rescue_lock); bio_list_merge(&bs->rescue_list, &punt); @@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) * we retry with the original gfp_flags. */ - if (current->bio_list && !bio_list_empty(current->bio_list)) + if (current->bio_list && + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))) gfp_mask &= ~__GFP_DIRECT_RECLAIM; p = mempool_alloc(bs->bio_pool, gfp_mask); diff --git a/block/blk-core.c b/block/blk-core.c index 0eeb99ef654f..d772c221cc17 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1973,7 +1973,14 @@ generic_make_request_checks(struct bio *bio) */ blk_qc_t generic_make_request(struct bio *bio) { - struct bio_list bio_list_on_stack; + /* + * bio_list_on_stack[0] contains bios submitted by the current + * make_request_fn. + * bio_list_on_stack[1] contains bios that were submitted before + * the current make_request_fn, but that haven't been processed + * yet. + */ + struct bio_list bio_list_on_stack[2]; blk_qc_t ret = BLK_QC_T_NONE; if (!generic_make_request_checks(bio)) @@ -1990,7 +1997,7 @@ blk_qc_t generic_make_request(struct bio *bio) * should be added at the tail */ if (current->bio_list) { - bio_list_add(current->bio_list, bio); + bio_list_add(¤t->bio_list[0], bio); goto out; } @@ -2009,18 +2016,17 @@ blk_qc_t generic_make_request(struct bio *bio) * bio_list, and call into ->make_request() again. */ BUG_ON(bio->bi_next); - bio_list_init(&bio_list_on_stack); - current->bio_list = &bio_list_on_stack; + bio_list_init(&bio_list_on_stack[0]); + current->bio_list = bio_list_on_stack; do { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (likely(blk_queue_enter(q, false) == 0)) { - struct bio_list hold; struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ - hold = bio_list_on_stack; - bio_list_init(&bio_list_on_stack); + bio_list_on_stack[1] = bio_list_on_stack[0]; + bio_list_init(&bio_list_on_stack[0]); ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -2030,19 +2036,19 @@ blk_qc_t generic_make_request(struct bio *bio) */ bio_list_init(&lower); bio_list_init(&same); - while ((bio = bio_list_pop(&bio_list_on_stack)) != NULL) + while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) if (q == bdev_get_queue(bio->bi_bdev)) bio_list_add(&same, bio); else bio_list_add(&lower, bio); /* now assemble so we handle the lowest level first */ - bio_list_merge(&bio_list_on_stack, &lower); - bio_list_merge(&bio_list_on_stack, &same); - bio_list_merge(&bio_list_on_stack, &hold); + bio_list_merge(&bio_list_on_stack[0], &lower); + bio_list_merge(&bio_list_on_stack[0], &same); + bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]); } else { bio_io_error(bio); } - bio = bio_list_pop(current->bio_list); + bio = bio_list_pop(&bio_list_on_stack[0]); } while (bio); current->bio_list = NULL; /* deactivate */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f4ffd1eb8f44..dfb75979e455 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -989,26 +989,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule) struct dm_offload *o = container_of(cb, struct dm_offload, cb); struct bio_list list; struct bio *bio; + int i; INIT_LIST_HEAD(&o->cb.list); if (unlikely(!current->bio_list)) return; - list = *current->bio_list; - bio_list_init(current->bio_list); + for (i = 0; i < 2; i++) { + list = current->bio_list[i]; + bio_list_init(¤t->bio_list[i]); - while ((bio = bio_list_pop(&list))) { - struct bio_set *bs = bio->bi_pool; - if (unlikely(!bs) || bs == fs_bio_set) { - bio_list_add(current->bio_list, bio); - continue; + while ((bio = bio_list_pop(&list))) { + struct bio_set *bs = bio->bi_pool; + if (unlikely(!bs) || bs == fs_bio_set) { + bio_list_add(¤t->bio_list[i], bio); + continue; + } + + spin_lock(&bs->rescue_lock); + bio_list_add(&bs->rescue_list, bio); + queue_work(bs->rescue_workqueue, &bs->rescue_work); + spin_unlock(&bs->rescue_lock); } - - spin_lock(&bs->rescue_lock); - bio_list_add(&bs->rescue_list, bio); - queue_work(bs->rescue_workqueue, &bs->rescue_work); - spin_unlock(&bs->rescue_lock); } } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 063c43d83b72..0536658c9d40 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf) !conf->barrier || (atomic_read(&conf->nr_pending) && current->bio_list && - !bio_list_empty(current->bio_list)), + (!bio_list_empty(¤t->bio_list[0]) || + !bio_list_empty(¤t->bio_list[1]))), conf->resync_lock); conf->nr_waiting--; if (!conf->nr_waiting) From d1c4e9bf73e739b937ddd9dc4cf0f6de2e6117da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Mon, 20 Feb 2017 14:50:23 -0500 Subject: [PATCH 196/384] platform/x86: asus-wmi: Remove quirk_no_rfkill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the detection introduced in the previous patches, we don't need these static DMI-based quirks anymore. This reverts the following commits: 56a37a72002b "asus-wmi: Add quirk_no_rfkill_wapf4 for the Asus X456UA" a961a285b479 "asus-wmi: Add quirk_no_rfkill_wapf4 for the Asus X456UF" 6b7ff2af5286 "asus-wmi: Add quirk_no_rfkill for the Asus Z550MA" 02db9ff7af18 "asus-wmi: Add quirk_no_rfkill for the Asus U303LB" 2d735244b798 "asus-wmi: Add quirk_no_rfkill for the Asus N552VW" a977e59c0c67 "asus-wmi: Create quirk for airplane_mode LED" Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko [dvhart: minor commit message corrections] Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/asus-nb-wmi.c | 49 ++---------------------------- drivers/platform/x86/asus-wmi.c | 5 +-- drivers/platform/x86/asus-wmi.h | 1 - 3 files changed, 3 insertions(+), 52 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 5be4783e40d4..dea98ffb6f60 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -103,15 +103,6 @@ static struct quirk_entry quirk_asus_x200ca = { .wapf = 2, }; -static struct quirk_entry quirk_no_rfkill = { - .no_rfkill = true, -}; - -static struct quirk_entry quirk_no_rfkill_wapf4 = { - .wapf = 4, - .no_rfkill = true, -}; - static struct quirk_entry quirk_asus_ux303ub = { .wmi_backlight_native = true, }; @@ -194,7 +185,7 @@ static const struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"), }, - .driver_data = &quirk_no_rfkill_wapf4, + .driver_data = &quirk_asus_wapf4, }, { .callback = dmi_matched, @@ -203,7 +194,7 @@ static const struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"), }, - .driver_data = &quirk_no_rfkill_wapf4, + .driver_data = &quirk_asus_wapf4, }, { .callback = dmi_matched, @@ -367,42 +358,6 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_x200ca, }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. X555UB", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X555UB"), - }, - .driver_data = &quirk_no_rfkill, - }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. N552VW", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "N552VW"), - }, - .driver_data = &quirk_no_rfkill, - }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. U303LB", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "U303LB"), - }, - .driver_data = &quirk_no_rfkill, - }, - { - .callback = dmi_matched, - .ident = "ASUSTeK COMPUTER INC. Z550MA", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "Z550MA"), - }, - .driver_data = &quirk_no_rfkill, - }, { .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. UX303UB", diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 8499d3ae4257..8fe5890bf539 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2111,10 +2111,7 @@ static int asus_wmi_add(struct platform_device *pdev) if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) asus->driver->wlan_ctrl_by_user = 1; - if (asus->driver->wlan_ctrl_by_user && ashs_present()) - asus->driver->quirks->no_rfkill = 1; - - if (!asus->driver->quirks->no_rfkill) { + if (!(asus->driver->wlan_ctrl_by_user && ashs_present())) { err = asus_wmi_rfkill_init(asus); if (err) goto fail_rfkill; diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index fdff626c3b51..c9589d9342bb 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -39,7 +39,6 @@ struct key_entry; struct asus_wmi; struct quirk_entry { - bool no_rfkill; bool hotplug_wireless; bool scalar_panel_brightness; bool store_backlight_power; From ecd052250b51c8cee4d9720adea05690dfc77c64 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Thu, 9 Mar 2017 13:28:33 -0500 Subject: [PATCH 197/384] net: ethernet: aquantia: call set_irq_affinity_hint before free_irq When a network interface controlled by the aquantia ethernet driver is brought down a warning is output in dmesg (see below). The problem is that aq_pci_func_free_irqs() is calling free_irq() before it is calling irq_set_affinity_hint(). WARNING: CPU: 4 PID: 10068 at kernel/irq/manage.c:1503 __free_irq+0x24d/0x2b0 Call Trace: dump_stack+0x63/0x87 __warn+0xd1/0xf0 warn_slowpath_null+0x1d/0x20 __free_irq+0x24d/0x2b0 free_irq+0x39/0x90 aq_pci_func_free_irqs+0x52/0xa0 [atlantic] aq_nic_stop+0xca/0xd0 [atlantic] aq_ndev_close+0x1d/0x40 [atlantic] __dev_close_many+0x99/0x100 __dev_close+0x67/0xb0 Fixes: 36a4a50f4048 ("net: ethernet: aquantia: switch to pci_alloc_irq_vectors") Cc: Christoph Hellwig Cc: Pavel Belous Signed-off-by: David Arcari Tested-by: Pavel Belous Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 581de71a958a..4c6c882c6a1c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -213,9 +213,9 @@ void aq_pci_func_free_irqs(struct aq_pci_func_s *self) if (!((1U << i) & self->msix_entry_mask)) continue; - free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]); if (pdev->msix_enabled) irq_set_affinity_hint(pci_irq_vector(pdev, i), NULL); + free_irq(pci_irq_vector(pdev, i), self->aq_vec[i]); self->msix_entry_mask &= ~(1U << i); } } From 7ce101246655935b014b11d81f815342921f5654 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 9 Mar 2017 14:58:29 -0800 Subject: [PATCH 198/384] netvsc: handle select_queue when device is being removed Move the send indirection table from the inner device (netvsc) to the network device context. It is possible that netvsc_device is not present (remove in progress). This solves potential use after free issues when packet is being created during MTU change, shutdown, or queue count changes. Fixes: d8e18ee0fa96 ("netvsc: enhance transmit select_queue") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc.c | 8 ++------ drivers/net/hyperv/netvsc_drv.c | 11 +++-------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d3e73ac158ae..f9f3dba7a588 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -700,6 +700,8 @@ struct net_device_context { u32 tx_checksum_mask; + u32 tx_send_table[VRSS_SEND_TAB_SIZE]; + /* Ethtool settings */ u8 duplex; u32 speed; @@ -757,7 +759,6 @@ struct netvsc_device { struct nvsp_message revoke_packet; - u32 send_table[VRSS_SEND_TAB_SIZE]; u32 max_chn; u32 num_chn; spinlock_t sc_lock; /* Protects num_sc_offered variable */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d35ebd993b38..4c1d8cca247b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1136,15 +1136,11 @@ static void netvsc_receive(struct net_device *ndev, static void netvsc_send_table(struct hv_device *hdev, struct nvsp_message *nvmsg) { - struct netvsc_device *nvscdev; struct net_device *ndev = hv_get_drvdata(hdev); + struct net_device_context *net_device_ctx = netdev_priv(ndev); int i; u32 count, *tab; - nvscdev = get_outbound_net_device(hdev); - if (!nvscdev) - return; - count = nvmsg->msg.v5_msg.send_table.count; if (count != VRSS_SEND_TAB_SIZE) { netdev_err(ndev, "Received wrong send-table size:%u\n", count); @@ -1155,7 +1151,7 @@ static void netvsc_send_table(struct hv_device *hdev, nvmsg->msg.v5_msg.send_table.offset); for (i = 0; i < count; i++) - nvscdev->send_table[i] = tab[i]; + net_device_ctx->tx_send_table[i] = tab[i]; } static void netvsc_send_vf(struct net_device_context *net_device_ctx, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index bc05c895d958..5ede87f30463 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -206,17 +206,15 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct netvsc_device *nvsc_dev = net_device_ctx->nvdev; + unsigned int num_tx_queues = ndev->real_num_tx_queues; struct sock *sk = skb->sk; int q_idx = sk_tx_queue_get(sk); - if (q_idx < 0 || skb->ooo_okay || - q_idx >= ndev->real_num_tx_queues) { + if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) { u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE); int new_idx; - new_idx = nvsc_dev->send_table[hash] - % nvsc_dev->num_chn; + new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues; if (q_idx != new_idx && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -225,9 +223,6 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, q_idx = new_idx; } - if (unlikely(!nvsc_dev->chan_table[q_idx].channel)) - q_idx = 0; - return q_idx; } From 88a7cddce2506b0b6c06a9f6e51379d0d275353b Mon Sep 17 00:00:00 2001 From: Neil Jerram Date: Fri, 10 Mar 2017 12:24:57 +0000 Subject: [PATCH 199/384] Make IP 'forwarding' doc more precise It wasn't clear if the 'forwarding' setting needs to be enabled on the interface that packets are received from, or on the interface that packets are forwarded to, or both. In fact (according to my code reading) the setting is relevant on the interface that packets are received from, so this change updates the doc to say that. Signed-off-by: Neil Jerram Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fc73eeb7b3b8..ab0230461377 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1006,7 +1006,8 @@ accept_redirects - BOOLEAN FALSE (router) forwarding - BOOLEAN - Enable IP forwarding on this interface. + Enable IP forwarding on this interface. This controls whether packets + received _on_ this interface can be forwarded. mc_forwarding - BOOLEAN Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE From 52491c7607c5527138095edf44c53169dc1ddb82 Mon Sep 17 00:00:00 2001 From: Etienne Noss Date: Fri, 10 Mar 2017 16:55:32 +0100 Subject: [PATCH 200/384] act_connmark: avoid crashing on malformed nlattrs with null parms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcf_connmark_init does not check in its configuration if TCA_CONNMARK_PARMS is set, resulting in a null pointer dereference when trying to access it. [501099.043007] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [501099.043039] IP: [] tcf_connmark_init+0x8b/0x180 [act_connmark] ... [501099.044334] Call Trace: [501099.044345] [] ? tcf_action_init_1+0x198/0x1b0 [501099.044363] [] ? tcf_action_init+0xb0/0x120 [501099.044380] [] ? tcf_exts_validate+0xc4/0x110 [501099.044398] [] ? u32_set_parms+0xa7/0x270 [cls_u32] [501099.044417] [] ? u32_change+0x680/0x87b [cls_u32] [501099.044436] [] ? tc_ctl_tfilter+0x4dd/0x8a0 [501099.044454] [] ? security_capable+0x41/0x60 [501099.044471] [] ? rtnetlink_rcv_msg+0xe1/0x220 [501099.044490] [] ? rtnl_newlink+0x870/0x870 [501099.044507] [] ? netlink_rcv_skb+0xa1/0xc0 [501099.044524] [] ? rtnetlink_rcv+0x24/0x30 [501099.044541] [] ? netlink_unicast+0x184/0x230 [501099.044558] [] ? netlink_sendmsg+0x2f8/0x3b0 [501099.044576] [] ? sock_sendmsg+0x30/0x40 [501099.044592] [] ? SYSC_sendto+0xd3/0x150 [501099.044608] [] ? __do_page_fault+0x2d1/0x510 [501099.044626] [] ? system_call_fast_compare_end+0xc/0x9b Fixes: 22a5dc0e5e3e ("net: sched: Introduce connmark action") Signed-off-by: Étienne Noss Signed-off-by: Victorien Molle Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_connmark.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index ab8062909962..f9bb43c25697 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (ret < 0) return ret; + if (!tb[TCA_CONNMARK_PARMS]) + return -EINVAL; + parm = nla_data(tb[TCA_CONNMARK_PARMS]); if (!tcf_hash_check(tn, parm->index, a, bind)) { From e37791ec1ad785b59022ae211f63a16189bacebf Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 10 Mar 2017 09:46:15 -0800 Subject: [PATCH 201/384] mpls: Send route delete notifications when router module is unloaded When the mpls_router module is unloaded, mpls routes are deleted but notifications are not sent to userspace leaving userspace caches out of sync. Add the call to mpls_notify_route in mpls_net_exit as routes are freed. Fixes: 0189197f44160 ("mpls: Basic routing support") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 3818686182b2..a1477989ed0b 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2028,6 +2028,7 @@ static void mpls_net_exit(struct net *net) for (index = 0; index < platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); RCU_INIT_POINTER(platform_label[index], NULL); + mpls_notify_route(net, index, rt, NULL, NULL); mpls_rt_free(rt); } rtnl_unlock(); From b17075d5c1988b83f840d272c795ac17d57ce804 Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Fri, 10 Mar 2017 21:36:22 +0000 Subject: [PATCH 202/384] xen-netback: fix race condition on XenBus disconnect In some cases during XenBus disconnect event handling and subsequent queue resource release there may be some TX handlers active on other processors. Use RCU in order to synchronize with them. Signed-off-by: Igor Druzhinin Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 26 +++++++++++++++++--------- drivers/net/xen-netback/netback.c | 2 +- drivers/net/xen-netback/xenbus.c | 20 ++++++++++---------- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 829b26cd4549..8397f6c92451 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -165,13 +165,17 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; - unsigned int num_queues = vif->num_queues; + unsigned int num_queues; u16 index; struct xenvif_rx_cb *cb; BUG_ON(skb->dev != dev); - /* Drop the packet if queues are not set up */ + /* Drop the packet if queues are not set up. + * This handler should be called inside an RCU read section + * so we don't need to enter it here explicitly. + */ + num_queues = READ_ONCE(vif->num_queues); if (num_queues < 1) goto drop; @@ -222,18 +226,18 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; + unsigned int num_queues; u64 rx_bytes = 0; u64 rx_packets = 0; u64 tx_bytes = 0; u64 tx_packets = 0; unsigned int index; - spin_lock(&vif->lock); - if (vif->queues == NULL) - goto out; + rcu_read_lock(); + num_queues = READ_ONCE(vif->num_queues); /* Aggregate tx and rx stats from each queue */ - for (index = 0; index < vif->num_queues; ++index) { + for (index = 0; index < num_queues; ++index) { queue = &vif->queues[index]; rx_bytes += queue->stats.rx_bytes; rx_packets += queue->stats.rx_packets; @@ -241,8 +245,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev) tx_packets += queue->stats.tx_packets; } -out: - spin_unlock(&vif->lock); + rcu_read_unlock(); vif->dev->stats.rx_bytes = rx_bytes; vif->dev->stats.rx_packets = rx_packets; @@ -378,10 +381,13 @@ static void xenvif_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 * data) { struct xenvif *vif = netdev_priv(dev); - unsigned int num_queues = vif->num_queues; + unsigned int num_queues; int i; unsigned int queue_index; + rcu_read_lock(); + num_queues = READ_ONCE(vif->num_queues); + for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) { unsigned long accum = 0; for (queue_index = 0; queue_index < num_queues; ++queue_index) { @@ -390,6 +396,8 @@ static void xenvif_get_ethtool_stats(struct net_device *dev, } data[i] = accum; } + + rcu_read_unlock(); } static void xenvif_get_strings(struct net_device *dev, u32 stringset, u8 * data) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f9bcf4a665bc..602d408fa25e 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -214,7 +214,7 @@ static void xenvif_fatal_tx_err(struct xenvif *vif) netdev_err(vif->dev, "fatal error; disabling device\n"); vif->disabled = true; /* Disable the vif from queue 0's kthread */ - if (vif->queues) + if (vif->num_queues) xenvif_kick_thread(&vif->queues[0]); } diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index d2d7cd9145b1..a56d3eab35dd 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -495,26 +495,26 @@ static void backend_disconnect(struct backend_info *be) struct xenvif *vif = be->vif; if (vif) { + unsigned int num_queues = vif->num_queues; unsigned int queue_index; - struct xenvif_queue *queues; xen_unregister_watchers(vif); #ifdef CONFIG_DEBUG_FS xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ xenvif_disconnect_data(vif); - for (queue_index = 0; - queue_index < vif->num_queues; - ++queue_index) + + /* At this point some of the handlers may still be active + * so we need to have additional synchronization here. + */ + vif->num_queues = 0; + synchronize_net(); + + for (queue_index = 0; queue_index < num_queues; ++queue_index) xenvif_deinit_queue(&vif->queues[queue_index]); - spin_lock(&vif->lock); - queues = vif->queues; - vif->num_queues = 0; + vfree(vif->queues); vif->queues = NULL; - spin_unlock(&vif->lock); - - vfree(queues); xenvif_disconnect_ctrl(vif); } From 79099aab38c8f5c746748b066ae74ba984fe2cc8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 10 Mar 2017 14:11:39 -0800 Subject: [PATCH 203/384] mpls: Do not decrement alive counter for unregister events Multipath routes can be rendered usesless when a device in one of the paths is deleted. For example: $ ip -f mpls ro ls 100 nexthop as to 200 via inet 172.16.2.2 dev virt12 nexthop as to 300 via inet 172.16.3.2 dev br0 101 nexthop as to 201 via inet6 2000:2::2 dev virt12 nexthop as to 301 via inet6 2000:3::2 dev br0 $ ip li del br0 When br0 is deleted the other hop is not considered in mpls_select_multipath because of the alive check -- rt_nhn_alive is 0. rt_nhn_alive is decremented once in mpls_ifdown when the device is taken down (NETDEV_DOWN) and again when it is deleted (NETDEV_UNREGISTER). For a 2 hop route, deleting one device drops the alive count to 0. Since devices are taken down before unregistering, the decrement on NETDEV_UNREGISTER is redundant. Fixes: c89359a42e2a4 ("mpls: support for dead routes") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a1477989ed0b..33211f9a2656 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1288,7 +1288,8 @@ static void mpls_ifdown(struct net_device *dev, int event) /* fall through */ case NETDEV_CHANGE: nh->nh_flags |= RTNH_F_LINKDOWN; - ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; + if (event != NETDEV_UNREGISTER) + ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1; break; } if (event == NETDEV_UNREGISTER) From 1da8ac7c49fb2879ba95006d8bd1095e6870ea1a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2017 22:05:55 -0800 Subject: [PATCH 204/384] selftests/bpf: fix broken build Recent merge of 'linux-kselftest-4.11-rc1' tree broke bpf test build. None of the tests were building and test_verifier.c had tons of compiler errors. Fix it and add #ifdef CAP_IS_SUPPORTED to support old versions of libcap. Tested on centos 6.8 and 7 Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Tested-by: Daniel Borkmann Signed-off-by: David S. Miller --- tools/include/uapi/linux/bpf_perf_event.h | 18 ++++++++++++++++++ tools/testing/selftests/bpf/Makefile | 4 +++- tools/testing/selftests/bpf/test_verifier.c | 4 ++++ 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 tools/include/uapi/linux/bpf_perf_event.h diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h new file mode 100644 index 000000000000..067427259820 --- /dev/null +++ b/tools/include/uapi/linux/bpf_perf_event.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +#define _UAPI__LINUX_BPF_PERF_EVENT_H__ + +#include +#include + +struct bpf_perf_event_data { + struct pt_regs regs; + __u64 sample_period; +}; + +#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4b498265dae6..67531f47781b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -1,12 +1,14 @@ LIBDIR := ../../../lib BPFOBJ := $(LIBDIR)/bpf/bpf.o -CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) +CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ) TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map TEST_PROGS := test_kmod.sh +all: $(TEST_GEN_PROGS) + .PHONY: all clean force # force a rebuild of BPFOBJ when its dependencies are updated diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e1f5b9eea1e8..d1555e4240c0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -8,6 +8,8 @@ * License as published by the Free Software Foundation. */ +#include +#include #include #include #include @@ -4583,10 +4585,12 @@ static bool is_admin(void) cap_flag_value_t sysadmin = CAP_CLEAR; const cap_value_t cap_val = CAP_SYS_ADMIN; +#ifdef CAP_IS_SUPPORTED if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) { perror("cap_get_flag"); return false; } +#endif caps = cap_get_proc(); if (!caps) { perror("cap_get_proc"); From 65869a47f3488253f5fd88cc4f14e0a4e2601a55 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 16:55:49 +0100 Subject: [PATCH 205/384] bpf: improve read-only handling Improve bpf_{prog,jit_binary}_{un,}lock_ro() by throwing a one-time warning in case of an error when the image couldn't be set read-only, and also mark struct bpf_prog as locked when bpf_prog_lock_ro() was called. Reason for the latter is that bpf_prog_unlock_ro() is called from various places including error paths, and we shouldn't mess with page attributes when really not needed. For bpf_jit_binary_unlock_ro() this is not needed as jited flag implicitly indicates this, thus for archs with ARCH_HAS_SET_MEMORY we're guaranteed to have a previously locked image. Overall, this should also help us to identify any further potential issues with set_memory_*() helpers. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 0c167fdee5f7..fbf7b39e8103 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -409,6 +409,7 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ + locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ @@ -554,22 +555,29 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) #ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { - set_memory_ro((unsigned long)fp, fp->pages); + fp->locked = 1; + WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages)); } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { - set_memory_rw((unsigned long)fp, fp->pages); + if (fp->locked) { + WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); + /* In case set_memory_rw() fails, we want to be the first + * to crash here instead of some random place later on. + */ + fp->locked = 0; + } } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { - set_memory_ro((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages)); } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { - set_memory_rw((unsigned long)hdr, hdr->pages); + WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); } #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) From 80354c29025833acd72ddac1ffa21c6cb50128cd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 12 Mar 2017 17:07:44 +0200 Subject: [PATCH 206/384] x86/platform/intel-mid: Correct MSI IRQ line for watchdog device The interrupt line used for the watchdog is 12, according to the official Intel Edison BSP code. And indeed after fixing it we start getting an interrupt and thus the watchdog starts working again: [ 191.699951] Kernel panic - not syncing: Kernel Watchdog Signed-off-by: Andy Shevchenko Cc: Borislav Petkov Cc: David Cohen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 78a3bb9e408b ("x86: intel-mid: add watchdog platform code for Merrifield") Link: http://lkml.kernel.org/r/20170312150744.45493-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 86edd1e941eb..9e304e2ea4f5 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -19,7 +19,7 @@ #include #include -#define TANGIER_EXT_TIMER0_MSI 15 +#define TANGIER_EXT_TIMER0_MSI 12 static struct platform_device wdt_dev = { .name = "intel_mid_wdt", From 9fa1d7537242bd580ffa99c4725a0407096aad26 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Feb 2017 10:11:45 +0200 Subject: [PATCH 207/384] drm/omap: fix dmabuf mmap for dma_alloc'ed buffers omap_gem_dmabuf_mmap() returns an error (with a WARN) when called for a buffer which is allocated with dma_alloc_*(). This prevents dmabuf mmap from working on SoCs without DMM, e.g. AM4 and OMAP3. I could not find any reason for omap_gem_dmabuf_mmap() rejecting such buffers, and just removing the if() fixes the limitation. Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c index af267c35d813..ee5883f59be5 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c +++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c @@ -147,9 +147,6 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer, struct drm_gem_object *obj = buffer->priv; int ret = 0; - if (WARN_ON(!obj->filp)) - return -EINVAL; - ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma); if (ret < 0) return ret; From 337ba7fbf0fbc12242359c4af114878618b90951 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sat, 25 Feb 2017 16:29:50 +0300 Subject: [PATCH 208/384] uapi: fix drm/omap_drm.h userspace compilation errors Consistently use types from linux/types.h like in other uapi drm/*_drm.h header files to fix the following drm/omap_drm.h userspace compilation errors: /usr/include/drm/omap_drm.h:36:2: error: unknown type name 'uint64_t' uint64_t param; /* in */ /usr/include/drm/omap_drm.h:37:2: error: unknown type name 'uint64_t' uint64_t value; /* in (set_param), out (get_param) */ /usr/include/drm/omap_drm.h:56:2: error: unknown type name 'uint32_t' uint32_t bytes; /* (for non-tiled formats) */ /usr/include/drm/omap_drm.h:58:3: error: unknown type name 'uint16_t' uint16_t width; /usr/include/drm/omap_drm.h:59:3: error: unknown type name 'uint16_t' uint16_t height; /usr/include/drm/omap_drm.h:65:2: error: unknown type name 'uint32_t' uint32_t flags; /* in */ /usr/include/drm/omap_drm.h:66:2: error: unknown type name 'uint32_t' uint32_t handle; /* out */ /usr/include/drm/omap_drm.h:67:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:77:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:78:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:82:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:83:2: error: unknown type name 'uint32_t' uint32_t op; /* mask of omap_gem_op (in) */ /usr/include/drm/omap_drm.h:88:2: error: unknown type name 'uint32_t' uint32_t nregions; /usr/include/drm/omap_drm.h:89:2: error: unknown type name 'uint32_t' uint32_t __pad; /usr/include/drm/omap_drm.h:93:2: error: unknown type name 'uint32_t' uint32_t handle; /* buffer handle (in) */ /usr/include/drm/omap_drm.h:94:2: error: unknown type name 'uint32_t' uint32_t pad; /usr/include/drm/omap_drm.h:95:2: error: unknown type name 'uint64_t' uint64_t offset; /* mmap offset (out) */ /usr/include/drm/omap_drm.h:102:2: error: unknown type name 'uint32_t' uint32_t size; /* virtual size for mmap'ing (out) */ /usr/include/drm/omap_drm.h:103:2: error: unknown type name 'uint32_t' uint32_t __pad; Fixes: ef6503e89194 ("drm: Kbuild: add omap_drm.h to the installed headers") Signed-off-by: Dmitry V. Levin Signed-off-by: Tomi Valkeinen --- include/uapi/drm/omap_drm.h | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/uapi/drm/omap_drm.h b/include/uapi/drm/omap_drm.h index 407cb55df6ac..7fb97863c945 100644 --- a/include/uapi/drm/omap_drm.h +++ b/include/uapi/drm/omap_drm.h @@ -33,8 +33,8 @@ extern "C" { #define OMAP_PARAM_CHIPSET_ID 1 /* ie. 0x3430, 0x4430, etc */ struct drm_omap_param { - uint64_t param; /* in */ - uint64_t value; /* in (set_param), out (get_param) */ + __u64 param; /* in */ + __u64 value; /* in (set_param), out (get_param) */ }; #define OMAP_BO_SCANOUT 0x00000001 /* scanout capable (phys contiguous) */ @@ -53,18 +53,18 @@ struct drm_omap_param { #define OMAP_BO_TILED (OMAP_BO_TILED_8 | OMAP_BO_TILED_16 | OMAP_BO_TILED_32) union omap_gem_size { - uint32_t bytes; /* (for non-tiled formats) */ + __u32 bytes; /* (for non-tiled formats) */ struct { - uint16_t width; - uint16_t height; + __u16 width; + __u16 height; } tiled; /* (for tiled formats) */ }; struct drm_omap_gem_new { union omap_gem_size size; /* in */ - uint32_t flags; /* in */ - uint32_t handle; /* out */ - uint32_t __pad; + __u32 flags; /* in */ + __u32 handle; /* out */ + __u32 __pad; }; /* mask of operations: */ @@ -74,33 +74,33 @@ enum omap_gem_op { }; struct drm_omap_gem_cpu_prep { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ }; struct drm_omap_gem_cpu_fini { - uint32_t handle; /* buffer handle (in) */ - uint32_t op; /* mask of omap_gem_op (in) */ + __u32 handle; /* buffer handle (in) */ + __u32 op; /* mask of omap_gem_op (in) */ /* TODO maybe here we pass down info about what regions are touched * by sw so we can be clever about cache ops? For now a placeholder, * set to zero and we just do full buffer flush.. */ - uint32_t nregions; - uint32_t __pad; + __u32 nregions; + __u32 __pad; }; struct drm_omap_gem_info { - uint32_t handle; /* buffer handle (in) */ - uint32_t pad; - uint64_t offset; /* mmap offset (out) */ + __u32 handle; /* buffer handle (in) */ + __u32 pad; + __u64 offset; /* mmap offset (out) */ /* note: in case of tiled buffers, the user virtual size can be * different from the physical size (ie. how many pages are needed * to back the object) which is returned in DRM_IOCTL_GEM_OPEN.. * This size here is the one that should be used if you want to * mmap() the buffer: */ - uint32_t size; /* virtual size for mmap'ing (out) */ - uint32_t __pad; + __u32 size; /* virtual size for mmap'ing (out) */ + __u32 __pad; }; #define DRM_OMAP_GET_PARAM 0x00 From 0067d4b020ea07a58540acb2c5fcd3364bf326e0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 13 Mar 2017 16:10:11 +0200 Subject: [PATCH 209/384] blk-mq: Fix tagset reinit in the presence of cpu hot-unplug In case cpu was unplugged, we need to make sure not to assume that the tags for that cpu are still allocated. so check for null tags when reinitializing a tagset. Reported-by: Yi Zhang Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index e48bc2c72615..9d97bfc4d465 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -295,6 +295,9 @@ int blk_mq_reinit_tagset(struct blk_mq_tag_set *set) for (i = 0; i < set->nr_hw_queues; i++) { struct blk_mq_tags *tags = set->tags[i]; + if (!tags) + continue; + for (j = 0; j < tags->nr_tags; j++) { if (!tags->static_rqs[j]) continue; From 4a3a485b1ed0e109718cc8c9d094fa0f552de9b2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Fri, 10 Mar 2017 12:09:49 -0800 Subject: [PATCH 210/384] writeback: fix memory leak in wb_queue_work() When WB_registered flag is not set, wb_queue_work() skips queuing the work, but does not perform the necessary clean up. In particular, if work->auto_free is true, it should free the memory. The leak condition can be reprouced by following these steps: mount /dev/sdb /mnt/sdb /* In qemu console: device_del sdb */ umount /dev/sdb Above will result in a wb_queue_work() call on an unregistered wb and thus leak memory. Reported-by: John Sperbeck Signed-off-by: Tahsin Erdogan Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- fs/fs-writeback.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ef600591d96f..63ee2940775c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -173,19 +173,33 @@ static void wb_wakeup(struct bdi_writeback *wb) spin_unlock_bh(&wb->work_lock); } +static void finish_writeback_work(struct bdi_writeback *wb, + struct wb_writeback_work *work) +{ + struct wb_completion *done = work->done; + + if (work->auto_free) + kfree(work); + if (done && atomic_dec_and_test(&done->cnt)) + wake_up_all(&wb->bdi->wb_waitq); +} + static void wb_queue_work(struct bdi_writeback *wb, struct wb_writeback_work *work) { trace_writeback_queue(wb, work); - spin_lock_bh(&wb->work_lock); - if (!test_bit(WB_registered, &wb->state)) - goto out_unlock; if (work->done) atomic_inc(&work->done->cnt); - list_add_tail(&work->list, &wb->work_list); - mod_delayed_work(bdi_wq, &wb->dwork, 0); -out_unlock: + + spin_lock_bh(&wb->work_lock); + + if (test_bit(WB_registered, &wb->state)) { + list_add_tail(&work->list, &wb->work_list); + mod_delayed_work(bdi_wq, &wb->dwork, 0); + } else + finish_writeback_work(wb, work); + spin_unlock_bh(&wb->work_lock); } @@ -1873,16 +1887,9 @@ static long wb_do_writeback(struct bdi_writeback *wb) set_bit(WB_writeback_running, &wb->state); while ((work = get_next_work_item(wb)) != NULL) { - struct wb_completion *done = work->done; - trace_writeback_exec(wb, work); - wrote += wb_writeback(wb, work); - - if (work->auto_free) - kfree(work); - if (done && atomic_dec_and_test(&done->cnt)) - wake_up_all(&wb->bdi->wb_waitq); + finish_writeback_work(wb, work); } /* From 6f1f622019f95d79d6e2f8bb3781144ad0aff75f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 6 Mar 2017 17:49:42 +0800 Subject: [PATCH 211/384] nfs4: fix a typo of NFS_ATTR_FATTR_GROUP_NAME This typo cause a memory leak, and a bad client's group id. unreferenced object 0xffff96d8073998d0 (size 8): comm "kworker/0:3", pid 34224, jiffies 4295361338 (age 761.752s) hex dump (first 8 bytes): 30 00 39 07 d8 96 ff ff 0.9..... backtrace: [] kmemleak_alloc+0x4a/0xa0 [] __kmalloc+0x140/0x220 [] xdr_stream_decode_string_dup+0x7c/0x110 [sunrpc] [] decode_getfattr_attrs+0x940/0x1630 [nfsv4] [] decode_getfattr_generic.constprop.108+0x9b/0x100 [nfsv4] [] nfs4_xdr_dec_open+0xcf/0x100 [nfsv4] [] rpcauth_unwrap_resp+0xa7/0xe0 [sunrpc] [] call_decode+0x1e0/0x810 [sunrpc] [] __rpc_execute+0x8d/0x420 [sunrpc] [] rpc_async_schedule+0x12/0x20 [sunrpc] [] process_one_work+0x197/0x430 [] worker_thread+0x4e/0x4a0 [] kthread+0x101/0x140 [] ret_from_fork+0x2c/0x40 [] 0xffffffffffffffff Fixes: 686a816ab6 ("NFSv4: Clean up owner/group attribute decode") Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f0369e362753..80ce289eea05 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3942,7 +3942,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, if (len <= 0) goto out; dprintk("%s: name=%s\n", __func__, group_name->data); - return NFS_ATTR_FATTR_OWNER_NAME; + return NFS_ATTR_FATTR_GROUP_NAME; } else { len = xdr_stream_decode_opaque_inline(xdr, (void **)&p, XDR_MAX_NETOBJ); From 366a1569bff3fe14abfdf9285e31e05e091745f5 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 6 Mar 2017 22:29:14 +0800 Subject: [PATCH 212/384] NFSv4: fix a reference leak caused WARNING messages Because nfs4_opendata_access() has close the state when access is denied, so the state isn't leak. Rather than revert the commit a974deee47, I'd like clean the strange state close. [ 1615.094218] ------------[ cut here ]------------ [ 1615.094607] WARNING: CPU: 0 PID: 23702 at lib/list_debug.c:31 __list_add_valid+0x8e/0xa0 [ 1615.094913] list_add double add: new=ffff9d7901d9f608, prev=ffff9d7901d9f608, next=ffff9d7901ee8dd0. [ 1615.095458] Modules linked in: nfsv4(E) nfs(E) nfsd(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock f2fs snd_seq_midi snd_seq_midi_event fscrypto coretemp ppdev crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_rapl_perf vmw_balloon snd_ens1371 joydev gameport snd_ac97_codec ac97_bus snd_seq snd_pcm snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp auth_rpcgss nfs_acl lockd(E) grace sunrpc(E) xfs libcrc32c vmwgfx drm_kms_helper ttm drm crc32c_intel mptspi e1000 serio_raw scsi_transport_spi mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: nfs] [ 1615.097663] CPU: 0 PID: 23702 Comm: fstest Tainted: G W E 4.11.0-rc1+ #517 [ 1615.098015] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 1615.098807] Call Trace: [ 1615.099183] dump_stack+0x63/0x86 [ 1615.099578] __warn+0xcb/0xf0 [ 1615.099967] warn_slowpath_fmt+0x5f/0x80 [ 1615.100370] __list_add_valid+0x8e/0xa0 [ 1615.100760] nfs4_put_state_owner+0x75/0xc0 [nfsv4] [ 1615.101136] __nfs4_close+0x109/0x140 [nfsv4] [ 1615.101524] nfs4_close_state+0x15/0x20 [nfsv4] [ 1615.101949] nfs4_close_context+0x21/0x30 [nfsv4] [ 1615.102691] __put_nfs_open_context+0xb8/0x110 [nfs] [ 1615.103155] put_nfs_open_context+0x10/0x20 [nfs] [ 1615.103586] nfs4_file_open+0x13b/0x260 [nfsv4] [ 1615.103978] do_dentry_open+0x20a/0x2f0 [ 1615.104369] ? nfs4_copy_file_range+0x30/0x30 [nfsv4] [ 1615.104739] vfs_open+0x4c/0x70 [ 1615.105106] ? may_open+0x5a/0x100 [ 1615.105469] path_openat+0x623/0x1420 [ 1615.105823] do_filp_open+0x91/0x100 [ 1615.106174] ? __alloc_fd+0x3f/0x170 [ 1615.106568] do_sys_open+0x130/0x220 [ 1615.106920] ? __put_cred+0x3d/0x50 [ 1615.107256] SyS_open+0x1e/0x20 [ 1615.107588] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 1615.107922] RIP: 0033:0x7fab599069b0 [ 1615.108247] RSP: 002b:00007ffcf0600d78 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 [ 1615.108575] RAX: ffffffffffffffda RBX: 00007fab59bcfae0 RCX: 00007fab599069b0 [ 1615.108896] RDX: 0000000000000200 RSI: 0000000000000200 RDI: 00007ffcf060255e [ 1615.109211] RBP: 0000000000040010 R08: 0000000000000000 R09: 0000000000000016 [ 1615.109515] R10: 00000000000006a1 R11: 0000000000000246 R12: 0000000000041000 [ 1615.109806] R13: 0000000000040010 R14: 0000000000001000 R15: 0000000000002710 [ 1615.110152] ---[ end trace 96ed63b1306bf2f3 ]--- Fixes: a974deee47 ("NFSv4: Fix memory and state leak in...") Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1b183686c6d4..c1f5369cd339 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2258,8 +2258,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred, if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) return 0; - /* even though OPEN succeeded, access is denied. Close the file */ - nfs4_close_state(state, fmode); return -EACCES; } From aac66bf5f916f645bd57029490a72c3f91f2c274 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 6 Mar 2017 23:54:01 +0000 Subject: [PATCH 213/384] drm/i915: use correct node for handling cache domain eviction It looks like we were incorrectly comparing vma->node against itself instead of the target node, when evicting for a node on systems where we need guard pages between regions with different cache domains. As a consequence we can end up trying to needlessly evict neighbouring nodes, even if they have the same cache domain, and if they were pinned we would fail the eviction. Fixes: 625d988acc28 ("drm/i915: Extract reserving space in the GTT to a helper") Signed-off-by: Matthew Auld Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170306235414.23407-3-matthew.auld@intel.com Signed-off-by: Chris Wilson (cherry picked from commit fe65cbdbc97929e4a522716ed279a36783656142) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_evict.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index c181b1bb3d2c..3be2503aa042 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -293,12 +293,12 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, * those as well to make room for our guard pages. */ if (check_color) { - if (vma->node.start + vma->node.size == node->start) { - if (vma->node.color == node->color) + if (node->start + node->size == target->start) { + if (node->color == target->color) continue; } - if (vma->node.start == node->start + node->size) { - if (vma->node.color == node->color) + if (node->start == target->start + target->size) { + if (node->color == target->color) continue; } } From 3a0d137de035cc8c70194d9988ded61825b5ff8a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 8 Mar 2017 13:00:07 +0100 Subject: [PATCH 214/384] drm/i915: Nuke skl_update_plane debug message from the pipe update critical section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printks are slow so we should not be doing them from the vblank evade critical section. These could explain why we sometimes seem to blow past our 100 usec deadline. The problem has been there ever since commit c331879ce8ea ("drm/i915: skylake sprite plane scaling using shared scalers.") but it may not have been readily visible until commit e1edbd44e23b ("drm/i915: Complain if we take too long under vblank evasion.") increased our chances of noticing it. Signed-off-by: Maarten Lankhorst Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1488974407-25175-1-git-send-email-maarten.lankhorst@linux.intel.com Fixes: c331879ce8ea ("drm/i915: skylake sprite plane scaling using shared scalers") Cc: # v4.2+ Reviewed-by: Ville Syrjälä [mlankhorst: Add missing tags, point to the correct offending commit] (cherry picked from commit d38146b9ee16264ff9a88bf3391ab9f2f5af3646) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sprite.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a..9481ca9a3ae7 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -254,9 +254,6 @@ skl_update_plane(struct drm_plane *drm_plane, int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler; - DRM_DEBUG_KMS("plane = %d PS_PLANE_SEL(plane) = 0x%x\n", - plane_id, PS_PLANE_SEL(plane_id)); - scaler = &crtc_state->scaler_state.scalers[scaler_id]; I915_WRITE(SKL_PS_CTRL(pipe, scaler_id), From 6aef660370a9c246956ba6d01eebd8063c4214cb Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 10 Mar 2017 09:32:49 +0000 Subject: [PATCH 215/384] drm/i915: Fix forcewake active domain tracking In commit 003342a50021 ("drm/i915: Keep track of active forcewake domains in a bitmask") I forgot to adjust the newly introduce fw_domains_active state across reset. This caused the assert_forcewakes_inactive to trigger during suspend and resume if there were user held forcewakes. v2: Bitmask checks are required since vfuncs are not always present. v3: Move bitmask tracking to get/put vfunc for simplicity. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 003342a50021 ("drm/i915: Keep track of active forcewake domains in a bitmask") Testcase: igt/drv_suspend/forcewake Cc: Tvrtko Ursulin Cc: "Paneri, Praveen" Cc: Chris Wilson Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: v4.10+ Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170310093249.4484-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit b8473050805f35add97f3ff57570d55a01808df5) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_uncore.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..b7ff592b14f5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -119,6 +119,8 @@ fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma for_each_fw_domain_masked(d, fw_domains, dev_priv) fw_domain_wait_ack(d); + + dev_priv->uncore.fw_domains_active |= fw_domains; } static void @@ -130,6 +132,8 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma fw_domain_put(d); fw_domain_posting_read(d); } + + dev_priv->uncore.fw_domains_active &= ~fw_domains; } static void @@ -240,10 +244,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) if (WARN_ON(domain->wake_count == 0)) domain->wake_count++; - if (--domain->wake_count == 0) { + if (--domain->wake_count == 0) dev_priv->uncore.funcs.force_wake_put(dev_priv, domain->mask); - dev_priv->uncore.fw_domains_active &= ~domain->mask; - } spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); @@ -454,10 +456,8 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, fw_domains &= ~domain->mask; } - if (fw_domains) { + if (fw_domains) dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; - } } /** @@ -968,7 +968,6 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv, fw_domain_arm_timer(domain); dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains); - dev_priv->uncore.fw_domains_active |= fw_domains; } static inline void __force_wake_auto(struct drm_i915_private *dev_priv, From ce70df089143c49385b4f32f39d41fb50fbf6a7c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 13 Mar 2017 08:22:13 +0300 Subject: [PATCH 216/384] mm, gup: fix typo in gup_p4d_range() gup_p4d_range() should call gup_pud_range(), not itself. [ This was not noticed on x86: this is the HAVE_GENERIC_RCU_GUP code used by arm[64] and powerpc - Linus ] Fixes: c2febafc6773 ("mm: convert generic code to 5-level paging") Signed-off-by: Kirill A. Shutemov Reported-by: Chris Packham Reported-by: Anton Blanchard Acked-by: Michal Hocko Acked-by: Mark Rutland Signed-off-by: Linus Torvalds --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index c74bad1bf6e8..04aa405350dc 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1455,7 +1455,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, write, pages, nr)) return 0; - } else if (!gup_p4d_range(p4d, addr, next, write, pages, nr)) + } else if (!gup_pud_range(p4d, addr, next, write, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); From c5f7c5a9a0f84c511a8a336491f9b8a3060b6517 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 6 Mar 2017 16:21:16 +0200 Subject: [PATCH 217/384] drivers, xen: convert grant_map.users from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Boris Ostrovsky --- drivers/xen/gntdev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index c77a0751a311..f3bf8f4e2d6c 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ struct grant_map { int index; int count; int flags; - atomic_t users; + refcount_t users; struct unmap_notify notify; struct ioctl_gntdev_grant_ref *grants; struct gnttab_map_grant_ref *map_ops; @@ -166,7 +167,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->index = 0; add->count = count; - atomic_set(&add->users, 1); + refcount_set(&add->users, 1); return add; @@ -212,7 +213,7 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) if (!map) return; - if (!atomic_dec_and_test(&map->users)) + if (!refcount_dec_and_test(&map->users)) return; atomic_sub(map->count, &pages_mapped); @@ -400,7 +401,7 @@ static void gntdev_vma_open(struct vm_area_struct *vma) struct grant_map *map = vma->vm_private_data; pr_debug("gntdev_vma_open %p\n", vma); - atomic_inc(&map->users); + refcount_inc(&map->users); } static void gntdev_vma_close(struct vm_area_struct *vma) @@ -1004,7 +1005,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) goto unlock_out; } - atomic_inc(&map->users); + refcount_inc(&map->users); vma->vm_ops = &gntdev_vmops; From 44fee88cea43d3c2cac962e0439cb10a3cabff6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Mar 2017 15:57:12 +0100 Subject: [PATCH 218/384] x86/tsc: Fix ART for TSC_KNOWN_FREQ Subhransu reported that convert_art_to_tsc() isn't working for him. The ART to TSC relation is only set up for systems which use the refined TSC calibration. Systems with known TSC frequency (available via CPUID 15) are not using the refined calibration and therefor the ART to TSC relation is never established. Add the setup to the known frequency init path which skips ART calibration. The init code needs to be duplicated as for systems which use refined calibration the ART setup must be delayed until calibration has been done. The problem has been there since the ART support was introdduced, but only detected now because Subhransu tested the first time on hardware which has TSC frequency enumerated via CPUID 15. Note for stable: The conditional has changed from TSC_RELIABLE to TSC_KNOWN_FREQUENCY. [ tglx: Rewrote changelog and identified the proper 'Fixes' commit ] Fixes: f9677e0f8308 ("x86/tsc: Always Running Timer (ART) correlated clocksource") Reported-by: "Prusty, Subhransu S" Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Cc: christopher.s.hall@intel.com Cc: kevin.b.stanton@intel.com Cc: john.stultz@linaro.org Cc: akataria@vmware.com Link: http://lkml.kernel.org/r/20170313145712.GI3312@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4f7a9833d8e5..c73a7f9e881a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); return 0; } From 67e194007be08d071294456274dd53e0a04fdf90 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Mon, 13 Mar 2017 13:28:09 +0100 Subject: [PATCH 219/384] ipv6: make ECMP route replacement less greedy Commit 27596472473a ("ipv6: fix ECMP route replacement") introduced a loop that removes all siblings of an ECMP route that is being replaced. However, this loop doesn't stop when it has replaced siblings, and keeps removing other routes with a higher metric. We also end up triggering the WARN_ON after the loop, because after this nsiblings < 0. Instead, stop the loop when we have taken care of all routes with the same metric as the route being replaced. Reproducer: =========== #!/bin/sh ip netns add ns1 ip netns add ns2 ip -net ns1 link set lo up for x in 0 1 2 ; do ip link add veth$x netns ns2 type veth peer name eth$x netns ns1 ip -net ns1 link set eth$x up ip -net ns2 link set veth$x up done ip -net ns1 -6 r a 2000::/64 nexthop via fe80::0 dev eth0 \ nexthop via fe80::1 dev eth1 nexthop via fe80::2 dev eth2 ip -net ns1 -6 r a 2000::/64 via fe80::42 dev eth0 metric 256 ip -net ns1 -6 r a 2000::/64 via fe80::43 dev eth0 metric 2048 echo "before replace, 3 routes" ip -net ns1 -6 r | grep -v '^fe80\|^ff00' echo ip -net ns1 -6 r c 2000::/64 nexthop via fe80::4 dev eth0 \ nexthop via fe80::5 dev eth1 nexthop via fe80::6 dev eth2 echo "after replace, only 2 routes, metric 2048 is gone" ip -net ns1 -6 r | grep -v '^fe80\|^ff00' Fixes: 27596472473a ("ipv6: fix ECMP route replacement") Signed-off-by: Sabrina Dubroca Acked-by: Nicolas Dichtel Reviewed-by: Xin Long Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e4266746e4a2..d4bf2c68a545 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -923,6 +923,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &rt->dst.rt6_next; iter = *ins; while (iter) { + if (iter->rt6i_metric > rt->rt6i_metric) + break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); From 68c32f9c2a36d410aa242e661506e5b2c2764179 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:39:01 +0100 Subject: [PATCH 220/384] isdn/gigaset: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer should a malicious device lack endpoints. Fixes: cf7776dc05b8 ("[PATCH] isdn4linux: Siemens Gigaset drivers - direct USB connection") Cc: stable # 2.6.17 Cc: Hansjoerg Lipp Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/isdn/gigaset/bas-gigaset.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index 11e13c56126f..2da3ff650e1d 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2317,6 +2317,9 @@ static int gigaset_probe(struct usb_interface *interface, return -ENODEV; } + if (hostif->desc.bNumEndpoints < 1) + return -ENODEV; + dev_info(&udev->dev, "%s: Device matched (Vendor: 0x%x, Product: 0x%x)\n", __func__, le16_to_cpu(udev->descriptor.idVendor), From 6e526fdff7be4f13b24f929a04c0e9ae6761291e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 13 Mar 2017 13:42:03 +0100 Subject: [PATCH 221/384] net: wimax/i2400m: fix NULL-deref at probe Make sure to check the number of endpoints to avoid dereferencing a NULL-pointer or accessing memory beyond the endpoint array should a malicious device lack the expected endpoints. The endpoints are specifically dereferenced in the i2400m_bootrom_init path during probe (e.g. in i2400mu_tx_bulk_out). Fixes: f398e4240fce ("i2400m/USB: probe/disconnect, dev init/shutdown and reset backends") Cc: Inaky Perez-Gonzalez Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/usb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c index e7f5910a6519..f8eb66ef2944 100644 --- a/drivers/net/wimax/i2400m/usb.c +++ b/drivers/net/wimax/i2400m/usb.c @@ -467,6 +467,9 @@ int i2400mu_probe(struct usb_interface *iface, struct i2400mu *i2400mu; struct usb_device *usb_dev = interface_to_usbdev(iface); + if (iface->cur_altsetting->desc.bNumEndpoints < 4) + return -ENODEV; + if (usb_dev->speed != USB_SPEED_HIGH) dev_err(dev, "device not connected as high speed\n"); From 0d443b70cc92d741cbc1dcbf1079897b3d8bc3cc Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 7 Mar 2017 15:08:42 -0600 Subject: [PATCH 222/384] x86/platform: Remove warning message for duplicate NMI handlers Remove the WARNING message associated with multiple NMI handlers as there are at least two that are legitimate. These are the KGDB and the UV handlers and both want to be called if the NMI has not been claimed by any other NMI handler. Use of the UNKNOWN NMI call chain dramatically lowers the NMI call rate when high frequency NMI tools are in use, notably the perf tools. It is required on systems that cannot sustain a high NMI call rate without adversely affecting the system operation. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Cc: Don Zickus Cc: Peter Zijlstra Cc: Russ Anderson Cc: Frank Ramsay Cc: Tony Ernst Link: http://lkml.kernel.org/r/20170307210841.730959611@asylum.americas.sgi.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f088ea4c66e7..a723ae9440ab 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) spin_lock_irqsave(&desc->lock, flags); /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. Just a sanity check - * to manage expectations + * Indicate if there are multiple registrations on the + * internal NMI handler call chains (SERR and IO_CHECK). */ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); From 79e49503efe53a8c51d8b695bedc8a346c5e4a87 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Mar 2017 16:24:28 +0100 Subject: [PATCH 223/384] ipv6: avoid write to a possibly cloned skb ip6_fragment, in case skb has a fraglist, checks if the skb is cloned. If it is, it will move to the 'slow path' and allocates new skbs for each fragment. However, right before entering the slowpath loop, it updates the nexthdr value of the last ipv6 extension header to NEXTHDR_FRAGMENT, to account for the fragment header that will be inserted in the new ipv6-fragment skbs. In case original skb is cloned this munges nexthdr value of another skb. Avoid this by doing the nexthdr update for each of the new fragment skbs separately. This was observed with tcpdump on a bridge device where netfilter ipv6 reassembly is active: tcpdump shows malformed fragment headers as the l4 header (icmpv6, tcp, etc). is decoded as a fragment header. Cc: Hannes Frederic Sowa Reported-by: Andreas Karis Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index df42096e1f04..58f6288e9ba5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -768,13 +768,14 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, * Fragment the datagram. */ - *prevhdr = NEXTHDR_FRAGMENT; troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. */ while (left > 0) { + u8 *fragnexthdr_offset; + len = left; /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) @@ -819,6 +820,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, */ skb_copy_from_linear_data(skb, skb_network_header(frag), hlen); + fragnexthdr_offset = skb_network_header(frag); + fragnexthdr_offset += prevhdr - skb_network_header(skb); + *fragnexthdr_offset = NEXTHDR_FRAGMENT; + /* * Build fragment header. */ From a13b2082ece95247779b9995c4e91b4246bed023 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 Mar 2017 17:38:17 +0100 Subject: [PATCH 224/384] bridge: drop netfilter fake rtable unconditionally Andreas reports kernel oops during rmmod of the br_netfilter module. Hannes debugged the oops down to a NULL rt6info->rt6i_indev. Problem is that br_netfilter has the nasty concept of adding a fake rtable to skb->dst; this happens in a br_netfilter prerouting hook. A second hook (in bridge LOCAL_IN) is supposed to remove these again before the skb is handed up the stack. However, on module unload hooks get unregistered which means an skb could traverse the prerouting hook that attaches the fake_rtable, while the 'fake rtable remove' hook gets removed from the hooklist immediately after. Fixes: 34666d467cbf1e2e3c7 ("netfilter: bridge: move br_netfilter out of the core") Reported-by: Andreas Karis Debugged-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Acked-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/bridge/br_input.c | 1 + net/bridge/br_netfilter_hooks.c | 21 --------------------- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 236f34244dbe..013f2290bfa5 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -30,6 +30,7 @@ EXPORT_SYMBOL(br_should_route_hook); static int br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { + br_drop_fake_rtable(skb); return netif_receive_skb(skb); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 95087e6e8258..fa87fbd62bb7 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv, } -/* PF_BRIDGE/LOCAL_IN ************************************************/ -/* The packet is locally destined, which requires a real - * dst_entry, so detach the fake one. On the way up, the - * packet would pass through PRE_ROUTING again (which already - * took place when the packet entered the bridge), but we - * register an IPv4 PRE_ROUTING 'sabotage' hook that will - * prevent this from happening. */ -static unsigned int br_nf_local_in(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - br_drop_fake_rtable(skb); - return NF_ACCEPT; -} - /* PF_BRIDGE/FORWARD *************************************************/ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -907,12 +892,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, - { - .hook = br_nf_local_in, - .pf = NFPROTO_BRIDGE, - .hooknum = NF_BR_LOCAL_IN, - .priority = NF_BR_PRI_BRNF, - }, { .hook = br_nf_forward_ip, .pf = NFPROTO_BRIDGE, From c836e5cf0d0880d6668966476c4ffe727f29f69a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:21 +0200 Subject: [PATCH 225/384] x86/platform/intel-mid: Use common power off sequence Intel Medfield may use common for Intel MID devices power sequence. Remove unneded custom power off stub. While here, remove function forward declaration. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/mfld.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index e793fe509971..e42978d4deaf 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -17,16 +17,6 @@ #include "intel_mid_weak_decls.h" -static void penwell_arch_setup(void); -/* penwell arch ops */ -static struct intel_mid_ops penwell_ops = { - .arch_setup = penwell_arch_setup, -}; - -static void mfld_power_off(void) -{ -} - static unsigned long __init mfld_calibrate_tsc(void) { unsigned long fast_calibrate; @@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void) static void __init penwell_arch_setup(void) { x86_platform.calibrate_tsc = mfld_calibrate_tsc; - pm_power_off = mfld_power_off; } +static struct intel_mid_ops penwell_ops = { + .arch_setup = penwell_arch_setup, +}; + void *get_penwell_ops(void) { return &penwell_ops; From 859bb6d59066b96341020e0991f191631cabe59d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:22 +0200 Subject: [PATCH 226/384] x86/platform/intel-mid: Add power button support for Merrifield Intel Merrifield platform has a Basin Cove PMIC to handle in particular power button events. Add necessary bits to enable it. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-2-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- .../platform/intel-mid/device_libs/Makefile | 1 + .../device_libs/platform_mrfld_power_btn.c | 82 +++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index a7dbec4dce27..3dbde04febdc 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c new file mode 100644 index 000000000000..a6c3705a28ad --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c @@ -0,0 +1,82 @@ +/* + * Intel Merrifield power button support + * + * (C) Copyright 2017 Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include + +#include +#include + +static struct resource mrfld_power_btn_resources[] = { + { + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mrfld_power_btn_dev = { + .name = "msic_power_btn", + .id = PLATFORM_DEVID_NONE, + .num_resources = ARRAY_SIZE(mrfld_power_btn_resources), + .resource = mrfld_power_btn_resources, +}; + +static int mrfld_power_btn_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) +{ + if (code == SCU_DOWN) { + platform_device_unregister(&mrfld_power_btn_dev); + return 0; + } + + return platform_device_register(&mrfld_power_btn_dev); +} + +static struct notifier_block mrfld_power_btn_scu_notifier = { + .notifier_call = mrfld_power_btn_scu_status_change, +}; + +static int __init register_mrfld_power_btn(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + /* + * We need to be sure that the SCU IPC is ready before + * PMIC power button device can be registered: + */ + intel_scu_notifier_add(&mrfld_power_btn_scu_notifier); + + return 0; +} +arch_initcall(register_mrfld_power_btn); + +static void __init *mrfld_power_btn_platform_data(void *info) +{ + struct resource *res = mrfld_power_btn_resources; + struct sfi_device_table_entry *pentry = info; + + res->start = res->end = pentry->irq; + return NULL; +} + +static const struct devs_id mrfld_power_btn_dev_id __initconst = { + .name = "bcove_power_btn", + .type = SFI_DEV_TYPE_IPC, + .delay = 1, + .msic = 1, + .get_platform_data = &mrfld_power_btn_platform_data, +}; + +sfi_device(mrfld_power_btn_dev_id); From 6e7408acd04d06c04981c0c0fb5a2462b16fae4f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 12 Mar 2017 18:12:56 +0100 Subject: [PATCH 227/384] cpufreq: intel_pstate: Update pid_params.sample_rate_ns in pid_param_set() Fix the debugfs interface for PID tuning to actually update pid_params.sample_rate_ns on PID parameters updates, as changing pid_params.sample_rate_ms via debugfs has no effect now. Fixes: a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization update callbacks) Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3d37219a0dd7..f9fe910f3b83 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -989,6 +989,7 @@ static void intel_pstate_update_policies(void) static int pid_param_set(void *data, u64 val) { *(u32 *)data = val; + pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; intel_pstate_reset_all_pid(); return 0; } From be3606ff739d1c1be36389f8737c577ad87e1f57 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 13 Mar 2017 19:33:37 +0300 Subject: [PATCH 228/384] x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y The kernel doesn't boot with both PROFILE_ANNOTATED_BRANCHES=y and KASAN=y options selected. With branch profiling enabled we end up calling ftrace_likely_update() before kasan_early_init(). ftrace_likely_update() is built with KASAN instrumentation, so calling it before kasan has been initialized leads to crash. Use DISABLE_BRANCH_PROFILING define to make sure that we don't call ftrace_likely_update() from early code before kasan_early_init(). Fixes: ef7f0d6a6ca8 ("x86_64: add KASan support") Reported-by: Fengguang Wu Signed-off-by: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: lkp@01.org Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/20170313163337.1704-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 54a2372f5dbb..b5785c197e53 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Andrea Arcangeli SuSE */ +#define DISABLE_BRANCH_PROFILING #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8d63d7a104c3..4c90cfdc128b 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include #include From 91864f5852f9996210fad400cf70fb85af091243 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Sun, 12 Mar 2017 21:36:18 -0700 Subject: [PATCH 229/384] net: use net->count to check whether a netns is alive or not MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous idea was to check whether a net namespace is in net_exit_list or not. It doesn't work, because net->exit_list is used in __register_pernet_operations and __unregister_pernet_operations where all namespaces are added to a temporary list to make cleanup in a error case, so list_empty(&net->exit_list) always returns false. Reported-by: Mantas Mikulėnas Fixes: 002d8a1a6c11 ("net: skip genenerating uevents for network namespaces that are exiting") Signed-off-by: Andrei Vagin Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3945821e9c1f..65ea0ff4017c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -953,7 +953,7 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct kobject *kobj = &dev->_rx[i].kobj; - if (!list_empty(&dev_net(dev)->exit_list)) + if (!atomic_read(&dev_net(dev)->count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); @@ -1371,7 +1371,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) while (--i >= new_num) { struct netdev_queue *queue = dev->_tx + i; - if (!list_empty(&dev_net(dev)->exit_list)) + if (!atomic_read(&dev_net(dev)->count)) queue->kobj.uevent_suppress = 1; #ifdef CONFIG_BQL sysfs_remove_group(&queue->kobj, &dql_group); @@ -1558,7 +1558,7 @@ void netdev_unregister_kobject(struct net_device *ndev) { struct device *dev = &(ndev->dev); - if (!list_empty(&dev_net(ndev)->exit_list)) + if (!atomic_read(&dev_net(ndev)->count)) dev_set_uevent_suppress(dev, 1); kobject_get(&dev->kobj); From c80498e36d4ef3e24599d363c622fbf22a1293cc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Mar 2017 16:24:03 +0100 Subject: [PATCH 230/384] vxlan: fix ovs support The required changes in the function vxlan_dev_create() were missing in commit 8bcdc4f3a20b. The vxlan device is not registered anymore after this patch and the error path causes an stack dump: WARNING: CPU: 3 PID: 1498 at net/core/dev.c:6713 rollback_registered_many+0x9d/0x3f0 Fixes: 8bcdc4f3a20b ("vxlan: add changelink support") CC: Roopa Prabhu Signed-off-by: Nicolas Dichtel Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 73 +++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 33 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e375560cc74e..bdb6ae16d4a8 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2976,6 +2976,44 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, return 0; } +static int __vxlan_dev_create(struct net *net, struct net_device *dev, + struct vxlan_config *conf) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + struct vxlan_dev *vxlan = netdev_priv(dev); + int err; + + err = vxlan_dev_configure(net, dev, conf, false); + if (err) + return err; + + dev->ethtool_ops = &vxlan_ethtool_ops; + + /* create an fdb entry for a valid default destination */ + if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { + err = vxlan_fdb_create(vxlan, all_zeros_mac, + &vxlan->default_dst.remote_ip, + NUD_REACHABLE | NUD_PERMANENT, + NLM_F_EXCL | NLM_F_CREATE, + vxlan->cfg.dst_port, + vxlan->default_dst.remote_vni, + vxlan->default_dst.remote_vni, + vxlan->default_dst.remote_ifindex, + NTF_SELF); + if (err) + return err; + } + + err = register_netdevice(dev); + if (err) { + vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); + return err; + } + + list_add(&vxlan->next, &vn->vxlan_list); + return 0; +} + static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], struct net_device *dev, struct vxlan_config *conf, bool changelink) @@ -3172,8 +3210,6 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct vxlan_net *vn = net_generic(src_net, vxlan_net_id); - struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_config conf; int err; @@ -3181,36 +3217,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (err) return err; - err = vxlan_dev_configure(src_net, dev, &conf, false); - if (err) - return err; - - dev->ethtool_ops = &vxlan_ethtool_ops; - - /* create an fdb entry for a valid default destination */ - if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, - &vxlan->default_dst.remote_ip, - NUD_REACHABLE | NUD_PERMANENT, - NLM_F_EXCL | NLM_F_CREATE, - vxlan->cfg.dst_port, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_vni, - vxlan->default_dst.remote_ifindex, - NTF_SELF); - if (err) - return err; - } - - err = register_netdevice(dev); - if (err) { - vxlan_fdb_delete_default(vxlan, vxlan->default_dst.remote_vni); - return err; - } - - list_add(&vxlan->next, &vn->vxlan_list); - - return 0; + return __vxlan_dev_create(src_net, dev, &conf); } static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], @@ -3440,7 +3447,7 @@ struct net_device *vxlan_dev_create(struct net *net, const char *name, if (IS_ERR(dev)) return dev; - err = vxlan_dev_configure(net, dev, conf, false); + err = __vxlan_dev_create(net, dev, conf); if (err < 0) { free_netdev(dev); return ERR_PTR(err); From 3f8ed54aee491bbb83656592c2d0ad7b78d045ca Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 13 Mar 2017 18:30:12 -0700 Subject: [PATCH 231/384] cpufreq: intel_pstate: Correct frequency setting in the HWP mode In the functions intel_pstate_hwp_set(), min/max range from HWP capability MSR along with max_perf_pct and min_perf_pct, is used to set the HWP request MSR. In some cases this doesn't result in the correct HWP max/min in HWP request. For example: In the following case: HWP capabilities from MSR 0x771 0x70a1220 Here cpufreq min/max frequencies from above MSR dump are 700MHz and 3.2GHz respectively. This will result in hwp_min = 0x07 hwp_max = 0x20 To limit max frequency to 2GHz: perf_limits->max_perf_pct = 63 (2GHz as a percent of 3.2GHz rounded up) With the current calculation: adj_range = max_perf_pct * range / 100; adj_range = 63 * (32 - 7) / 100 adj_range = 15 max = hw_min + adj_range; max = 7 + 15 = 22 This will result in HWP request of 0x160f, which will result in a frequency cap of 2.2GHz not 2GHz. The problem with the above calculation is that hwp_min of 7 is treated as 0% in the range. But max_perf_pct is calculated with respect to minimum as 0 and max as 3.2GHz or hwp_max, so adding hwp_min to it will result in more than the desired. Since the min_perf_pct and max_perf_pct is already a percent of max frequency or hwp_max, this min/max HWP request value can be calculated directly applying these percentage to hwp_max. Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f9fe910f3b83..ee12641ee010 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -845,7 +845,7 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { static void intel_pstate_hwp_set(struct cpufreq_policy *policy) { - int min, hw_min, max, hw_max, cpu, range, adj_range; + int min, hw_min, max, hw_max, cpu; struct perf_limits *perf_limits = limits; u64 value, cap; @@ -863,20 +863,17 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) hw_max = HWP_GUARANTEED_PERF(cap); else hw_max = HWP_HIGHEST_PERF(cap); - range = hw_max - hw_min; max_perf_pct = perf_limits->max_perf_pct; min_perf_pct = perf_limits->min_perf_pct; + min = hw_max * min_perf_pct / 100; rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); - adj_range = min_perf_pct * range / 100; - min = hw_min + adj_range; + value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - adj_range = max_perf_pct * range / 100; - max = hw_min + adj_range; - + max = hw_max * max_perf_pct / 100; value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); From 64ff64b90e62c860772fd0be50b7cfcef1d8a9b2 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:12 -0800 Subject: [PATCH 232/384] scsi: megaraid_sas: enable intx only if msix request fails Without this fix, driver will enable INTx Interrupt pin even though MSI-x vectors are enabled. See below lspci output. DisINTx is unset for MSIx setup. lspci -s 85:00.0 -vvv |grep INT |grep Control Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- After applying this fix, driver will enable INTx Interrupt pin only if Legacy interrupt method is required. See below lspci output. DisINTx is set for MSIx setup. lspci -s 85:00.0 -vvv |grep INT |grep Control Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx+ Signed-off-by: Kashyap Desai Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 7ac9a9ee9bd4..016ffcebaf66 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5034,10 +5034,12 @@ megasas_setup_irqs_msix(struct megasas_instance *instance, u8 is_probe) &instance->irq_context[j]); /* Retry irq register for IO_APIC*/ instance->msix_vectors = 0; - if (is_probe) + if (is_probe) { + pci_free_irq_vectors(instance->pdev); return megasas_setup_irqs_ioapic(instance); - else + } else { return -1; + } } } return 0; @@ -5277,9 +5279,11 @@ static int megasas_init_fw(struct megasas_instance *instance) MPI2_REPLY_POST_HOST_INDEX_OFFSET); } - i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); - if (i < 0) - goto fail_setup_irqs; + if (!instance->msix_vectors) { + i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); + if (i < 0) + goto fail_setup_irqs; + } dev_info(&instance->pdev->dev, "firmware supports msix\t: (%d)", fw_msix_count); From 49524b3c6e12375627ddd870613fcc6b24909898 Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:13 -0800 Subject: [PATCH 233/384] scsi: megaraid_sas: add correct return type check for ldio hint logic for raid1 Return value check of atomic_dec_if_positive is required as it returns old value minus one. Without this fix, driver will send small ios to firmware path and that will be a performance issue. Not critical, but good to have r1_ldio_hint as default value in sdev private. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 3 +++ drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 016ffcebaf66..0016f12cc563 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1963,6 +1963,9 @@ static int megasas_slave_alloc(struct scsi_device *sdev) if (!mr_device_priv_data) return -ENOMEM; sdev->hostdata = mr_device_priv_data; + + atomic_set(&mr_device_priv_data->r1_ldio_hint, + instance->r1_ldio_hint_default); return 0; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 29650ba669da..ebd746e2d97c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2338,7 +2338,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance, fp_possible = false; atomic_dec(&instance->fw_outstanding); } else if ((scsi_buff_len > MR_LARGE_IO_MIN_SIZE) || - atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint)) { + (atomic_dec_if_positive(&mrdev_priv->r1_ldio_hint) > 0)) { fp_possible = false; atomic_dec(&instance->fw_outstanding); if (scsi_buff_len > MR_LARGE_IO_MIN_SIZE) From 874d025da667d19b728141437ccbefe9dbaf9e7b Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:14 -0800 Subject: [PATCH 234/384] scsi: megaraid_sas: raid6 also require cpuSel check same as raid5 Without this fix, raid6 performance will not be optimal. Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index ebd746e2d97c..f990ab4d45e1 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2159,7 +2159,7 @@ megasas_set_raidflag_cpu_affinity(union RAID_CONTEXT_UNION *praid_context, cpu_sel = MR_RAID_CTX_CPUSEL_1; if (is_stream_detected(rctx_g35) && - (raid->level == 5) && + ((raid->level == 5) || (raid->level == 6)) && (raid->writeMode == MR_RL_WRITE_THROUGH_MODE) && (cpu_sel == MR_RAID_CTX_CPUSEL_FCFS)) cpu_sel = MR_RAID_CTX_CPUSEL_0; From 22487b66594f18f2a7c211f3ab8bb02dec74d37b Mon Sep 17 00:00:00 2001 From: Shivasharan S Date: Fri, 10 Mar 2017 03:22:15 -0800 Subject: [PATCH 235/384] scsi: megaraid_sas: Driver version upgrade Signed-off-by: Kashyap Desai Signed-off-by: Shivasharan S Reviewed-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index e7e5974e1a2c..2b209bbb4c91 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -35,8 +35,8 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "07.701.16.00-rc1" -#define MEGASAS_RELDATE "February 2, 2017" +#define MEGASAS_VERSION "07.701.17.00-rc1" +#define MEGASAS_RELDATE "March 2, 2017" /* * Device IDs From 02bb56ddc6711639c549d81c7b9f6d845da243a9 Mon Sep 17 00:00:00 2001 From: Zhao Qiang Date: Tue, 14 Mar 2017 09:38:33 +0800 Subject: [PATCH 236/384] ucc/hdlc: fix two little issue 1. modify bd_status from u32 to u16 in function hdlc_rx_done, because bd_status register is 16bits 2. write bd_length register before writing bd_status register Signed-off-by: Zhao Qiang Signed-off-by: David S. Miller --- drivers/net/wan/fsl_ucc_hdlc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index a5045b5279d7..6742ae605660 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -381,8 +381,8 @@ static netdev_tx_t ucc_hdlc_tx(struct sk_buff *skb, struct net_device *dev) /* set bd status and length */ bd_status = (bd_status & T_W_S) | T_R_S | T_I_S | T_L_S | T_TC_S; - iowrite16be(bd_status, &bd->status); iowrite16be(skb->len, &bd->length); + iowrite16be(bd_status, &bd->status); /* Move to next BD in the ring */ if (!(bd_status & T_W_S)) @@ -457,7 +457,7 @@ static int hdlc_rx_done(struct ucc_hdlc_private *priv, int rx_work_limit) struct sk_buff *skb; hdlc_device *hdlc = dev_to_hdlc(dev); struct qe_bd *bd; - u32 bd_status; + u16 bd_status; u16 length, howmany = 0; u8 *bdbuffer; int i; From 45caeaa5ac0b4b11784ac6f932c0ad4c6b67cda0 Mon Sep 17 00:00:00 2001 From: Jon Maxwell Date: Fri, 10 Mar 2017 16:40:33 +1100 Subject: [PATCH 237/384] dccp/tcp: fix routing redirect race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As Eric Dumazet pointed out this also needs to be fixed in IPv6. v2: Contains the IPv6 tcp/Ipv6 dccp patches as well. We have seen a few incidents lately where a dst_enty has been freed with a dangling TCP socket reference (sk->sk_dst_cache) pointing to that dst_entry. If the conditions/timings are right a crash then ensues when the freed dst_entry is referenced later on. A Common crashing back trace is: #8 [] page_fault at ffffffff8163e648 [exception RIP: __tcp_ack_snd_check+74] . . #9 [] tcp_rcv_established at ffffffff81580b64 #10 [] tcp_v4_do_rcv at ffffffff8158b54a #11 [] tcp_v4_rcv at ffffffff8158cd02 #12 [] ip_local_deliver_finish at ffffffff815668f4 #13 [] ip_local_deliver at ffffffff81566bd9 #14 [] ip_rcv_finish at ffffffff8156656d #15 [] ip_rcv at ffffffff81566f06 #16 [] __netif_receive_skb_core at ffffffff8152b3a2 #17 [] __netif_receive_skb at ffffffff8152b608 #18 [] netif_receive_skb at ffffffff8152b690 #19 [] vmxnet3_rq_rx_complete at ffffffffa015eeaf [vmxnet3] #20 [] vmxnet3_poll_rx_only at ffffffffa015f32a [vmxnet3] #21 [] net_rx_action at ffffffff8152bac2 #22 [] __do_softirq at ffffffff81084b4f #23 [] call_softirq at ffffffff8164845c #24 [] do_softirq at ffffffff81016fc5 #25 [] irq_exit at ffffffff81084ee5 #26 [] do_IRQ at ffffffff81648ff8 Of course it may happen with other NIC drivers as well. It's found the freed dst_entry here: 224 static bool tcp_in_quickack_mode(struct sock *sk)↩ 225 {↩ 226 ▹ const struct inet_connection_sock *icsk = inet_csk(sk);↩ 227 ▹ const struct dst_entry *dst = __sk_dst_get(sk);↩ 228 ↩ 229 ▹ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||↩ 230 ▹ ▹ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);↩ 231 }↩ But there are other backtraces attributed to the same freed dst_entry in netfilter code as well. All the vmcores showed 2 significant clues: - Remote hosts behind the default gateway had always been redirected to a different gateway. A rtable/dst_entry will be added for that host. Making more dst_entrys with lower reference counts. Making this more probable. - All vmcores showed a postitive LockDroppedIcmps value, e.g: LockDroppedIcmps 267 A closer look at the tcp_v4_err() handler revealed that do_redirect() will run regardless of whether user space has the socket locked. This can result in a race condition where the same dst_entry cached in sk->sk_dst_entry can be decremented twice for the same socket via: do_redirect()->__sk_dst_check()-> dst_release(). Which leads to the dst_entry being prematurely freed with another socket pointing to it via sk->sk_dst_cache and a subsequent crash. To fix this skip do_redirect() if usespace has the socket locked. Instead let the redirect take place later when user space does not have the socket locked. The dccp/IPv6 code is very similar in this respect, so fixing it there too. As Eric Garver pointed out the following commit now invalidates routes. Which can set the dst->obsolete flag so that ipv4_dst_check() returns null and triggers the dst_release(). Fixes: ceb3320610d6 ("ipv4: Kill routes during PMTU/redirect updates.") Cc: Eric Garver Cc: Hannes Sowa Signed-off-by: Jon Maxwell Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 3 ++- net/dccp/ipv6.c | 8 +++++--- net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 8 +++++--- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 409d0cfd3447..b99168b0fabf 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) switch (type) { case ICMP_REDIRECT: - dccp_do_redirect(skb, sk); + if (!sock_owned_by_user(sk)) + dccp_do_redirect(skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 233b57367758..d9b6a4e403e7 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8f3ec1365497..575e19dcc017 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -431,7 +431,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) switch (type) { case ICMP_REDIRECT: - do_redirect(icmp_skb, sk); + if (!sock_owned_by_user(sk)) + do_redirect(icmp_skb, sk); goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 60a5295a7de6..49fa2e8c3fa9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -391,10 +391,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, np = inet6_sk(sk); if (type == NDISC_REDIRECT) { - struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); + if (!sock_owned_by_user(sk)) { + struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); - if (dst) - dst->ops->redirect(dst, sk, skb); + if (dst) + dst->ops->redirect(dst, sk, skb); + } goto out; } From b20e2d54789c6acbf6bd0efdbec2cf5fa4d90ef1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 13 Mar 2017 00:00:26 +0100 Subject: [PATCH 238/384] tun: fix premature POLLOUT notification on tun devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit aszlig observed failing ssh tunnels (-w) during initialization since commit cc9da6cc4f56e0 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE"). We already had reports that the mentioned commit breaks Juniper VPN connections. I can't clearly say that the Juniper VPN client has the same problem, but it is worth a try to hint to this patch. Because of the early generation of link local addresses, the kernel now can start asking for routers on the local subnet much earlier than usual. Those router solicitation packets arrive inside the ssh channels and should be transmitted to the tun fd before the configuration scripts might have upped the interface and made it ready for transmission. ssh polls on the interface and receives back a POLL_OUT. It tries to send the earily router solicitation packet to the tun interface. Unfortunately it hasn't been up'ed yet by config scripts, thus failing with -EIO. ssh doesn't retry again and considers the tun interface broken forever. Link: https://bugzilla.kernel.org/show_bug.cgi?id=121131 Fixes: cc9da6cc4f56 ("ipv6: addrconf: use stable address generator for ARPHRD_NONE") Cc: Bjørn Mork Reported-by: Valdis Kletnieks Cc: Valdis Kletnieks Reported-by: Jonas Lippuner Cc: Jonas Lippuner Reported-by: aszlig Cc: aszlig Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/tun.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index f58b7d850114..34cc3c590aa5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -822,7 +822,18 @@ static void tun_net_uninit(struct net_device *dev) /* Net device open. */ static int tun_net_open(struct net_device *dev) { + struct tun_struct *tun = netdev_priv(dev); + int i; + netif_tx_start_all_queues(dev); + + for (i = 0; i < tun->numqueues; i++) { + struct tun_file *tfile; + + tfile = rtnl_dereference(tun->tfiles[i]); + tfile->socket.sk->sk_write_space(tfile->socket.sk); + } + return 0; } @@ -1103,9 +1114,10 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; - if (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk))) + if (tun->dev->flags & IFF_UP && + (sock_writeable(sk) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + sock_writeable(sk)))) mask |= POLLOUT | POLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) From 72ef9c4125c7b257e3a714d62d778ab46583d6a3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 13 Mar 2017 00:01:30 +0100 Subject: [PATCH 239/384] dccp: fix memory leak during tear-down of unsuccessful connection request This patch fixes a memory leak, which happens if the connection request is not fulfilled between parsing the DCCP options and handling the SYN (because e.g. the backlog is full), because we forgot to free the list of ack vectors. Reported-by: Jianwen Ji Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index f053198e730c..5e3a7302f774 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk) for (i = 0; i < hc->tx_seqbufc; i++) kfree(hc->tx_seqbuf[i]); hc->tx_seqbufc = 0; + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) From 0043c1dfbec7b6e2427409059b05347d6f51aa9f Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 8 Feb 2017 09:24:25 +0000 Subject: [PATCH 240/384] serial: st-asc: Use new GPIOD API to obtain RTS pin The commits mentioned below adapt the GPIO API to allow more information to be passed directly through devm_get_gpiod_from_child() in the first instance. This facilitates the removal of subsequent calls, such as gpiod_direction_output(). This patch firstly moves to utilise the new API and secondly removes the now superfluous call do set the direction. Reported-by: Stephen Rothwell Suggested-by: Boris Brezillon Signed-off-by: Lee Jones [Also drop the header file dummies that only this driver was using] Acked-by: Greg Kroah-Hartman Signed-off-by: Linus Walleij --- drivers/tty/serial/st-asc.c | 11 ++++++----- include/linux/gpio/consumer.h | 16 ---------------- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c index bcf1d33e6ffe..c334bcc59c64 100644 --- a/drivers/tty/serial/st-asc.c +++ b/drivers/tty/serial/st-asc.c @@ -575,12 +575,13 @@ static void asc_set_termios(struct uart_port *port, struct ktermios *termios, pinctrl_select_state(ascport->pinctrl, ascport->states[NO_HW_FLOWCTRL]); - gpiod = devm_get_gpiod_from_child(port->dev, "rts", - &np->fwnode); - if (!IS_ERR(gpiod)) { - gpiod_direction_output(gpiod, 0); + gpiod = devm_fwnode_get_gpiod_from_child(port->dev, + "rts", + &np->fwnode, + GPIOD_OUT_LOW, + np->name); + if (!IS_ERR(gpiod)) ascport->rts = gpiod; - } } } diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 2484b2fcc6eb..933d93656605 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -143,15 +143,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, struct fwnode_handle *child, enum gpiod_flags flags, const char *label); -/* FIXME: delete this helper when users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return devm_fwnode_get_index_gpiod_from_child(dev, con_id, - 0, child, - GPIOD_ASIS, - "?"); -} #else /* CONFIG_GPIOLIB */ @@ -444,13 +435,6 @@ struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev, return ERR_PTR(-ENOSYS); } -/* FIXME: delete this when all users are switched over */ -static inline struct gpio_desc *devm_get_gpiod_from_child(struct device *dev, - const char *con_id, struct fwnode_handle *child) -{ - return ERR_PTR(-ENOSYS); -} - #endif /* CONFIG_GPIOLIB */ static inline From abf8315f71dc5a2ee56fb60830dcb2861982dc91 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Tue, 31 Jan 2017 16:18:42 +0200 Subject: [PATCH 241/384] drm/tilcdc: Fix hardcoded fail-return value in tilcdc_crtc_create() Fix badly hardcoded return return value under fail-label. All goto branches to the label set the "ret"-variable accordingly. Signed-off-by: Jyri Sarha Reviewed-by: Gabriel Krisman Bertazi --- drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index f80bf9385e41..abcbcd9f5851 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -1036,5 +1036,5 @@ int tilcdc_crtc_create(struct drm_device *dev) fail: tilcdc_crtc_destroy(crtc); - return -ENOMEM; + return ret; } From 11abbc9f39e002a2b25657e00abac8056cb39e93 Mon Sep 17 00:00:00 2001 From: Jyri Sarha Date: Wed, 1 Mar 2017 10:30:28 +0200 Subject: [PATCH 242/384] drm/tilcdc: Set framebuffer DMA address to HW only if CRTC is enabled Touching HW while clocks are off is a serious error and for instance breaks suspend functionality. After this patch tilcdc_crtc_update_fb() always updates the primary plane's framebuffer pointer, increases fb's reference count and stores vblank event. tilcdc_crtc_update_fb() only writes the fb's DMA address to HW if the crtc is enabled, as tilcdc_crtc_enable() takes care of writing the address on enable. This patch also refactors the tilcdc_crtc_update_fb() a bit. Number of subsequent small changes had made it almost unreadable. There should be no other functional changes but checking the CRTC's enable state. However, the locking goes a bit differently and some of the redundant checks have been removed in this new version. The enable_lock should be enough to protect the access to tilcdc_crtc->enabled. The irq_lock protects the access to last_vblank and next_fb. The check for vrefresh and last_vblank being valid is redundant, as the vrefresh should be always valid if the CRTC is enabled and now last_vblank should be too, because it is initialized to current time when CRTC raster is enabled. If for some reason the values are not correctly initialized the division by zero warning is quite appropriate. Signed-off-by: Jyri Sarha Reviewed-by: Tomi Valkeinen --- drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 35 ++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index abcbcd9f5851..d745e8b50fb8 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -464,6 +464,7 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); + unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); mutex_lock(&tilcdc_crtc->enable_lock); @@ -484,7 +485,17 @@ static void tilcdc_crtc_enable(struct drm_crtc *crtc) tilcdc_write_mask(dev, LCDC_RASTER_CTRL_REG, LCDC_PALETTE_LOAD_MODE(DATA_ONLY), LCDC_PALETTE_LOAD_MODE_MASK); + + /* There is no real chance for a race here as the time stamp + * is taken before the raster DMA is started. The spin-lock is + * taken to have a memory barrier after taking the time-stamp + * and to avoid a context switch between taking the stamp and + * enabling the raster. + */ + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + tilcdc_crtc->last_vblank = ktime_get(); tilcdc_set(dev, LCDC_RASTER_CTRL_REG, LCDC_RASTER_ENABLE); + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); drm_crtc_vblank_on(crtc); @@ -539,7 +550,6 @@ static void tilcdc_crtc_off(struct drm_crtc *crtc, bool shutdown) } drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq); - tilcdc_crtc->last_vblank = 0; tilcdc_crtc->enabled = false; mutex_unlock(&tilcdc_crtc->enable_lock); @@ -602,7 +612,6 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, { struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc); struct drm_device *dev = crtc->dev; - unsigned long flags; WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); @@ -614,28 +623,30 @@ int tilcdc_crtc_update_fb(struct drm_crtc *crtc, drm_framebuffer_reference(fb); crtc->primary->fb = fb; + tilcdc_crtc->event = event; - spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + mutex_lock(&tilcdc_crtc->enable_lock); - if (crtc->hwmode.vrefresh && ktime_to_ns(tilcdc_crtc->last_vblank)) { + if (tilcdc_crtc->enabled) { + unsigned long flags; ktime_t next_vblank; s64 tdiff; - next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, - 1000000 / crtc->hwmode.vrefresh); + spin_lock_irqsave(&tilcdc_crtc->irq_lock, flags); + next_vblank = ktime_add_us(tilcdc_crtc->last_vblank, + 1000000 / crtc->hwmode.vrefresh); tdiff = ktime_to_us(ktime_sub(next_vblank, ktime_get())); if (tdiff < TILCDC_VBLANK_SAFETY_THRESHOLD_US) tilcdc_crtc->next_fb = fb; + else + set_scanout(crtc, fb); + + spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); } - if (tilcdc_crtc->next_fb != fb) - set_scanout(crtc, fb); - - tilcdc_crtc->event = event; - - spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags); + mutex_unlock(&tilcdc_crtc->enable_lock); return 0; } From 0977762f6d15f13caccc20d71a5dec47d098907d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 13 Mar 2017 13:44:35 -0700 Subject: [PATCH 243/384] md/r5cache: fix set_syndrome_sources() for data in cache Before this patch, device InJournal will be included in prexor (SYNDROME_SRC_WANT_DRAIN) but not in reconstruct (SYNDROME_SRC_WRITTEN). So it will break parity calculation. With srctype == SYNDROME_SRC_WRITTEN, we need include both dev with non-null ->written and dev with R5_InJournal. This fixes logic in 1e6d690(md/r5cache: caching phase of r5cache) Cc: stable@vger.kernel.org (v4.10+) Signed-off-by: Song Liu Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6bfedfcf41c1..ed5cd705b985 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1401,7 +1401,8 @@ static int set_syndrome_sources(struct page **srcs, (test_bit(R5_Wantdrain, &dev->flags) || test_bit(R5_InJournal, &dev->flags))) || (srctype == SYNDROME_SRC_WRITTEN && - dev->written)) { + (dev->written || + test_bit(R5_InJournal, &dev->flags)))) { if (test_bit(R5_InJournal, &dev->flags)) srcs[slot] = sh->dev[i].orig_page; else From 9c62110454b088b4914ffe375c2dbc19643eac34 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 14 Mar 2017 11:51:59 -0600 Subject: [PATCH 244/384] blk-mq-sched: don't run the queue async from blk_mq_try_issue_directly() If we have scheduling enabled, we jump directly to insert-and-run. That's fine, but we run the queue async and we don't pass in information on whether we can block from this context or not. Fixup both these cases. Reviewed-by: Bart Van Assche Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 159187a28d66..a4546f060e80 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1434,7 +1434,8 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); } -static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) +static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie, + bool may_sleep) { struct request_queue *q = rq->q; struct blk_mq_queue_data bd = { @@ -1475,7 +1476,7 @@ static void blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie) } insert: - blk_mq_sched_insert_request(rq, false, true, true, false); + blk_mq_sched_insert_request(rq, false, true, false, may_sleep); } /* @@ -1569,11 +1570,11 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) { rcu_read_lock(); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, false); rcu_read_unlock(); } else { srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu); - blk_mq_try_issue_directly(old_rq, &cookie); + blk_mq_try_issue_directly(old_rq, &cookie, true); srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx); } goto done; From 8c53ad2139137dd4bf506a2c2b888de3816e8f75 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 13 Mar 2017 15:14:08 +0800 Subject: [PATCH 245/384] drm/amd/powerplay: fix copy error in smu7_clockpoweragting.c Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index 8cf71f3c6d0e..261b828ad590 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -178,7 +178,7 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) if (bgate) { cgs_set_powergating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); + AMD_PG_STATE_GATE); cgs_set_clockgating_state(hwmgr->device, AMD_IP_BLOCK_TYPE_VCE, AMD_CG_STATE_GATE); From 11353b9d10392e79e32603d2178e75feb25eaf0d Mon Sep 17 00:00:00 2001 From: Zhilong Liu Date: Tue, 14 Mar 2017 15:52:26 +0800 Subject: [PATCH 246/384] md/raid1: fix a trivial typo in comments raid1.c: fix a trivial typo in comments of freeze_array(). Cc: Jack Wang Cc: Guoqing Jiang Cc: John Stoffel Acked-by: Coly Li Signed-off-by: Zhilong Liu Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c33e96e33b8e..a34f58772022 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1027,7 +1027,7 @@ static int get_unqueued_pending(struct r1conf *conf) static void freeze_array(struct r1conf *conf, int extra) { /* Stop sync I/O and normal I/O and wait for everything to - * go quite. + * go quiet. * This is called in two situations: * 1) management command handlers (reshape, remove disk, quiesce). * 2) one normal I/O request failed. From dc434e056fe1dada20df7ba07f32739d3a701adf Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 14 Mar 2017 16:06:45 +0100 Subject: [PATCH 247/384] cpu/hotplug: Serialize callback invocations proper The setup/remove_state/instance() functions in the hotplug core code are serialized against concurrent CPU hotplug, but unfortunately not serialized against themself. As a consequence a concurrent invocation of these function results in corruption of the callback machinery because two instances try to invoke callbacks on remote cpus at the same time. This results in missing callback invocations and initiator threads waiting forever on the completion. The obvious solution to replace get_cpu_online() with cpu_hotplug_begin() is not possible because at least one callsite calls into these functions from a get_online_cpu() locked region. Extend the protection scope of the cpuhp_state_mutex from solely protecting the state arrays to cover the callback invocation machinery as well. Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface") Reported-and-tested-by: Bart Van Assche Signed-off-by: Sebastian Andrzej Siewior Cc: hpa@zytor.com Cc: mingo@kernel.org Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/20170314150645.g4tdyoszlcbajmna@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/cpu.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index f7c063239fa5..37b223e4fc05 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1335,26 +1335,21 @@ static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, struct cpuhp_step *sp; int ret = 0; - mutex_lock(&cpuhp_state_mutex); - if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) { ret = cpuhp_reserve_state(state); if (ret < 0) - goto out; + return ret; state = ret; } sp = cpuhp_get_step(state); - if (name && sp->name) { - ret = -EBUSY; - goto out; - } + if (name && sp->name) + return -EBUSY; + sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); -out: - mutex_unlock(&cpuhp_state_mutex); return ret; } @@ -1428,6 +1423,7 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; @@ -1447,16 +1443,14 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, if (ret) { if (sp->teardown.multi) cpuhp_rollback_install(cpu, state, node); - goto err; + goto unlock; } } add_node: ret = 0; - mutex_lock(&cpuhp_state_mutex); hlist_add_head(node, &sp->list); +unlock: mutex_unlock(&cpuhp_state_mutex); - -err: put_online_cpus(); return ret; } @@ -1491,6 +1485,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); @@ -1524,6 +1519,7 @@ int __cpuhp_setup_state(enum cpuhp_state state, } } out: + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); /* * If the requested state is CPUHP_AP_ONLINE_DYN, return the @@ -1547,6 +1543,8 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, return -EINVAL; get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); + if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* @@ -1563,7 +1561,6 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, } remove: - mutex_lock(&cpuhp_state_mutex); hlist_del(node); mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); @@ -1571,6 +1568,7 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state, return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); + /** * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state * @state: The state to remove @@ -1589,6 +1587,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) get_online_cpus(); + mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", @@ -1613,6 +1612,7 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); + mutex_unlock(&cpuhp_state_mutex); put_online_cpus(); } EXPORT_SYMBOL(__cpuhp_remove_state); From 37c343b4f4e70e9dc328ab04903c0ec8d154c1a4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 14 Mar 2017 08:58:08 -0400 Subject: [PATCH 248/384] net: Resend IGMP memberships upon peer notification. When we notify peers of potential changes, it's also good to update IGMP memberships. For example, during VM migration, updating IGMP memberships will redirect existing multicast streams to the VM at the new location. Signed-off-by: Vladislav Yasevich Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index 8637b2b71f3d..7869ae3837ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1304,6 +1304,7 @@ void netdev_notify_peers(struct net_device *dev) { rtnl_lock(); call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); rtnl_unlock(); } EXPORT_SYMBOL(netdev_notify_peers); From f004ec065b4879d6bc9ba0211af2169b3ce3097f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Mar 2017 14:00:00 +0100 Subject: [PATCH 249/384] mlxsw: reg: Fix SPVM max record count The num_rec field is 8 bit, so the maximal count number is 255. This fixes vlans not being enabled for wider ranges than 255. Fixes: b2e345f9a454 ("mlxsw: reg: Add Switch Port VID and Switch Port VLAN Membership registers definitions") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 0899e2d310e2..65e19422b80a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -769,7 +769,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid) #define MLXSW_REG_SPVM_ID 0x200F #define MLXSW_REG_SPVM_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVM_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVM_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVM_REC_MAX_COUNT 255 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN + \ MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT) From e9093b1183bbac462d2caef3eac165778c0b1bf1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Mar 2017 14:00:01 +0100 Subject: [PATCH 250/384] mlxsw: reg: Fix SPVMLR max record count The num_rec field is 8 bit, so the maximal count number is 255. This fixes vlans learning not being enabled for wider ranges than 255. Fixes: a4feea74cd7a ("mlxsw: reg: Add Switch Port VLAN MAC Learning register definition") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 65e19422b80a..d9616daf8a70 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1702,7 +1702,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload, #define MLXSW_REG_SPVMLR_ID 0x2020 #define MLXSW_REG_SPVMLR_BASE_LEN 0x04 /* base length, without records */ #define MLXSW_REG_SPVMLR_REC_LEN 0x04 /* record length */ -#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 256 +#define MLXSW_REG_SPVMLR_REC_MAX_COUNT 255 #define MLXSW_REG_SPVMLR_LEN (MLXSW_REG_SPVMLR_BASE_LEN + \ MLXSW_REG_SPVMLR_REC_LEN * \ MLXSW_REG_SPVMLR_REC_MAX_COUNT) From f3e48119b97f56fb09310c95d49da122a27003d7 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 14 Mar 2017 15:25:58 +0200 Subject: [PATCH 251/384] qed: Align CIDs according to DORQ requirement The Doorbell HW block can be configured at a granularity of 16 x CIDs, so we need to make sure that the actual number of CIDs configured would be a multiplication of 16. Today, when RoCE is enabled - given that the number is unaligned, doorbelling the higher CIDs would fail to reach the firmware and would eventually timeout. Fixes: dbb799c39717 ("qed: Initialize hardware for new protocols") Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index d42d03df751a..7e3a6fed3da6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -422,8 +422,9 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, u32 page_sz = p_mgr->clients[ILT_CLI_CDUC].p_size.val; u32 cxt_size = CONN_CXT_SIZE(p_hwfn); u32 elems_per_page = ILT_PAGE_IN_BYTES(page_sz) / cxt_size; + u32 align = elems_per_page * DQ_RANGE_ALIGN; - p_conn->cid_count = roundup(p_conn->cid_count, elems_per_page); + p_conn->cid_count = roundup(p_conn->cid_count, align); } } From 3ef310a7d99216e0fbdff29f0cb13bc54180373a Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 14 Mar 2017 15:25:59 +0200 Subject: [PATCH 252/384] qed: Prevent creation of too-big u32-chains Current Logic would allow the creation of a chain with U32_MAX + 1 elements, when the actual maximum supported by the driver infrastructure is U32_MAX. Fixes: a91eb52abb50 ("qed: Revisit chain implementation") Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index e2a081ceaf52..e518f914eab1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -2389,9 +2389,8 @@ qed_chain_alloc_sanity_check(struct qed_dev *cdev, * size/capacity fields are of a u32 type. */ if ((cnt_type == QED_CHAIN_CNT_TYPE_U16 && - chain_size > 0x10000) || - (cnt_type == QED_CHAIN_CNT_TYPE_U32 && - chain_size > 0x100000000ULL)) { + chain_size > ((u32)U16_MAX + 1)) || + (cnt_type == QED_CHAIN_CNT_TYPE_U32 && chain_size > U32_MAX)) { DP_NOTICE(cdev, "The actual chain size (0x%llx) is larger than the maximal possible value\n", chain_size); From 752ecb2da11124a948567076b60767dc8034cfa5 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:00 +0200 Subject: [PATCH 253/384] qed: Fix mapping leak on LL2 rx flow When receiving an Rx LL2 packet, qed fails to unmap the previous buffer. Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support"); Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 5fb34db377c8..29ae5ec2ac2d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -211,6 +211,8 @@ static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, /* If need to reuse or there's no replacement buffer, repost this */ if (rc) goto out_post; + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); skb = build_skb(buffer->data, 0); if (!skb) { From 4621ceb279d065151eb940ce8a4728b10c0646c7 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:01 +0200 Subject: [PATCH 254/384] qed: Free previous connections when releasing iSCSI Fixes: fc831825f99e ("qed: Add support for hardware offloaded iSCSI") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 28 +++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 3a44d6b395fa..7e73b05eeab8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -786,6 +786,23 @@ static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn, spin_unlock_bh(&p_hwfn->p_iscsi_info->lock); } +void qed_iscsi_free_connection(struct qed_hwfn *p_hwfn, + struct qed_iscsi_conn *p_conn) +{ + qed_chain_free(p_hwfn->cdev, &p_conn->xhq); + qed_chain_free(p_hwfn->cdev, &p_conn->uhq); + qed_chain_free(p_hwfn->cdev, &p_conn->r2tq); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct tcp_upload_params), + p_conn->tcp_upload_params_virt_addr, + p_conn->tcp_upload_params_phys_addr); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct scsi_terminate_extra_params), + p_conn->queue_cnts_virt_addr, + p_conn->queue_cnts_phys_addr); + kfree(p_conn); +} + struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn) { struct qed_iscsi_info *p_iscsi_info; @@ -807,6 +824,17 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn, void qed_iscsi_free(struct qed_hwfn *p_hwfn, struct qed_iscsi_info *p_iscsi_info) { + struct qed_iscsi_conn *p_conn = NULL; + + while (!list_empty(&p_hwfn->p_iscsi_info->free_list)) { + p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list, + struct qed_iscsi_conn, list_entry); + if (p_conn) { + list_del(&p_conn->list_entry); + qed_iscsi_free_connection(p_hwfn, p_conn); + } + } + kfree(p_iscsi_info); } From 1df2adedcce17ad4a39fba74f0e2b611f797fe10 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Tue, 14 Mar 2017 15:26:02 +0200 Subject: [PATCH 255/384] qed: Fix interrupt flags on Rx LL2 Before iterating over the the LL2 Rx ring, the ring's spinlock is taken via spin_lock_irqsave(). The actual processing of the packet [including handling by the protocol driver] is done without said lock, so qed releases the spinlock and re-claims it afterwards. Problem is that the final spin_lock_irqrestore() at the end of the iteration uses the original flags saved from the initial irqsave() instead of the flags from the most recent irqsave(). So it's possible that the interrupt status would be incorrect at the end of the processing. Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support"); CC: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 29ae5ec2ac2d..0d3cef409c96 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -476,7 +476,7 @@ qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn, static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_conn, union core_rx_cqe_union *p_cqe, - unsigned long lock_flags, + unsigned long *p_lock_flags, bool b_last_cqe) { struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; @@ -497,10 +497,10 @@ static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, "Mismatch between active_descq and the LL2 Rx chain\n"); list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); - spin_unlock_irqrestore(&p_rx->lock, lock_flags); + spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags); qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id, p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe); - spin_lock_irqsave(&p_rx->lock, lock_flags); + spin_lock_irqsave(&p_rx->lock, *p_lock_flags); return 0; } @@ -540,7 +540,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) break; case CORE_RX_CQE_TYPE_REGULAR: rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, - cqe, flags, b_last_cqe); + cqe, &flags, + b_last_cqe); break; default: rc = -EIO; From db31d330e8b0ad8926725eb2af6f6422c07ca7ab Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:03 +0200 Subject: [PATCH 256/384] qed: Correct out-of-bound access in OOO history Need to set the number of entries in database, otherwise the logic would quickly surpass the array. Fixes: 1d6cff4fca43 ("qed: Add iSCSI out of order packet handling") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ooo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c index 7d731c6cb892..378afce58b3f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c @@ -159,6 +159,8 @@ struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn) if (!p_ooo_info->ooo_history.p_cqes) goto no_history_mem; + p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES; + return p_ooo_info; no_history_mem: From 6b116b1d6a521a1907b3c18cb7a8592a655f660c Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 15:26:04 +0200 Subject: [PATCH 257/384] qed: Enable iSCSI Out-of-Order Missing in the initial submission, qed fails to propagate qedi's request to enable OOO to firmware. Fixes: fc831825f99e ("qed: Add support for hardware offloaded iSCSI") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_iscsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c index 7e73b05eeab8..098766f7fe88 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c @@ -190,6 +190,9 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn, p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring; p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring; p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring; + p_init->ooo_enable = p_params->ooo_enable; + p_init->ll2_rx_queue_id = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + + p_params->ll2_ooo_queue_id; p_init->func_params.log_page_size = p_params->log_page_size; val = p_params->num_tasks; p_init->func_params.num_tasks = cpu_to_le16(val); From d434936e4cbb10181463622962f30b989d3e9e19 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:12:53 +0100 Subject: [PATCH 258/384] mm, x86: Fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Arnd Bergmann Acked-by: Dave Jiang Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Andrew Morton Cc: Borislav Petkov Cc: Thomas Garnier Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif From c236c8e95a3d395b0494e7108f0d41cf36ec107c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Mar 2017 10:27:18 +0100 Subject: [PATCH 259/384] futex: Fix potential use-after-free in FUTEX_REQUEUE_PI While working on the futex code, I stumbled over this potential use-after-free scenario. Dmitry triggered it later with syzkaller. pi_mutex is a pointer into pi_state, which we drop the reference on in unqueue_me_pi(). So any access to that pointer after that is bad. Since other sites already do rt_mutex_unlock() with hb->lock held, see for example futex_lock_pi(), simply move the unlock before unqueue_me_pi(). Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Darren Hart Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: dvhart@infradead.org Cc: bristot@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170304093558.801744246@infradead.org Signed-off-by: Thomas Gleixner --- kernel/futex.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 229a744b1781..3a4775fd7468 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2815,7 +2815,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; - struct rt_mutex *pi_mutex = NULL; struct futex_hash_bucket *hb; union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; @@ -2907,6 +2906,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, spin_unlock(q.lock_ptr); } } else { + struct rt_mutex *pi_mutex; + /* * We have been woken up by futex_unlock_pi(), a timeout, or a * signal. futex_unlock_pi() will not destroy the lock_ptr nor @@ -2930,18 +2931,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (res) ret = (res < 0) ? res : 0; + /* + * If fixup_pi_state_owner() faulted and was unable to handle + * the fault, unlock the rt_mutex and return the fault to + * userspace. + */ + if (ret && rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + /* Unqueue and drop the lock. */ unqueue_me_pi(&q); } - /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock the rt_mutex and return the fault to userspace. - */ - if (ret == -EFAULT) { - if (pi_mutex && rt_mutex_owner(pi_mutex) == current) - rt_mutex_unlock(pi_mutex); - } else if (ret == -EINTR) { + if (ret == -EINTR) { /* * We've already been requeued, but cannot restart by calling * futex_lock_pi() directly. We could restart this syscall, but From 9bbb25afeb182502ca4f2c4f3f88af0681b34cae Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 4 Mar 2017 10:27:19 +0100 Subject: [PATCH 260/384] futex: Add missing error handling to FUTEX_REQUEUE_PI Thomas spotted that fixup_pi_state_owner() can return errors and we fail to unlock the rt_mutex in that case. Reported-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Darren Hart Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: dvhart@infradead.org Cc: bristot@redhat.com Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170304093558.867401760@infradead.org Signed-off-by: Thomas Gleixner --- kernel/futex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 3a4775fd7468..45858ec73941 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2898,6 +2898,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, if (q.pi_state && (q.pi_state->owner != current)) { spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) + rt_mutex_unlock(&q.pi_state->pi_mutex); /* * Drop the reference to the pi state which * the requeue_pi() code acquired for us. From 87a6b2975f0d340c75b7488d22d61d2f98fb8abf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Mar 2017 23:27:47 -0500 Subject: [PATCH 261/384] x86/unwind: Fix last frame check for aligned function stacks Pavel Machek reported the following warning on x86-32: WARNING: kernel stack frame pointer at f50cdf98 in swapper/2:0 has bad value (null) The warning is caused by the unwinder not realizing that it reached the end of the stack, due to an unusual prologue which gcc sometimes generates for aligned stacks. The prologue is based on a gcc feature called the Dynamic Realign Argument Pointer (DRAP). It's almost always enabled for aligned stacks when -maccumulate-outgoing-args isn't set. This issue is similar to the one fixed by the following commit: 8023e0e2a48d ("x86/unwind: Adjust last frame check for aligned function stacks") ... but that fix was specific to x86-64. Make the fix more generic to cover x86-32 as well, and also ensure that the return address referred to by the frame pointer is a copy of the original return address. Fixes: acb4608ad186 ("x86/unwind: Create stack frames for saved syscall registers") Reported-by: Pavel Machek Signed-off-by: Josh Poimboeuf Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/50d4924db716c264b14f1633037385ec80bf89d2.1489465609.git.jpoimboe@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/unwind_frame.c | 36 ++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 478d15dbaee4..08339262b666 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs) return sizeof(*regs); } +#ifdef CONFIG_X86_32 +#define GCC_REALIGN_WORDS 3 +#else +#define GCC_REALIGN_WORDS 1 +#endif + static bool is_last_task_frame(struct unwind_state *state) { - unsigned long bp = (unsigned long)state->bp; - unsigned long regs = (unsigned long)task_pt_regs(state->task); + unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2; + unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS; /* * We have to check for the last task frame at two different locations * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame by a word in the prologue of a - * function called by head/entry code. + * change the offset of the stack frame in the prologue of a function + * called by head/entry code. Examples: + * + * : + * push %edi + * lea 0x8(%esp),%edi + * and $0xfffffff8,%esp + * pushl -0x4(%edi) + * push %ebp + * mov %esp,%ebp + * + * : + * lea 0x8(%rsp),%r10 + * and $0xfffffffffffffff0,%rsp + * pushq -0x8(%r10) + * push %rbp + * mov %rsp,%rbp + * + * Note that after aligning the stack, it pushes a duplicate copy of + * the return address before pushing the frame pointer. */ - return bp == regs - FRAME_HEADER_SIZE || - bp == regs - FRAME_HEADER_SIZE - sizeof(long); + return (state->bp == last_bp || + (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); } /* From 49ec8f5b6ae3ab60385492cad900ffc8a523c895 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Mar 2017 15:20:53 +0100 Subject: [PATCH 262/384] x86/intel_rdt: Put group node in rdtgroup_kn_unlock The rdtgroup_kn_unlock waits for the last user to release and put its node. But it's calling kernfs_put on the node which calls the rdtgroup_kn_unlock, which might not be the group's directory node, but another group's file node. This race could be easily reproduced by running 2 instances of following script: mount -t resctrl resctrl /sys/fs/resctrl/ pushd /sys/fs/resctrl/ mkdir krava echo "krava" > krava/schemata rmdir krava popd umount /sys/fs/resctrl It triggers the slub debug error message with following command line config: slub_debug=,kernfs_node_cache. Call kernfs_put on the group's node to fix it. Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Signed-off-by: Jiri Olsa Cc: Fenghua Yu Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Shaohua Li Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1489501253-20248-1-git-send-email-jolsa@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index c05509d38b1f..9ac2a5cdd9c2 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) if (atomic_dec_and_test(&rdtgrp->waitcount) && (rdtgrp->flags & RDT_DELETED)) { kernfs_unbreak_active_protection(kn); - kernfs_put(kn); + kernfs_put(rdtgrp->kn); kfree(rdtgrp); } else { kernfs_unbreak_active_protection(kn); From 655d9ca9ac075da1ef2a45012ba48a39f6eb1f58 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 22:27:11 +0100 Subject: [PATCH 263/384] drm: amd: remove broken include path The AMD ACP driver adds "-I../acp -I../acp/include" to the gcc command line, which makes no sense, since these are evaluated relative to the build directory. When we build with "make W=1", they instead cause a warning: cc1: error: ../acp/: No such file or directory [-Werror=missing-include-dirs] cc1: error: ../acp/include: No such file or directory [-Werror=missing-include-dirs] cc1: all warnings being treated as errors ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.o' failed ../scripts/Makefile.build:289: recipe for target 'drivers/gpu/drm/amd/amdgpu/amdgpu_kms.o' failed This removes the subdir-ccflags variable that evidently did not serve any purpose here. Signed-off-by: Arnd Bergmann Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/acp/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile index 8363cb57915b..8a08e81ee90d 100644 --- a/drivers/gpu/drm/amd/acp/Makefile +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -3,6 +3,4 @@ # of AMDSOC/AMDGPU drm driver. # It provides the HW control for ACP related functionalities. -subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include - AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o From 630a04e79dd41ff746b545d4fc052e0abb836120 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 15 Mar 2017 00:24:25 -0700 Subject: [PATCH 264/384] xfs: verify inline directory data forks When we're reading or writing the data fork of an inline directory, check the contents to make sure we're not overflowing buffers or eating garbage data. xfs/348 corrupts an inline symlink into an inline directory, triggering a buffer overflow bug. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- v2: add more checks consistent with _dir2_sf_check and make the verifier usable from anywhere. --- fs/xfs/libxfs/xfs_dir2_priv.h | 2 + fs/xfs/libxfs/xfs_dir2_sf.c | 87 ++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_inode_fork.c | 26 ++++++++-- fs/xfs/libxfs/xfs_inode_fork.h | 2 +- fs/xfs/xfs_dir2_readdir.c | 11 ----- fs/xfs/xfs_inode.c | 12 +++-- 6 files changed, 122 insertions(+), 18 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index d04547fcf274..eb00bc133bca 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -125,6 +125,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); +extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp, + int size); /* xfs_dir2_readdir.c */ extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index c6809ff41197..96b45cd6c63f 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -629,6 +629,93 @@ xfs_dir2_sf_check( } #endif /* DEBUG */ +/* Verify the consistency of an inline directory. */ +int +xfs_dir2_sf_verify( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + int size) +{ + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + char *endp; + const struct xfs_dir_ops *dops; + xfs_ino_t ino; + int i; + int i8count; + int offset; + __uint8_t filetype; + + dops = xfs_dir_get_ops(mp, NULL); + + /* + * Give up if the directory is way too short. + */ + XFS_WANT_CORRUPTED_RETURN(mp, size > + offsetof(struct xfs_dir2_sf_hdr, parent)); + XFS_WANT_CORRUPTED_RETURN(mp, size >= + xfs_dir2_sf_hdr_size(sfp->i8count)); + + endp = (char *)sfp + size; + + /* Check .. entry */ + ino = dops->sf_get_parent_ino(sfp); + i8count = ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + offset = dops->data_first_offset; + + /* Check all reported entries */ + sfep = xfs_dir2_sf_firstentry(sfp); + for (i = 0; i < sfp->count; i++) { + /* + * struct xfs_dir2_sf_entry has a variable length. + * Check the fixed-offset parts of the structure are + * within the data buffer. + */ + XFS_WANT_CORRUPTED_RETURN(mp, + ((char *)sfep + sizeof(*sfep)) < endp); + + /* Don't allow names with known bad length. */ + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0); + XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN); + + /* + * Check that the variable-length part of the structure is + * within the data buffer. The next entry starts after the + * name component, so nextentry is an acceptable test. + */ + next_sfep = dops->sf_nextentry(sfp, sfep); + XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep); + + /* Check that the offsets always increase. */ + XFS_WANT_CORRUPTED_RETURN(mp, + xfs_dir2_sf_get_offset(sfep) >= offset); + + /* Check the inode number. */ + ino = dops->sf_get_ino(sfp, sfep); + i8count += ino > XFS_DIR2_MAX_SHORT_INUM; + XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino)); + + /* Check the file type. */ + filetype = dops->sf_get_ftype(sfep); + XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX); + + offset = xfs_dir2_sf_get_offset(sfep) + + dops->data_entsize(sfep->namelen); + + sfep = next_sfep; + } + XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count); + XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp); + + /* Make sure this whole thing ought to be in local format. */ + XFS_WANT_CORRUPTED_RETURN(mp, offset + + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + + (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize); + + return 0; +} + /* * Create a new (shortform) directory. */ diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 25c1e078aef6..9653e964eda4 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -33,6 +33,8 @@ #include "xfs_trace.h" #include "xfs_attr_sf.h" #include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_priv.h" kmem_zone_t *xfs_ifork_zone; @@ -320,6 +322,7 @@ xfs_iformat_local( int whichfork, int size) { + int error; /* * If the size is unreasonable, then something @@ -336,6 +339,14 @@ xfs_iformat_local( return -EFSCORRUPTED; } + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(ip->i_mount, + (struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip), + size); + if (error) + return error; + } + xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size); return 0; } @@ -856,7 +867,7 @@ xfs_iextents_copy( * In these cases, the format always takes precedence, because the * format indicates the current state of the fork. */ -void +int xfs_iflush_fork( xfs_inode_t *ip, xfs_dinode_t *dip, @@ -866,6 +877,7 @@ xfs_iflush_fork( char *cp; xfs_ifork_t *ifp; xfs_mount_t *mp; + int error; static const short brootflag[2] = { XFS_ILOG_DBROOT, XFS_ILOG_ABROOT }; static const short dataflag[2] = @@ -874,7 +886,7 @@ xfs_iflush_fork( { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; if (!iip) - return; + return 0; ifp = XFS_IFORK_PTR(ip, whichfork); /* * This can happen if we gave up in iformat in an error path, @@ -882,12 +894,19 @@ xfs_iflush_fork( */ if (!ifp) { ASSERT(whichfork == XFS_ATTR_FORK); - return; + return 0; } cp = XFS_DFORK_PTR(dip, whichfork); mp = ip->i_mount; switch (XFS_IFORK_FORMAT(ip, whichfork)) { case XFS_DINODE_FMT_LOCAL: + if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) { + error = xfs_dir2_sf_verify(mp, + (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data, + ifp->if_bytes); + if (error) + return error; + } if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { ASSERT(ifp->if_u1.if_data != NULL); @@ -940,6 +959,7 @@ xfs_iflush_fork( ASSERT(0); break; } + return 0; } /* diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 7fb8365326d1..132dc59fdde6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -140,7 +140,7 @@ typedef struct xfs_ifork { struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state); int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *); -void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, +int xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_inode *, int); void xfs_idata_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 003a99b83bd8..ad9396e516f6 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -71,22 +71,11 @@ xfs_dir2_sf_getdents( struct xfs_da_geometry *geo = args->geo; ASSERT(dp->i_df.if_flags & XFS_IFINLINE); - /* - * Give up if the directory is way too short. - */ - if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { - ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount)); - return -EIO; - } - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); ASSERT(dp->i_df.if_u1.if_data != NULL); sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - if (dp->i_d.di_size < xfs_dir2_sf_hdr_size(sfp->i8count)) - return -EFSCORRUPTED; - /* * If the block number in the offset is out of range, we're done. */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7eaf1ef74e3c..c7fe2c2123ab 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3475,6 +3475,7 @@ xfs_iflush_int( struct xfs_inode_log_item *iip = ip->i_itemp; struct xfs_dinode *dip; struct xfs_mount *mp = ip->i_mount; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); @@ -3557,9 +3558,14 @@ xfs_iflush_int( if (ip->i_d.di_flushiter == DI_MAX_FLUSH) ip->i_d.di_flushiter = 0; - xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); - if (XFS_IFORK_Q(ip)) - xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK); + if (error) + return error; + if (XFS_IFORK_Q(ip)) { + error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK); + if (error) + return error; + } xfs_inobp_check(mp, bp); /* From 28ea06c46fbcab63fd9a55531387b7928a18a590 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 6 Mar 2017 12:58:42 -0500 Subject: [PATCH 265/384] gfs2: Avoid alignment hole in struct lm_lockname Commit 88ffbf3e03 switches to using rhashtables for glocks, hashing over the entire struct lm_lockname instead of its individual fields. On some architectures, struct lm_lockname contains a hole of uninitialized memory due to alignment rules, which now leads to incorrect hash values. Get rid of that hole. Signed-off-by: Andreas Gruenbacher Signed-off-by: Bob Peterson CC: #v4.3+ --- fs/gfs2/incore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c45084ac642d..511e1ed7e2de 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -207,7 +207,7 @@ struct lm_lockname { struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; -}; +} __packed __aligned(sizeof(int)); #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ From 8af42949d1681379c1a97d230de9242c1f4f326a Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 07:41:55 +0900 Subject: [PATCH 266/384] openrisc: xchg: fix `computed is not used` warning When building allmodconfig this warning shows. fs/ocfs2/file.c: In function 'ocfs2_file_write_iter': ./arch/openrisc/include/asm/cmpxchg.h:81:3: warning: value computed is not used [-Wunused-value] ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) ^ Applying the same patch logic that was done to the cmpxchg macro. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/cmpxchg.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h index 5fcb9ac72693..f0a5d8b844d6 100644 --- a/arch/openrisc/include/asm/cmpxchg.h +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -77,7 +77,11 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, return val; } -#define xchg(ptr, with) \ - ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, with) \ + ({ \ + (__typeof__(*(ptr))) __xchg((unsigned long)(with), \ + (ptr), \ + sizeof(*(ptr))); \ + }) #endif /* __ASM_OPENRISC_CMPXCHG_H */ From 154e67cd8e8f964809d0e75e44bb121b169c75b3 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 13 Mar 2017 07:44:45 +0900 Subject: [PATCH 267/384] openrisc: fix issue handling 8 byte get_user calls Was getting the following error with allmodconfig: ERROR: "__get_user_bad" [lib/test_user_copy.ko] undefined! This was simply a missing break statement, causing an unwanted fall through. Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index 140faa16685a..1311e6b13991 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -211,7 +211,7 @@ do { \ case 1: __get_user_asm(x, ptr, retval, "l.lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "l.lhz"); break; \ case 4: __get_user_asm(x, ptr, retval, "l.lwz"); break; \ - case 8: __get_user_asm2(x, ptr, retval); \ + case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ } while (0) From 363dad58e4a0f72dce0bf12d361d617239a80317 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Tue, 14 Mar 2017 22:52:49 +0900 Subject: [PATCH 268/384] openrisc: Export symbols needed by modules This was detected by allmodconfig, errors reported: ERROR: "empty_zero_page" [net/ceph/libceph.ko] undefined! ERROR: "__ucmpdi2" [lib/842/842_decompress.ko] undefined! ERROR: "empty_zero_page" [fs/nfs/objlayout/objlayoutdriver.ko] undefined! ERROR: "empty_zero_page" [fs/exofs/exofs.ko] undefined! ERROR: "empty_zero_page" [fs/crypto/fscrypto.ko] undefined! ERROR: "__ucmpdi2" [fs/btrfs/btrfs.ko] undefined! ERROR: "pm_power_off" [drivers/regulator/act8865-regulator.ko] undefined! ERROR: "__ucmpdi2" [drivers/media/i2c/adv7842.ko] undefined! ERROR: "__clear_user" [drivers/md/dm-mod.ko] undefined! ERROR: "__clear_user" [net/netfilter/x_tables.ko] undefined! Signed-off-by: Stafford Horne --- arch/openrisc/kernel/or32_ksyms.c | 4 ++++ arch/openrisc/kernel/process.c | 1 + 2 files changed, 5 insertions(+) diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 5c4695d13542..ee3e604959e1 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -30,6 +30,7 @@ #include #include #include +#include #define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name) @@ -42,6 +43,9 @@ DECLARE_EXPORT(__muldi3); DECLARE_EXPORT(__ashrdi3); DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__ucmpdi2); +EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(memset); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 828a29110459..f8da545854f9 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -90,6 +90,7 @@ void arch_cpu_idle(void) } void (*pm_power_off) (void) = machine_power_off; +EXPORT_SYMBOL(pm_power_off); /* * When a process does an "exec", machine state like FPU and debug From e4c204ced0ac25e02e58679f07096c5bac0b0d96 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 14 Mar 2017 16:18:34 +0100 Subject: [PATCH 269/384] cpufreq: intel_pstate: Avoid percentages in limits-related computations Currently, intel_pstate_update_perf_limits() first converts the policy minimum and maximum limits into percentages of the maximum turbo frequency (rounding up to an integer) and then converts these percentages to fractions (by using fixed-point arithmetic to divide them by 100). That introduces a rounding error unnecessarily, because the fractions can be obtained by carrying out fixed-point divisions directly on the input numbers. Rework the computations in intel_pstate_hwp_set() to use fractions instead of percentages (and drop redundant local variables from there) and modify intel_pstate_update_perf_limits() to compute the fractions directly and percentages out of them. While at it, introduce percent_ext_fp() for converting percentages to fractions (with extended number of fraction bits) and use it in the computations. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 56 +++++++++++++++++----------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ee12641ee010..08e134ffba68 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -84,6 +84,11 @@ static inline u64 div_ext_fp(u64 x, u64 y) return div64_u64(x << EXT_FRAC_BITS, y); } +static inline int32_t percent_ext_fp(int percent) +{ + return div_ext_fp(percent, 100); +} + /** * struct sample - Store performance sample * @core_avg_perf: Ratio of APERF/MPERF which is the actual average @@ -850,7 +855,6 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) u64 value, cap; for_each_cpu(cpu, policy->cpus) { - int max_perf_pct, min_perf_pct; struct cpudata *cpu_data = all_cpu_data[cpu]; s16 epp; @@ -864,16 +868,14 @@ static void intel_pstate_hwp_set(struct cpufreq_policy *policy) else hw_max = HWP_HIGHEST_PERF(cap); - max_perf_pct = perf_limits->max_perf_pct; - min_perf_pct = perf_limits->min_perf_pct; - min = hw_max * min_perf_pct / 100; + min = fp_ext_toint(hw_max * perf_limits->min_perf); rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); value &= ~HWP_MIN_PERF(~0L); value |= HWP_MIN_PERF(min); - max = hw_max * max_perf_pct / 100; + max = fp_ext_toint(hw_max * perf_limits->max_perf); value &= ~HWP_MAX_PERF(~0L); value |= HWP_MAX_PERF(max); @@ -1223,7 +1225,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf_pct = max(limits->min_perf_pct, limits->max_perf_pct); - limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); + limits->max_perf = percent_ext_fp(limits->max_perf_pct); intel_pstate_update_policies(); @@ -1260,7 +1262,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); + limits->min_perf = percent_ext_fp(limits->min_perf_pct); intel_pstate_update_policies(); @@ -2078,36 +2080,34 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) static void intel_pstate_update_perf_limits(struct cpufreq_policy *policy, struct perf_limits *limits) { + int32_t max_policy_perf, min_policy_perf; - limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100, - policy->cpuinfo.max_freq); - limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0, 100); + max_policy_perf = div_ext_fp(policy->max, policy->cpuinfo.max_freq); + max_policy_perf = clamp_t(int32_t, max_policy_perf, 0, int_ext_tofp(1)); if (policy->max == policy->min) { - limits->min_policy_pct = limits->max_policy_pct; + min_policy_perf = max_policy_perf; } else { - limits->min_policy_pct = DIV_ROUND_UP(policy->min * 100, - policy->cpuinfo.max_freq); - limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, - 0, 100); + min_policy_perf = div_ext_fp(policy->min, + policy->cpuinfo.max_freq); + min_policy_perf = clamp_t(int32_t, min_policy_perf, + 0, max_policy_perf); } - /* Normalize user input to [min_policy_pct, max_policy_pct] */ - limits->min_perf_pct = max(limits->min_policy_pct, - limits->min_sysfs_pct); - limits->min_perf_pct = min(limits->max_policy_pct, - limits->min_perf_pct); - limits->max_perf_pct = min(limits->max_policy_pct, - limits->max_sysfs_pct); - limits->max_perf_pct = max(limits->min_policy_pct, - limits->max_perf_pct); + /* Normalize user input to [min_perf, max_perf] */ + limits->min_perf = max(min_policy_perf, + percent_ext_fp(limits->min_sysfs_pct)); + limits->min_perf = min(limits->min_perf, max_policy_perf); + limits->max_perf = min(max_policy_perf, + percent_ext_fp(limits->max_sysfs_pct)); + limits->max_perf = max(min_policy_perf, limits->max_perf); - /* Make sure min_perf_pct <= max_perf_pct */ - limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); + /* Make sure min_perf <= max_perf */ + limits->min_perf = min(limits->min_perf, limits->max_perf); - limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); limits->max_perf = round_up(limits->max_perf, EXT_FRAC_BITS); limits->min_perf = round_up(limits->min_perf, EXT_FRAC_BITS); + limits->max_perf_pct = fp_ext_toint(limits->max_perf * 100); + limits->min_perf_pct = fp_ext_toint(limits->min_perf * 100); pr_debug("cpu:%d max_perf_pct:%d min_perf_pct:%d\n", policy->cpu, limits->max_perf_pct, limits->min_perf_pct); From 85b29008d8af6d94a0723aaa8d93cfb6e041158b Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:11 -0600 Subject: [PATCH 270/384] scsi: hpsa: update check for logical volume status - Add in a new case for volume offline. Resolves internal testing bug for multilun array management. - Return correct status for failed TURs. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 35 ++++++++++++++++------------------- drivers/scsi/hpsa_cmd.h | 2 ++ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 524a0c755ed7..90b76c4c6d36 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3714,7 +3714,7 @@ static int hpsa_get_volume_status(struct ctlr_info *h, * # (integer code indicating one of several NOT READY states * describing why a volume is to be kept offline) */ -static int hpsa_volume_offline(struct ctlr_info *h, +static unsigned char hpsa_volume_offline(struct ctlr_info *h, unsigned char scsi3addr[]) { struct CommandList *c; @@ -3735,7 +3735,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, DEFAULT_TIMEOUT); if (rc) { cmd_free(h, c); - return 0; + return HPSA_VPD_LV_STATUS_UNSUPPORTED; } sense = c->err_info->SenseInfo; if (c->err_info->SenseLen > sizeof(c->err_info->SenseInfo)) @@ -3746,19 +3746,13 @@ static int hpsa_volume_offline(struct ctlr_info *h, cmd_status = c->err_info->CommandStatus; scsi_status = c->err_info->ScsiStatus; cmd_free(h, c); - /* Is the volume 'not ready'? */ - if (cmd_status != CMD_TARGET_STATUS || - scsi_status != SAM_STAT_CHECK_CONDITION || - sense_key != NOT_READY || - asc != ASC_LUN_NOT_READY) { - return 0; - } /* Determine the reason for not ready state */ ldstat = hpsa_get_volume_status(h, scsi3addr); /* Keep volume offline in certain cases: */ switch (ldstat) { + case HPSA_LV_FAILED: case HPSA_LV_UNDERGOING_ERASE: case HPSA_LV_NOT_AVAILABLE: case HPSA_LV_UNDERGOING_RPI: @@ -3780,7 +3774,7 @@ static int hpsa_volume_offline(struct ctlr_info *h, default: break; } - return 0; + return HPSA_LV_OK; } /* @@ -3853,10 +3847,10 @@ static int hpsa_update_device_info(struct ctlr_info *h, /* Do an inquiry to the device to see what it is. */ if (hpsa_scsi_do_inquiry(h, scsi3addr, 0, inq_buff, (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { - /* Inquiry failed (msg printed already) */ dev_err(&h->pdev->dev, - "hpsa_update_device_info: inquiry failed\n"); - rc = -EIO; + "%s: inquiry failed, device will be skipped.\n", + __func__); + rc = HPSA_INQUIRY_FAILED; goto bail_out; } @@ -3885,15 +3879,19 @@ static int hpsa_update_device_info(struct ctlr_info *h, if ((this_device->devtype == TYPE_DISK || this_device->devtype == TYPE_ZBC) && is_logical_dev_addr_mode(scsi3addr)) { - int volume_offline; + unsigned char volume_offline; hpsa_get_raid_level(h, scsi3addr, &this_device->raid_level); if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC) hpsa_get_ioaccel_status(h, scsi3addr, this_device); volume_offline = hpsa_volume_offline(h, scsi3addr); - if (volume_offline < 0 || volume_offline > 0xff) - volume_offline = HPSA_VPD_LV_STATUS_UNSUPPORTED; - this_device->volume_offline = volume_offline & 0xff; + if (volume_offline == HPSA_LV_FAILED) { + rc = HPSA_LV_FAILED; + dev_err(&h->pdev->dev, + "%s: LV failed, device will be skipped.\n", + __func__); + goto bail_out; + } } else { this_device->raid_level = RAID_UNKNOWN; this_device->offload_config = 0; @@ -4379,8 +4377,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h) goto out; } if (rc) { - dev_warn(&h->pdev->dev, - "Inquiry failed, skipping device.\n"); + h->drv_req_rescan = 1; continue; } diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h index a584cdf07058..5961705eef76 100644 --- a/drivers/scsi/hpsa_cmd.h +++ b/drivers/scsi/hpsa_cmd.h @@ -156,6 +156,7 @@ #define CFGTBL_BusType_Fibre2G 0x00000200l /* VPD Inquiry types */ +#define HPSA_INQUIRY_FAILED 0x02 #define HPSA_VPD_SUPPORTED_PAGES 0x00 #define HPSA_VPD_LV_DEVICE_ID 0x83 #define HPSA_VPD_LV_DEVICE_GEOMETRY 0xC1 @@ -166,6 +167,7 @@ /* Logical volume states */ #define HPSA_VPD_LV_STATUS_UNSUPPORTED 0xff #define HPSA_LV_OK 0x0 +#define HPSA_LV_FAILED 0x01 #define HPSA_LV_NOT_AVAILABLE 0x0b #define HPSA_LV_UNDERGOING_ERASE 0x0F #define HPSA_LV_UNDERGOING_RPI 0x12 From 87b9e6aa87d9411f1059aa245c0c79976bc557ac Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:17 -0600 Subject: [PATCH 271/384] scsi: hpsa: limit outstanding rescans Avoid rescan storms. No need to queue another if one is pending. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 16 +++++++++++++++- drivers/scsi/hpsa.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 90b76c4c6d36..0a8ac68f4ca1 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5555,7 +5555,7 @@ static void hpsa_scan_complete(struct ctlr_info *h) spin_lock_irqsave(&h->scan_lock, flags); h->scan_finished = 1; - wake_up_all(&h->scan_wait_queue); + wake_up(&h->scan_wait_queue); spin_unlock_irqrestore(&h->scan_lock, flags); } @@ -5573,11 +5573,23 @@ static void hpsa_scan_start(struct Scsi_Host *sh) if (unlikely(lockup_detected(h))) return hpsa_scan_complete(h); + /* + * If a scan is already waiting to run, no need to add another + */ + spin_lock_irqsave(&h->scan_lock, flags); + if (h->scan_waiting) { + spin_unlock_irqrestore(&h->scan_lock, flags); + return; + } + + spin_unlock_irqrestore(&h->scan_lock, flags); + /* wait until any scan already in progress is finished. */ while (1) { spin_lock_irqsave(&h->scan_lock, flags); if (h->scan_finished) break; + h->scan_waiting = 1; spin_unlock_irqrestore(&h->scan_lock, flags); wait_event(h->scan_wait_queue, h->scan_finished); /* Note: We don't need to worry about a race between this @@ -5587,6 +5599,7 @@ static void hpsa_scan_start(struct Scsi_Host *sh) */ } h->scan_finished = 0; /* mark scan as in progress */ + h->scan_waiting = 0; spin_unlock_irqrestore(&h->scan_lock, flags); if (unlikely(lockup_detected(h))) @@ -8789,6 +8802,7 @@ static int hpsa_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) init_waitqueue_head(&h->event_sync_wait_queue); mutex_init(&h->reset_mutex); h->scan_finished = 1; /* no scan currently in progress */ + h->scan_waiting = 0; pci_set_drvdata(pdev, h); h->ndevices = 0; diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index bf6cdc106654..6f04f2ad4125 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -201,6 +201,7 @@ struct ctlr_info { dma_addr_t errinfo_pool_dhandle; unsigned long *cmd_pool_bits; int scan_finished; + u8 scan_waiting : 1; spinlock_t scan_lock; wait_queue_head_t scan_wait_queue; From 2ef2884980873081a4edae92f9d88dd580c85f6e Mon Sep 17 00:00:00 2001 From: Don Brace Date: Fri, 10 Mar 2017 14:35:23 -0600 Subject: [PATCH 272/384] scsi: hpsa: do not timeout reset operations Resets can take longer than DEFAULT_TIMEOUT. Reviewed-by: Scott Benesh Reviewed-by: Scott Teel Reviewed-by: Tomas Henzl Signed-off-by: Don Brace Signed-off-by: Martin K. Petersen --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0a8ac68f4ca1..0d0be7754a65 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -2956,7 +2956,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr, /* fill_cmd can't fail here, no data buffer to map. */ (void) fill_cmd(c, reset_type, h, NULL, 0, 0, scsi3addr, TYPE_MSG); - rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, DEFAULT_TIMEOUT); + rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT); if (rc) { dev_warn(&h->pdev->dev, "Failed to send reset command\n"); goto out; From 949d7fa158b2b1af533bdb1af0dda8ab103ac58d Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 12 Mar 2017 12:22:02 +0200 Subject: [PATCH 273/384] scsi: ufs: don't check unsigned type for a negative value Fix compilation warning: drivers/scsi/ufs/ufshcd.c:7645:13: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) Signed-off-by: Tomas Winkler Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 1359913bf840..e8c26e6e6237 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7642,7 +7642,7 @@ static inline ssize_t ufshcd_pm_lvl_store(struct device *dev, if (kstrtoul(buf, 0, &value)) return -EINVAL; - if ((value < UFS_PM_LVL_0) || (value >= UFS_PM_LVL_MAX)) + if (value >= UFS_PM_LVL_MAX) return -EINVAL; spin_lock_irqsave(hba->host->host_lock, flags); From 7d7080335f8d93a51e8238b6e85be8af4ba452b6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 8 Mar 2017 14:36:01 -0800 Subject: [PATCH 274/384] scsi: lpfc: Finalize Kconfig options for nvme Reviewing the result of what was just added for Kconfig, we made a poor choice. It worked well for full kernel builds, but not so much for how it would be deployed on a distro. Here's the final result: - lpfc will compile in NVME initiator and/or NVME target support based on whether the kernel has the corresponding subsystem support. Kconfig is not used to drive this specifically for lpfc. - There is a module parameter, lpfc_enable_fc4_type, that indicates whether the ports will do FCP-only or FCP & NVME (NVME-only not yet possible due to dependency on fc transport). As FCP & NVME divvys up exchange resources, and given NVME will not be often initially, the default is changed to FCP only. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 14 -------------- drivers/scsi/lpfc/lpfc_attr.c | 4 ++-- drivers/scsi/lpfc/lpfc_init.c | 7 +++++++ drivers/scsi/lpfc/lpfc_nvme.c | 8 ++++---- drivers/scsi/lpfc/lpfc_nvmet.c | 8 ++++---- 5 files changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 4bf55b5d78be..3c52867dfe28 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1253,20 +1253,6 @@ config SCSI_LPFC_DEBUG_FS This makes debugging information from the lpfc driver available via the debugfs filesystem. -config LPFC_NVME_INITIATOR - bool "Emulex LightPulse Fibre Channel NVME Initiator Support" - depends on SCSI_LPFC && NVME_FC - ---help--- - This enables NVME Initiator support in the Emulex lpfc driver. - -config LPFC_NVME_TARGET - bool "Emulex LightPulse Fibre Channel NVME Initiator Support" - depends on SCSI_LPFC && NVME_TARGET_FC - ---help--- - This enables NVME Target support in the Emulex lpfc driver. - Target enablement must still be enabled on a per adapter - basis by module parameters. - config SCSI_SIM710 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" depends on (EISA || MCA) && SCSI diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index fbd3a563be53..84aa62f1a4de 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3315,9 +3315,9 @@ LPFC_ATTR_R(nvmet_mrq_post, LPFC_DEF_MRQ_POST, * lpfc_enable_fc4_type: Defines what FC4 types are supported. * Supported Values: 1 - register just FCP * 3 - register both FCP and NVME - * Supported values are [1,3]. Default value is 3 + * Supported values are [1,3]. Default value is 1 */ -LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, +LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP, LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, "Define fc4 type to register with fabric."); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 2697d49da4d7..6cc561b04211 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5891,10 +5891,17 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Check to see if it matches any module parameter */ for (i = 0; i < lpfc_enable_nvmet_cnt; i++) { if (wwn == lpfc_enable_nvmet[i]) { +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "6017 NVME Target %016llx\n", wwn); phba->nvmet_support = 1; /* a match */ +#else + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6021 Can't enable NVME Target." + " NVME_TARGET_FC infrastructure" + " is not in kernel\n"); +#endif } } } diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0a4c19081409..0024de1c6c1f 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2149,7 +2149,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) /* localport is allocated from the stack, but the registration * call allocates heap memory as well as the private area. */ -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, &vport->phba->pcidev->dev, &localport); #else @@ -2190,7 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) void lpfc_nvme_destroy_localport(struct lpfc_vport *vport) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; @@ -2274,7 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) int lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) int ret = 0; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2403,7 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { -#ifdef CONFIG_LPFC_NVME_INITIATOR +#if (IS_ENABLED(CONFIG_NVME_FC)) int ret; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index b7739a554fe0..7ca868f394da 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -671,7 +671,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, &phba->pcidev->dev, &phba->targetport); @@ -756,7 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_tgtport *tgtp; if (phba->nvmet_support == 0) @@ -788,7 +788,7 @@ static void lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct hbq_dmabuf *nvmebuf) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_rcv_ctx *ctxp; @@ -891,7 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct rqb_dmabuf *nvmebuf, uint64_t isr_timestamp) { -#ifdef CONFIG_LPFC_NVME_TARGET +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; From 5f655322b1ba4bd46e26e307d04098f9c84df764 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 14 Mar 2017 11:47:29 -0400 Subject: [PATCH 275/384] parisc: support R_PARISC_SECREL32 relocation in modules The parisc kernel doesn't work with CONFIG_MODVERSIONS since the commit 71810db27c1c853b335675bee335d893bc3d324b. It can't load modules with the error: "module unix: Unknown relocation: 41". The commit changes __kcrctab from 64-bit valus to 32-bit values. The assembler generates R_PARISC_SECREL32 secrel relocation for them and the module loader doesn't support this relocation. This patch adds the R_PARISC_SECREL32 relocation to the module loader. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Helge Deller --- arch/parisc/kernel/module.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index a0ecdb4abcc8..c66c943d9322 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -620,6 +620,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_DPREL21L: /* left 21 bit of relative address */ val = lrsel(val - dp, addend); @@ -807,6 +811,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, */ *loc = fsel(val, addend); break; + case R_PARISC_SECREL32: + /* 32-bit section relative address. */ + *loc = fsel(val, addend); + break; case R_PARISC_FPTR64: /* 64-bit function address */ if(in_local(me, (void *)(val + addend))) { From 316ec0624f951166daedbe446988ef92ae72b59f Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 11 Mar 2017 18:03:34 -0500 Subject: [PATCH 276/384] parisc: Optimize flush_kernel_vmap_range and invalidate_kernel_vmap_range The previously submitted patch did not resolve the random segmentation faults observed on the phantom buildd system. There are still unresolved problems with the Debian 4.8 and 4.9 kernels on C8000. The attached patch removes the flush of the offset map pages and does a whole data cache flush for large ranges. No other arch flushes the offset map in these routines as far as I can tell. I have not observed any random segmentation faults on rp3440 in two weeks of testing with 4.10.0 and 4.10.1. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/cacheflush.h | 23 ++--------------------- arch/parisc/kernel/cache.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 19c9c3c5f267..c7e15cc5c668 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -43,28 +43,9 @@ static inline void flush_kernel_dcache_page(struct page *page) #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); -/* vmap range flushes and invalidates. Architecturally, we don't need - * the invalidate, because the CPU should refuse to speculate once an - * area has been flushed, so invalidate is left empty */ -static inline void flush_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - flush_kernel_dcache_range_asm(start, start + size); -} -static inline void invalidate_kernel_vmap_range(void *vaddr, int size) -{ - unsigned long start = (unsigned long)vaddr; - void *cursor = vaddr; - - for ( ; cursor < vaddr + size; cursor += PAGE_SIZE) { - struct page *page = vmalloc_to_page(cursor); - - if (test_and_clear_bit(PG_dcache_dirty, &page->flags)) - flush_kernel_dcache_page(page); - } - flush_kernel_dcache_range_asm(start, start + size); -} +void flush_kernel_vmap_range(void *vaddr, int size); +void invalidate_kernel_vmap_range(void *vaddr, int size); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 0dc72d5de861..c32a09095216 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -616,3 +616,25 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } + +void flush_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *vaddr, int size) +{ + unsigned long start = (unsigned long)vaddr; + + if ((unsigned long)size > parisc_cache_flush_threshold) + flush_data_cache(); + else + flush_kernel_dcache_range_asm(start, start + size); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); From 63d32d1e09cb2fc65b084b261976c06b40d19115 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 15 Mar 2017 21:10:17 +0100 Subject: [PATCH 277/384] parisc: Wire up statx system call Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/unistd.h | 3 ++- arch/parisc/kernel/syscall_table.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 6b0741e7a7ed..667c99421003 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -362,8 +362,9 @@ #define __NR_copy_file_range (__NR_Linux + 346) #define __NR_preadv2 (__NR_Linux + 347) #define __NR_pwritev2 (__NR_Linux + 348) +#define __NR_statx (__NR_Linux + 349) -#define __NR_Linux_syscalls (__NR_pwritev2 + 1) +#define __NR_Linux_syscalls (__NR_statx + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 3cfef1de8061..44aeaa9c039f 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -444,6 +444,7 @@ ENTRY_SAME(copy_file_range) ENTRY_COMP(preadv2) ENTRY_COMP(pwritev2) + ENTRY_SAME(statx) .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) From 0f424de1fd9bc4ab24bd1fe5430ab5618e803e31 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 14:42:03 -0400 Subject: [PATCH 278/384] drm/radeon/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index d12b8978142f..72e1588580a1 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2984,6 +2984,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, (rdev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (rdev->family == CHIP_OLAND) { + if ((rdev->pdev->device == 0x6604) && + (rdev->pdev->subsystem_vendor == 0x1028) && + (rdev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { From 18a8de1bc37e97dff1c96ee6cf49adbd02a0f775 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Mar 2017 19:24:19 -0400 Subject: [PATCH 279/384] drm/amdgpu/si: add dpm quirk for Oland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OLAND 0x1002:0x6604 0x1028:0x066F 0x00 seems to have problems with higher sclks. Acked-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index f55e45b52fbc..33b504bafb88 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3464,6 +3464,12 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, (adev->pdev->device == 0x6667)) { max_sclk = 75000; } + } else if (adev->asic_type == CHIP_OLAND) { + if ((adev->pdev->device == 0x6604) && + (adev->pdev->subsystem_vendor == 0x1028) && + (adev->pdev->subsystem_device == 0x066F)) { + max_sclk = 75000; + } } if (rps->vce_active) { From 801a6aa9a63c90724e8899982ad8c7f16be1e2cd Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Wed, 15 Mar 2017 05:34:25 -0400 Subject: [PATCH 280/384] drm/amd/amdgpu: Fix debugfs reg read/write address width MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MMIO space is wider now so we mask the lower 22 bits instead of 18. Signed-off-by: Tom St Denis Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4120b351a8e5..a3a105ec99e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2590,7 +2590,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || @@ -2666,7 +2666,7 @@ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, use_bank = 0; } - *pos &= 0x3FFFF; + *pos &= (1UL << 22) - 1; if (use_bank) { if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || From 186ecf14e58befba434f0774eea89e35f64d3c6a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 15 Mar 2017 21:48:42 +0100 Subject: [PATCH 281/384] parisc: Avoid compiler warnings with access_ok() Commit 09b871ffd4d8 (parisc: Define access_ok() as macro) missed to mark uaddr as used, which then gives compiler warnings about unused variables. Fix it by comparing uaddr to uaddr which then gets optimized away by the compiler. Signed-off-by: Helge Deller Fixes: 09b871ffd4d8 ("parisc: Define access_ok() as macro") --- arch/parisc/include/asm/uaccess.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index fb4382c28259..edfbf9d6a6dd 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -32,7 +32,8 @@ * that put_user is the same as __put_user, etc. */ -#define access_ok(type, uaddr, size) (1) +#define access_ok(type, uaddr, size) \ + ( (uaddr) == (uaddr) ) #define put_user __put_user #define get_user __get_user From 9b4f603e7a9f4282aec451063ffbbb8bb410dcd9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Mar 2017 00:12:16 +0100 Subject: [PATCH 282/384] cpufreq: Fix and clean up show_cpuinfo_cur_freq() There is a missing newline in show_cpuinfo_cur_freq(), so add it, but while at it clean that function up somewhat too. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Cc: All applicable --- drivers/cpufreq/cpufreq.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 38b9fdf854a4..b8ff617d449d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -680,9 +680,11 @@ static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, char *buf) { unsigned int cur_freq = __cpufreq_get(policy); - if (!cur_freq) - return sprintf(buf, ""); - return sprintf(buf, "%u\n", cur_freq); + + if (cur_freq) + return sprintf(buf, "%u\n", cur_freq); + + return sprintf(buf, "\n"); } /** From 6bce725a78de1b171928ce66dec2bae4b569e5d1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 8 Mar 2017 14:30:34 +0100 Subject: [PATCH 283/384] x86/mpx: Make unnecessarily global function static Make the function get_user_bd_entry() static as it is not used outside of arch/x86/mm/mpx.c This fixes a sparse warning. Signed-off-by: Tobias Klauser Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 5126dfd52b18..cd44ae727df7 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, * we might run off the end of the bounds table if we are on * a 64-bit kernel and try to get 8 bytes. */ -int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, +static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, long __user *bd_entry_ptr) { u32 bd_entry_32; From dcc3b5ffe1b32771c9a22e2c916fb94c4fcf5b79 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 21:51:28 -0800 Subject: [PATCH 284/384] sched/deadline: Add missing update_rq_clock() in dl_task_timer() The following warning can be triggered by hot-unplugging the CPU on which an active SCHED_DEADLINE task is running on: ------------[ cut here ]------------ WARNING: CPU: 7 PID: 0 at kernel/sched/sched.h:833 replenish_dl_entity+0x71e/0xc40 rq->clock_update_flags < RQCF_ACT_SKIP CPU: 7 PID: 0 Comm: swapper/7 Tainted: G B 4.11.0-rc1+ #24 Hardware name: LENOVO ThinkCentre M8500t-N000/SHARKBAY, BIOS FBKTC1AUS 02/16/2016 Call Trace: dump_stack+0x85/0xc4 __warn+0x172/0x1b0 warn_slowpath_fmt+0xb4/0xf0 ? __warn+0x1b0/0x1b0 ? debug_check_no_locks_freed+0x2c0/0x2c0 ? cpudl_set+0x3d/0x2b0 replenish_dl_entity+0x71e/0xc40 enqueue_task_dl+0x2ea/0x12e0 ? dl_task_timer+0x777/0x990 ? __hrtimer_run_queues+0x270/0xa50 dl_task_timer+0x316/0x990 ? enqueue_task_dl+0x12e0/0x12e0 ? enqueue_task_dl+0x12e0/0x12e0 __hrtimer_run_queues+0x270/0xa50 ? hrtimer_cancel+0x20/0x20 ? hrtimer_interrupt+0x119/0x600 hrtimer_interrupt+0x19c/0x600 ? trace_hardirqs_off+0xd/0x10 local_apic_timer_interrupt+0x74/0xe0 smp_apic_timer_interrupt+0x76/0xa0 apic_timer_interrupt+0x93/0xa0 The DL task will be migrated to a suitable later deadline rq once the DL timer fires and currnet rq is offline. The rq clock of the new rq should be updated. This patch fixes it by updating the rq clock after holding the new rq's rq lock. Signed-off-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Matt Fleming Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1488865888-15894-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 99b2c33a9fbc..c6db3fd727fe 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -638,6 +638,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) lockdep_unpin_lock(&rq->lock, rf.cookie); rq = dl_task_offline_migration(rq, p); rf.cookie = lockdep_pin_lock(&rq->lock); + update_rq_clock(rq); /* * Now that the task has been migrated to the new RQ and we From 6e5f32f7a43f45ee55c401c0b9585eb01f9629a8 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 17 Feb 2017 12:07:30 +0000 Subject: [PATCH 285/384] sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ accounting If we crossed a sample window while in NO_HZ we will add LOAD_FREQ to the pending sample window time on exit, setting the next update not one window into the future, but two. This situation on exiting NO_HZ is described by: this_rq->calc_load_update < jiffies < calc_load_update In this scenario, what we should be doing is: this_rq->calc_load_update = calc_load_update [ next window ] But what we actually do is: this_rq->calc_load_update = calc_load_update + LOAD_FREQ [ next+1 window ] This has the effect of delaying load average updates for potentially up to ~9seconds. This can result in huge spikes in the load average values due to per-cpu uninterruptible task counts being out of sync when accumulated across all CPUs. It's safe to update the per-cpu active count if we wake between sample windows because any load that we left in 'calc_load_idle' will have been zero'd when the idle load was folded in calc_global_load(). This issue is easy to reproduce before, commit 9d89c257dfb9 ("sched/fair: Rewrite runnable load and utilization average tracking") just by forking short-lived process pipelines built from ps(1) and grep(1) in a loop. I'm unable to reproduce the spikes after that commit, but the bug still seems to be present from code review. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vincent Guittot Fixes: commit 5167e8d ("sched/nohz: Rewrite and fix load-avg computation -- again") Link: http://lkml.kernel.org/r/20170217120731.11868-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- kernel/sched/loadavg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 7296b7308eca..3a55f3f9ffe4 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -202,8 +202,9 @@ void calc_load_exit_idle(void) struct rq *this_rq = this_rq(); /* - * If we're still before the sample window, we're done. + * If we're still before the pending sample window, we're done. */ + this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update)) return; @@ -212,7 +213,6 @@ void calc_load_exit_idle(void) * accounted through the nohz accounting, so skip the entire deal and * sync up for the next window. */ - this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update + 10)) this_rq->calc_load_update += LOAD_FREQ; } From caeb5882979bc6f3c8766fcf59c6269b38f521bc Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 17 Feb 2017 12:07:31 +0000 Subject: [PATCH 286/384] sched/loadavg: Use {READ,WRITE}_ONCE() for sample window 'calc_load_update' is accessed without any kind of locking and there's a clear assumption in the code that only a single value is read or written. Make this explicit by using READ_ONCE() and WRITE_ONCE(), and avoid unintentionally seeing multiple values, or having the load/stores split. Technically the loads in calc_global_*() don't require this since those are the only functions that update 'calc_load_update', but I've added the READ_ONCE() for consistency. Suggested-by: Peter Zijlstra Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Thomas Gleixner Cc: Vincent Guittot Link: http://lkml.kernel.org/r/20170217120731.11868-3-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- kernel/sched/loadavg.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index 3a55f3f9ffe4..f15fb2bdbc0d 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -169,7 +169,7 @@ static inline int calc_load_write_idx(void) * If the folding window started, make sure we start writing in the * next idle-delta. */ - if (!time_before(jiffies, calc_load_update)) + if (!time_before(jiffies, READ_ONCE(calc_load_update))) idx++; return idx & 1; @@ -204,7 +204,7 @@ void calc_load_exit_idle(void) /* * If we're still before the pending sample window, we're done. */ - this_rq->calc_load_update = calc_load_update; + this_rq->calc_load_update = READ_ONCE(calc_load_update); if (time_before(jiffies, this_rq->calc_load_update)) return; @@ -308,13 +308,15 @@ calc_load_n(unsigned long load, unsigned long exp, */ static void calc_global_nohz(void) { + unsigned long sample_window; long delta, active, n; - if (!time_before(jiffies, calc_load_update + 10)) { + sample_window = READ_ONCE(calc_load_update); + if (!time_before(jiffies, sample_window + 10)) { /* * Catch-up, fold however many we are behind still */ - delta = jiffies - calc_load_update - 10; + delta = jiffies - sample_window - 10; n = 1 + (delta / LOAD_FREQ); active = atomic_long_read(&calc_load_tasks); @@ -324,7 +326,7 @@ static void calc_global_nohz(void) avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - calc_load_update += n * LOAD_FREQ; + WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ); } /* @@ -352,9 +354,11 @@ static inline void calc_global_nohz(void) { } */ void calc_global_load(unsigned long ticks) { + unsigned long sample_window; long active, delta; - if (time_before(jiffies, calc_load_update + 10)) + sample_window = READ_ONCE(calc_load_update); + if (time_before(jiffies, sample_window + 10)) return; /* @@ -371,7 +375,7 @@ void calc_global_load(unsigned long ticks) avenrun[1] = calc_load(avenrun[1], EXP_5, active); avenrun[2] = calc_load(avenrun[2], EXP_15, active); - calc_load_update += LOAD_FREQ; + WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ); /* * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. From 17fcbd590d0c3e35bd9646e2215f86586378bc42 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 25 Feb 2017 01:17:53 +0100 Subject: [PATCH 287/384] locking/rwsem: Fix down_write_killable() for CONFIG_RWSEM_GENERIC_SPINLOCK=y We hang if SIGKILL has been sent, but the task is stuck in down_read() (after do_exit()), even though no task is doing down_write() on the rwsem in question: INFO: task libupnp:21868 blocked for more than 120 seconds. libupnp D 0 21868 1 0x08100008 ... Call Trace: __schedule() schedule() __down_read() do_exit() do_group_exit() __wake_up_parent() This bug has already been fixed for CONFIG_RWSEM_XCHGADD_ALGORITHM=y in the following commit: 04cafed7fc19 ("locking/rwsem: Fix down_write_killable()") ... however, this bug also exists for CONFIG_RWSEM_GENERIC_SPINLOCK=y. Signed-off-by: Niklas Cassel Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Andrew Morton Cc: Linus Torvalds Cc: Niklas Cassel Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d47996082f52 ("locking/rwsem: Introduce basis for down_write_killable()") Link: http://lkml.kernel.org/r/1487981873-12649-1-git-send-email-niklass@axis.com Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-spinlock.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 7bc24d477805..c65f7989f850 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -213,10 +213,9 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) */ if (sem->count == 0) break; - if (signal_pending_state(state, current)) { - ret = -EINTR; - goto out; - } + if (signal_pending_state(state, current)) + goto out_nolock; + set_current_state(state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); @@ -224,12 +223,19 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) } /* got the lock */ sem->count = -1; -out: list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); return ret; + +out_nolock: + list_del(&waiter.list); + if (!list_empty(&sem->wait_list)) + __rwsem_do_wake(sem, 1); + raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return -EINTR; } void __sched __down_write(struct rw_semaphore *sem) From 5ac69d37784b237707a7b15d199cdb6c6fdb6780 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 2 Mar 2017 15:10:57 +0100 Subject: [PATCH 288/384] sched/deadline: Make sure the replenishment timer fires in the next period Currently, the replenishment timer is set to fire at the deadline of a task. Although that works for implicit deadline tasks because the deadline is equals to the begin of the next period, that is not correct for constrained deadline tasks (deadline < period). For instance: f.c: --------------- %< --------------- int main (void) { for(;;); } --------------- >% --------------- # gcc -o f f.c # trace-cmd record -e sched:sched_switch \ -e syscalls:sys_exit_sched_setattr \ chrt -d --sched-runtime 490000000 \ --sched-deadline 500000000 \ --sched-period 1000000000 0 ./f # trace-cmd report | grep "{pid of ./f}" After setting parameters, the task is replenished and continue running until being throttled: f-11295 [003] 13322.113776: sys_exit_sched_setattr: 0x0 The task is throttled after running 492318 ms, as expected: f-11295 [003] 13322.606094: sched_switch: f:11295 [-1] R ==> watchdog/3:32 [0] But then, the task is replenished 500719 ms after the first replenishment: -0 [003] 13322.614495: sched_switch: swapper/3:0 [120] R ==> f:11295 [-1] Running for 490277 ms: f-11295 [003] 13323.104772: sched_switch: f:11295 [-1] R ==> swapper/3:0 [120] Hence, in the first period, the task runs 2 * runtime, and that is a bug. During the first replenishment, the next deadline is set one period away. So the runtime / period starts to be respected. However, as the second replenishment took place in the wrong instant, the next replenishment will also be held in a wrong instant of time. Rather than occurring in the nth period away from the first activation, it is taking place in the (nth period - relative deadline). Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Luca Abeni Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/ac50d89887c25285b47465638354b63362f8adff.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c6db3fd727fe..445e2787bf80 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -505,10 +505,15 @@ static void update_dl_entity(struct sched_dl_entity *dl_se, } } +static inline u64 dl_next_period(struct sched_dl_entity *dl_se) +{ + return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period; +} + /* * If the entity depleted all its runtime, and if we want it to sleep * while waiting for some new execution time to become available, we - * set the bandwidth enforcement timer to the replenishment instant + * set the bandwidth replenishment timer to the replenishment instant * and try to activate it. * * Notice that it is important for the caller to know if the timer @@ -530,7 +535,7 @@ static int start_dl_timer(struct task_struct *p) * that it is actually coming from rq->clock and not from * hrtimer's time base reading. */ - act = ns_to_ktime(dl_se->deadline); + act = ns_to_ktime(dl_next_period(dl_se)); now = hrtimer_cb_get_time(timer); delta = ktime_to_ns(now) - rq_clock(rq); act = ktime_add_ns(act, delta); From df8eac8cafce7d086be3bd5cf5a838fa37594dfb Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Thu, 2 Mar 2017 15:10:58 +0100 Subject: [PATCH 289/384] sched/deadline: Throttle a constrained deadline task activated after the deadline During the activation, CBS checks if it can reuse the current task's runtime and period. If the deadline of the task is in the past, CBS cannot use the runtime, and so it replenishes the task. This rule works fine for implicit deadline tasks (deadline == period), and the CBS was designed for implicit deadline tasks. However, a task with constrained deadline (deadine < period) might be awakened after the deadline, but before the next period. In this case, replenishing the task would allow it to run for runtime / deadline. As in this case deadline < period, CBS enables a task to run for more than the runtime / period. In a very loaded system, this can cause a domino effect, making other tasks miss their deadlines. To avoid this problem, in the activation of a constrained deadline task after the deadline but before the next period, throttle the task and set the replenishing timer to the begin of the next period, unless it is boosted. Reproducer: --------------- %< --------------- int main (int argc, char **argv) { int ret; int flags = 0; unsigned long l = 0; struct timespec ts; struct sched_attr attr; memset(&attr, 0, sizeof(attr)); attr.size = sizeof(attr); attr.sched_policy = SCHED_DEADLINE; attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 2 * 1000 * 1000; /* 2 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ ts.tv_sec = 0; ts.tv_nsec = 2000 * 1000; /* 2 ms */ ret = sched_setattr(0, &attr, flags); if (ret < 0) { perror("sched_setattr"); exit(-1); } for(;;) { /* XXX: you may need to adjust the loop */ for (l = 0; l < 150000; l++); /* * The ideia is to go to sleep right before the deadline * and then wake up before the next period to receive * a new replenishment. */ nanosleep(&ts, NULL); } exit(0); } --------------- >% --------------- On my box, this reproducer uses almost 50% of the CPU time, which is obviously wrong for a task with 2/2000 reservation. Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/edf58354e01db46bf42df8d2dd32418833f68c89.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 445e2787bf80..736d8b9d9bab 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -695,6 +695,37 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) timer->function = dl_task_timer; } +/* + * During the activation, CBS checks if it can reuse the current task's + * runtime and period. If the deadline of the task is in the past, CBS + * cannot use the runtime, and so it replenishes the task. This rule + * works fine for implicit deadline tasks (deadline == period), and the + * CBS was designed for implicit deadline tasks. However, a task with + * constrained deadline (deadine < period) might be awakened after the + * deadline, but before the next period. In this case, replenishing the + * task would allow it to run for runtime / deadline. As in this case + * deadline < period, CBS enables a task to run for more than the + * runtime / period. In a very loaded system, this can cause a domino + * effect, making other tasks miss their deadlines. + * + * To avoid this problem, in the activation of a constrained deadline + * task after the deadline but before the next period, throttle the + * task and set the replenishing timer to the begin of the next period, + * unless it is boosted. + */ +static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) +{ + struct task_struct *p = dl_task_of(dl_se); + struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se)); + + if (dl_time_before(dl_se->deadline, rq_clock(rq)) && + dl_time_before(rq_clock(rq), dl_next_period(dl_se))) { + if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) + return; + dl_se->dl_throttled = 1; + } +} + static int dl_runtime_exceeded(struct sched_dl_entity *dl_se) { @@ -928,6 +959,11 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se) __dequeue_dl_entity(dl_se); } +static inline bool dl_is_constrained(struct sched_dl_entity *dl_se) +{ + return dl_se->dl_deadline < dl_se->dl_period; +} + static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) { struct task_struct *pi_task = rt_mutex_get_top_task(p); @@ -953,6 +989,15 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) return; } + /* + * Check if a constrained deadline task was activated + * after the deadline but before the next period. + * If that is the case, the task will be throttled and + * the replenishment timer will be set to the next period. + */ + if (!p->dl.dl_throttled && dl_is_constrained(&p->dl)) + dl_check_constrained_dl(&p->dl); + /* * If p is throttled, we do nothing. In fact, if it exhausted * its budget it needs a replenishment and, since it now is on From 2317d5f1c34913bac5971d93d69fb6c31bb74670 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 15:10:59 +0100 Subject: [PATCH 290/384] sched/deadline: Use deadline instead of period when calculating overflow I was testing Daniel's changes with his test case, and tweaked it a little. Instead of having the runtime equal to the deadline, I increased the deadline ten fold. Daniel's test case had: attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 2 * 1000 * 1000; /* 2 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ To make it more interesting, I changed it to: attr.sched_runtime = 2 * 1000 * 1000; /* 2 ms */ attr.sched_deadline = 20 * 1000 * 1000; /* 20 ms */ attr.sched_period = 2 * 1000 * 1000 * 1000; /* 2 s */ The results were rather surprising. The behavior that Daniel's patch was fixing came back. The task started using much more than .1% of the CPU. More like 20%. Looking into this I found that it was due to the dl_entity_overflow() constantly returning true. That's because it uses the relative period against relative runtime vs the absolute deadline against absolute runtime. runtime / (deadline - t) > dl_runtime / dl_period There's even a comment mentioning this, and saying that when relative deadline equals relative period, that the equation is the same as using deadline instead of period. That comment is backwards! What we really want is: runtime / (deadline - t) > dl_runtime / dl_deadline We care about if the runtime can make its deadline, not its period. And then we can say "when the deadline equals the period, the equation is the same as using dl_period instead of dl_deadline". After correcting this, now when the task gets enqueued, it can throttle correctly, and Daniel's fix to the throttling of sleeping deadline tasks works even when the runtime and deadline are not the same. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Luca Abeni Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Romulo Silva de Oliveira Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tommaso Cucinotta Link: http://lkml.kernel.org/r/02135a27f1ae3fe5fd032568a5a2f370e190e8d7.1488392936.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- kernel/sched/deadline.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 736d8b9d9bab..a2ce59015642 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -445,13 +445,13 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * * This function returns true if: * - * runtime / (deadline - t) > dl_runtime / dl_period , + * runtime / (deadline - t) > dl_runtime / dl_deadline , * * IOW we can't recycle current parameters. * - * Notice that the bandwidth check is done against the period. For + * Notice that the bandwidth check is done against the deadline. For * task with deadline equal to period this is the same of using - * dl_deadline instead of dl_period in the equation above. + * dl_period instead of dl_deadline in the equation above. */ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, struct sched_dl_entity *pi_se, u64 t) @@ -476,7 +476,7 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, * of anything below microseconds resolution is actually fiction * (but still we want to give the user that illusion >;). */ - left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); + left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE); right = ((dl_se->deadline - t) >> DL_SCALE) * (pi_se->dl_runtime >> DL_SCALE); From f717629c7f834ab2efa05c7dbf0826f1d7c32ade Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 16 Mar 2017 14:37:11 +0530 Subject: [PATCH 291/384] powerpc: Wire up statx() syscall Test runs on a ppc64 BE guest succeeded. linux/samples/statx/test-statx program was executed on the following file types, 1. Regular file 2. Directory 3. device file 4. symlink 5. Named pipe The test run also included invoking test-statx with the runtime options provided in the main() function of test-statx.c Signed-off-by: Chandan Rajendra Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4b369d83fe9c..1c9470881c4a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -387,3 +387,4 @@ SYSCALL(copy_file_range) COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) +SYSCALL(statx) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index eb1acee91a20..9ba11dbcaca9 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define NR_syscalls 383 +#define NR_syscalls 384 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index 2f26335a3c42..b85f14228857 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -393,5 +393,6 @@ #define __NR_preadv2 380 #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 +#define __NR_statx 383 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ From e552a8389aa409e257b7dcba74f67f128f979ccc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:48 +0100 Subject: [PATCH 292/384] perf/core: Fix use-after-free in perf_release() Dmitry reported syzcaller tripped a use-after-free in perf_release(). After much puzzlement Oleg spotted the below scenario: Task1 Task2 fork() perf_event_init_task() /* ... */ goto bad_fork_$foo; /* ... */ perf_event_free_task() mutex_lock(ctx->lock) perf_free_event(B) perf_event_release_kernel(A) mutex_lock(A->child_mutex) list_for_each_entry(child, ...) { /* child == B */ ctx = B->ctx; get_ctx(ctx); mutex_unlock(A->child_mutex); mutex_lock(A->child_mutex) list_del_init(B->child_list) mutex_unlock(A->child_mutex) /* ... */ mutex_unlock(ctx->lock); put_ctx() /* >0 */ free_task(); mutex_lock(ctx->lock); mutex_lock(A->child_mutex); /* ... */ mutex_unlock(A->child_mutex); mutex_unlock(ctx->lock) put_ctx() /* 0 */ ctx->task && !TOMBSTONE put_task_struct() /* UAF */ This patch closes the hole by making perf_event_free_task() destroy the task <-> ctx relation such that perf_event_release_kernel() will no longer observe the now dead task. Spotted-by: Oleg Nesterov Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Cc: oleg@redhat.com Cc: stable@vger.kernel.org Fixes: c6e5b73242d2 ("perf: Synchronously clean up child events") Link: http://lkml.kernel.org/r/20170314155949.GE32474@worktop Link: http://lkml.kernel.org/r/20170316125823.140295131@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1031bdf9f012..4742909c56e6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10415,6 +10415,17 @@ void perf_event_free_task(struct task_struct *task) continue; mutex_lock(&ctx->mutex); + raw_spin_lock_irq(&ctx->lock); + /* + * Destroy the task <-> ctx relation and mark the context dead. + * + * This is important because even though the task hasn't been + * exposed yet the context has been (through child_list). + */ + RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL); + WRITE_ONCE(ctx->task, TASK_TOMBSTONE); + put_task_struct(task); /* cannot be last */ + raw_spin_unlock_irq(&ctx->lock); again: list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) From e7cc4865f0f31698ef2f7aac01a50e78968985b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:49 +0100 Subject: [PATCH 293/384] perf/core: Fix event inheritance on fork() While hunting for clues to a use-after-free, Oleg spotted that perf_event_init_context() can loose an error value with the result that fork() can succeed even though we did not fully inherit the perf event context. Spotted-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: oleg@redhat.com Cc: stable@vger.kernel.org Fixes: 889ff0150661 ("perf/core: Split context's event group list into pinned and non-pinned lists") Link: http://lkml.kernel.org/r/20170316125823.190342547@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4742909c56e6..fc7c9a85944d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10679,7 +10679,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } /* @@ -10695,7 +10695,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) ret = inherit_task_group(event, parent, parent_ctx, child, ctxn, &inherited_all); if (ret) - break; + goto out_unlock; } raw_spin_lock_irqsave(&parent_ctx->lock, flags); @@ -10723,6 +10723,7 @@ static int perf_event_init_context(struct task_struct *child, int ctxn) } raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +out_unlock: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); From 15121c789e001168decac6483d192bdb7ea29e74 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:50 +0100 Subject: [PATCH 294/384] perf/core: Simplify perf_event_free_task() We have ctx->event_list that contains all events; no need to repeatedly iterate the group lists to find them all. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170316125823.239678244@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index fc7c9a85944d..5f21e5e09ba4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10426,21 +10426,11 @@ void perf_event_free_task(struct task_struct *task) WRITE_ONCE(ctx->task, TASK_TOMBSTONE); put_task_struct(task); /* cannot be last */ raw_spin_unlock_irq(&ctx->lock); -again: - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, - group_entry) - perf_free_event(event, ctx); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, - group_entry) + list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) perf_free_event(event, ctx); - if (!list_empty(&ctx->pinned_groups) || - !list_empty(&ctx->flexible_groups)) - goto again; - mutex_unlock(&ctx->mutex); - put_ctx(ctx); } } From d8a8cfc76919b6c830305266b23ba671623f37ff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Mar 2017 13:47:51 +0100 Subject: [PATCH 295/384] perf/core: Better explain the inherit magic While going through the event inheritance code Oleg got confused. Add some comments to better explain the silent dissapearance of orphaned events. So what happens is that at perf_event_release_kernel() time; when an event looses its connection to userspace (and ceases to exist from the user's perspective) we can still have an arbitrary amount of inherited copies of the event. We want to synchronously find and remove all these child events. Since that requires a bit of lock juggling, there is the possibility that concurrent clone()s will create new child events. Therefore we first mark the parent event as DEAD, which marks all the extant child events as orphaned. We then avoid copying orphaned events; in order to avoid getting more of them. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Dmitry Vyukov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/20170316125823.289567442@infradead.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5f21e5e09ba4..7298e149b732 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4254,7 +4254,7 @@ int perf_event_release_kernel(struct perf_event *event) raw_spin_lock_irq(&ctx->lock); /* - * Mark this even as STATE_DEAD, there is no external reference to it + * Mark this event as STATE_DEAD, there is no external reference to it * anymore. * * Anybody acquiring event->child_mutex after the below loop _must_ @@ -10468,7 +10468,12 @@ const struct perf_event_attr *perf_event_attrs(struct perf_event *event) } /* - * inherit a event from parent task to child task: + * Inherit a event from parent task to child task. + * + * Returns: + * - valid pointer on success + * - NULL for orphaned events + * - IS_ERR() on error */ static struct perf_event * inherit_event(struct perf_event *parent_event, @@ -10562,6 +10567,16 @@ inherit_event(struct perf_event *parent_event, return child_event; } +/* + * Inherits an event group. + * + * This will quietly suppress orphaned events; !inherit_event() is not an error. + * This matches with perf_event_release_kernel() removing all child events. + * + * Returns: + * - 0 on success + * - <0 on error + */ static int inherit_group(struct perf_event *parent_event, struct task_struct *parent, struct perf_event_context *parent_ctx, @@ -10576,6 +10591,11 @@ static int inherit_group(struct perf_event *parent_event, child, NULL, child_ctx); if (IS_ERR(leader)) return PTR_ERR(leader); + /* + * @leader can be NULL here because of is_orphaned_event(). In this + * case inherit_event() will create individual events, similar to what + * perf_group_detach() would do anyway. + */ list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { child_ctr = inherit_event(sub, parent, parent_ctx, child, leader, child_ctx); @@ -10585,6 +10605,17 @@ static int inherit_group(struct perf_event *parent_event, return 0; } +/* + * Creates the child task context and tries to inherit the event-group. + * + * Clears @inherited_all on !attr.inherited or error. Note that we'll leave + * inherited_all set when we 'fail' to inherit an orphaned event; this is + * consistent with perf_event_release_kernel() removing all child events. + * + * Returns: + * - 0 on success + * - <0 on error + */ static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, @@ -10607,7 +10638,6 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * First allocate and initialize a context for the * child. */ - child_ctx = alloc_perf_context(parent_ctx->pmu, child); if (!child_ctx) return -ENOMEM; From 29c8bbbd6e21daa0997d1c3ee886b897ee7ad652 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 296/384] afs: Fix missing put_page() In afs_writepages_region(), inside the loop where we find dirty pages to deal with, one of the if-statements is missing a put_page(). Signed-off-by: David Howells --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index c83c1a0e851f..e919e64cd4e0 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -513,6 +513,7 @@ static int afs_writepages_region(struct address_space *mapping, if (PageWriteback(page) || !PageDirty(page)) { unlock_page(page); + put_page(page); continue; } From 5611ef280d814042825ee17688f5751266fc538b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 297/384] afs: Fix page overput in afs_fill_page() afs_fill_page() loads the page it wants to fill into the afs_read request without incrementing its refcount - but then calls afs_put_read() to clean up afterwards, which then releases a ref on the page. Fix this by getting a ref on the page before calling afs_vnode_fetch_data(). This causes sync after a write to hang in afs_writepages_region() because find_get_pages_tag() gets confused and doesn't return. Fixes: 196ee9cd2d04 ("afs: Make afs_fs_fetch_data() take a list of pages") Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/afs/write.c b/fs/afs/write.c index e919e64cd4e0..3ac52f6a96ff 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -101,6 +101,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, req->pos = pos; req->nr_pages = 1; req->pages[0] = page; + get_page(page); i_size = i_size_read(&vnode->vfs_inode); if (pos + PAGE_SIZE > i_size) From 6186f0788b31f44affceeedc7b48eb10faea120d Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:43 +0000 Subject: [PATCH 298/384] afs: Populate group ID from vnode status The group was hard coded to GLOBAL_ROOT_GID; use the group ID that was received from the server. Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1e4897a048d2..299dbaeb2e2a 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -70,7 +70,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) set_nlink(inode, vnode->status.nlink); inode->i_uid = vnode->status.owner; - inode->i_gid = GLOBAL_ROOT_GID; + inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; inode->i_ctime.tv_sec = vnode->status.mtime_server; inode->i_ctime.tv_nsec = 0; From 627f46943ff90bcc32ddeb675d881c043c6fa2ae Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 299/384] afs: Adjust mode bits processing Mode bits for an afs file should not be enforced in the usual way. For files, the absence of user bits can restrict file access with respect to what is granted by the server. These bits apply regardless of the owner or the current uid; the rest of the mode bits (group, other) are ignored. Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/security.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index 8d010422dc89..bfa9d3428383 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -340,17 +340,22 @@ int afs_permission(struct inode *inode, int mask) } else { if (!(access & AFS_ACE_LOOKUP)) goto permission_denied; + if ((mask & MAY_EXEC) && !(inode->i_mode & S_IXUSR)) + goto permission_denied; if (mask & (MAY_EXEC | MAY_READ)) { if (!(access & AFS_ACE_READ)) goto permission_denied; + if (!(inode->i_mode & S_IRUSR)) + goto permission_denied; } else if (mask & MAY_WRITE) { if (!(access & AFS_ACE_WRITE)) goto permission_denied; + if (!(inode->i_mode & S_IWUSR)) + goto permission_denied; } } key_put(key); - ret = generic_permission(inode, mask); _leave(" = %d", ret); return ret; From bcd89270d93b7edebb5de5e5e7dca1a77a33496e Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 300/384] afs: Deal with an empty callback array Servers may send a callback array that is the same size as the FID array, or an empty array. If the callback count is 0, the code would attempt to read (fid_count * 12) bytes of data, which would fail and result in an unmarshalling error. This would lead to stale data for remotely modified files or directories. Store the callback array size in the internal afs_call structure and use that to determine the amount of data to read. Signed-off-by: Marc Dionne --- fs/afs/cmservice.c | 11 +++++------ fs/afs/internal.h | 5 ++++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 2edbdcbf6432..3062cceb5c2a 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -187,7 +187,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) struct afs_callback *cb; struct afs_server *server; __be32 *bp; - u32 tmp; int ret, loop; _enter("{%u}", call->unmarshall); @@ -249,9 +248,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) if (ret < 0) return ret; - tmp = ntohl(call->tmp); - _debug("CB count: %u", tmp); - if (tmp != call->count && tmp != 0) + call->count2 = ntohl(call->tmp); + _debug("CB count: %u", call->count2); + if (call->count2 != call->count && call->count2 != 0) return -EBADMSG; call->offset = 0; call->unmarshall++; @@ -259,14 +258,14 @@ static int afs_deliver_cb_callback(struct afs_call *call) case 4: _debug("extract CB array"); ret = afs_extract_data(call, call->buffer, - call->count * 3 * 4, false); + call->count2 * 3 * 4, false); if (ret < 0) return ret; _debug("unmarshall CB array"); cb = call->request; bp = call->buffer; - for (loop = call->count; loop > 0; loop--, cb++) { + for (loop = call->count2; loop > 0; loop--, cb++) { cb->version = ntohl(*bp++); cb->expiry = ntohl(*bp++); cb->type = ntohl(*bp++); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5dfa56903a2d..8499870147ef 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -90,7 +90,10 @@ struct afs_call { unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ unsigned first_offset; /* offset into mapping[first] */ - unsigned last_to; /* amount of mapping[last] */ + union { + unsigned last_to; /* amount of mapping[last] */ + unsigned count2; /* count used in unmarshalling */ + }; unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ From 6db3ac3c4bc552837d232ec794559a2fae2815a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 301/384] afs: Handle better the server returning excess or short data When an AFS server is given an FS.FetchData{,64} request to read data from a file, it is permitted by the protocol to return more or less than was requested. kafs currently relies on the latter behaviour in readpage{,s} to handle a partial page at the end of the file (we just ask for a whole page and clear space beyond the short read). However, we don't handle all cases. Add: (1) Handle excess data by discarding it rather than aborting. Note that we use a common static buffer to discard into so that the decryption algorithm advances the PCBC state. (2) Handle a short read that affects more than just the last page. Note that if a read comes up unexpectedly short of long, it's possible that the server's copy of the file changed - in which case the data version number will have been incremented and the callback will have been broken - in which case all the pages currently attached to the inode will be zapped anyway at some point. Signed-off-by: David Howells --- fs/afs/file.c | 7 +++++-- fs/afs/fsclient.c | 49 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index ba7b71fba34b..a38e1c30d110 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -184,10 +184,13 @@ int afs_page_filler(void *data, struct page *page) if (!req) goto enomem; + /* We request a full page. If the page is a partial one at the + * end of the file, the server will return a short read and the + * unmarshalling code will clear the unfilled space. + */ atomic_set(&req->usage, 1); req->pos = (loff_t)page->index << PAGE_SHIFT; - req->len = min_t(size_t, i_size_read(inode) - req->pos, - PAGE_SIZE); + req->len = PAGE_SIZE; req->nr_pages = 1; req->pages[0] = page; get_page(page); diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ac8e766978dc..bf8904a1a58f 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -16,6 +16,12 @@ #include "internal.h" #include "afs_fs.h" +/* + * We need somewhere to discard into in case the server helpfully returns more + * than we asked for in FS.FetchData{,64}. + */ +static u8 afs_discard_buffer[64]; + /* * decode an AFSFid block */ @@ -353,12 +359,6 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) req->actual_len |= ntohl(call->tmp); _debug("DATA length: %llu", req->actual_len); - /* Check that the server didn't want to send us extra. We - * might want to just discard instead, but that requires - * cooperation from AF_RXRPC. - */ - if (req->actual_len > req->len) - return -EBADMSG; req->remain = req->actual_len; call->offset = req->pos & (PAGE_SIZE - 1); @@ -368,6 +368,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->unmarshall++; begin_page: + ASSERTCMP(req->index, <, req->nr_pages); if (req->remain > PAGE_SIZE - call->offset) size = PAGE_SIZE - call->offset; else @@ -390,18 +391,37 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (req->page_done) req->page_done(call, req); if (req->remain > 0) { - req->index++; call->offset = 0; + req->index++; + if (req->index >= req->nr_pages) + goto begin_discard; goto begin_page; } } + goto no_more_data; + + /* Discard any excess data the server gave us */ + begin_discard: + case 4: + size = min_t(size_t, sizeof(afs_discard_buffer), req->remain); + call->count = size; + _debug("extract discard %u/%llu %zu/%u", + req->remain, req->actual_len, call->offset, call->count); + + call->offset = 0; + ret = afs_extract_data(call, afs_discard_buffer, call->count, true); + req->remain -= call->offset; + if (ret < 0) + return ret; + if (req->remain > 0) + goto begin_discard; no_more_data: call->offset = 0; - call->unmarshall++; + call->unmarshall = 5; /* extract the metadata */ - case 4: + case 5: ret = afs_extract_data(call, call->buffer, (21 + 3 + 6) * 4, false); if (ret < 0) @@ -416,16 +436,17 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->offset = 0; call->unmarshall++; - case 5: + case 6: break; } - if (call->count < PAGE_SIZE) { - buffer = kmap(req->pages[req->index]); - memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap(req->pages[req->index]); + for (; req->index < req->nr_pages; req->index++) { + if (call->count < PAGE_SIZE) + zero_user_segment(req->pages[req->index], + call->count, PAGE_SIZE); if (req->page_done) req->page_done(call, req); + call->count = 0; } _leave(" = 0 [done]"); From 3448e6521755862446aed28e29abf12565d8844e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 302/384] afs: Kill struct afs_read::pg_offset Kill struct afs_read::pg_offset as nothing uses it. It's unnecessary as pos can be masked off. Signed-off-by: David Howells --- fs/afs/internal.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8499870147ef..7784a8bc375c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -135,7 +135,6 @@ struct afs_read { atomic_t usage; unsigned int remain; /* Amount remaining */ unsigned int index; /* Which page we're reading into */ - unsigned int pg_offset; /* Offset in page we're at */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); struct page *pages[]; From e8e581a88c5f5fc7cf1f636d122b77fbcfc8c2f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:44 +0000 Subject: [PATCH 303/384] afs: Handle a short write to an AFS page Handle the situation where afs_write_begin() is told to expect that a full-page write will be made, but this doesn't happen (EFAULT, CTRL-C, etc.), and so afs_write_end() sees a partial write took place. Currently, no attempt is to deal with the discrepency. Fix this by loading the gap from the server. Reported-by: Al Viro Signed-off-by: David Howells --- fs/afs/fsclient.c | 4 +++- fs/afs/internal.h | 2 +- fs/afs/write.c | 28 +++++++++++++++++++--------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index bf8904a1a58f..6f917dd1238c 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -393,8 +393,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (req->remain > 0) { call->offset = 0; req->index++; - if (req->index >= req->nr_pages) + if (req->index >= req->nr_pages) { + call->unmarshall = 4; goto begin_discard; + } goto begin_page; } } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7784a8bc375c..dc2cb486e127 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -130,7 +130,7 @@ struct afs_call_type { */ struct afs_read { loff_t pos; /* Where to start reading */ - loff_t len; /* How much to read */ + loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ atomic_t usage; unsigned int remain; /* Amount remaining */ diff --git a/fs/afs/write.c b/fs/afs/write.c index 3ac52f6a96ff..ea66890fc188 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -84,10 +84,9 @@ void afs_put_writeback(struct afs_writeback *wb) * partly or wholly fill a page that's under preparation for writing */ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, - loff_t pos, struct page *page) + loff_t pos, unsigned int len, struct page *page) { struct afs_read *req; - loff_t i_size; int ret; _enter(",,%llu", (unsigned long long)pos); @@ -99,16 +98,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, atomic_set(&req->usage, 1); req->pos = pos; + req->len = len; req->nr_pages = 1; req->pages[0] = page; get_page(page); - i_size = i_size_read(&vnode->vfs_inode); - if (pos + PAGE_SIZE > i_size) - req->len = i_size - pos; - else - req->len = PAGE_SIZE; - ret = afs_vnode_fetch_data(vnode, key, req); afs_put_read(req); if (ret < 0) { @@ -164,7 +158,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { - ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); + ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { kfree(candidate); _leave(" = %d [prep]", ret); @@ -258,7 +252,9 @@ int afs_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); + struct key *key = file->private_data; loff_t i_size, maybe_i_size; + int ret; _enter("{%x:%u},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); @@ -274,6 +270,20 @@ int afs_write_end(struct file *file, struct address_space *mapping, spin_unlock(&vnode->writeback_lock); } + if (!PageUptodate(page)) { + if (copied < len) { + /* Try and load any missing data from the server. The + * unmarshalling routine will take care of clearing any + * bits that are beyond the EOF. + */ + ret = afs_fill_page(vnode, key, pos + copied, + len - copied, page); + if (ret < 0) + return ret; + } + SetPageUptodate(page); + } + set_page_dirty(page); if (PageDirty(page)) _debug("dirtied"); From 58fed94dfb17e89556b5705f20f90e5b2971b6a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 304/384] afs: Flush outstanding writes when an fd is closed Flush outstanding writes in afs when an fd is closed. This is what NFS and CIFS do. Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/file.c | 1 + fs/afs/internal.h | 1 + fs/afs/write.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+) diff --git a/fs/afs/file.c b/fs/afs/file.c index a38e1c30d110..b5829443ff69 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -30,6 +30,7 @@ static int afs_readpages(struct file *filp, struct address_space *mapping, const struct file_operations afs_file_operations = { .open = afs_open, + .flush = afs_flush, .release = afs_release, .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index dc2cb486e127..af1d91ec7f2c 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -720,6 +720,7 @@ extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_writeback_all(struct afs_vnode *); +extern int afs_flush(struct file *, fl_owner_t); extern int afs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/afs/write.c b/fs/afs/write.c index ea66890fc188..f1450ea09406 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -757,6 +757,20 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; } +/* + * Flush out all outstanding writes on a file opened for writing when it is + * closed. + */ +int afs_flush(struct file *file, fl_owner_t id) +{ + _enter(""); + + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; + + return vfs_fsync(file, 0); +} + /* * notification that a previously read-only page is about to become writable * - if it returns an error, the caller will deliver a bus error signal From 944c74f472f926785b1948efa0e73e2f1b3b539b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 305/384] afs: Distinguish mountpoints from symlinks by file mode alone In AFS, mountpoints appear as symlinks with mode 0644 and normal symlinks have mode 0777, so use this to distinguish them rather than reading the content and parsing it. In the case of a mountpoint, the symlink body is a formatted string indicating the location of the target volume. Note that with this, kAFS no longer 'pre-fetches' the contents of symlinks, so afs_readpage() may fail with an access-denial because when the VFS calls d_automount(), it wraps the call in an credentials override that sets the initial creds - thereby preventing access to the caller's keyrings and the authentication keys held therein. To this end, a patch reverting that change to the VFS is required also. Reported-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/inode.c | 29 +++++++++++++------------- fs/afs/internal.h | 1 - fs/afs/mntpt.c | 53 ----------------------------------------------- 3 files changed, 15 insertions(+), 68 deletions(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 299dbaeb2e2a..ade6ec3873cf 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -54,8 +54,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_fop = &afs_dir_file_operations; break; case AFS_FTYPE_SYMLINK: - inode->i_mode = S_IFLNK | vnode->status.mode; - inode->i_op = &page_symlink_inode_operations; + /* Symlinks with a mode of 0644 are actually mountpoints. */ + if ((vnode->status.mode & 0777) == 0644) { + inode->i_flags |= S_AUTOMOUNT; + + spin_lock(&vnode->lock); + set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); + spin_unlock(&vnode->lock); + + inode->i_mode = S_IFDIR | 0555; + inode->i_op = &afs_mntpt_inode_operations; + inode->i_fop = &afs_mntpt_file_operations; + } else { + inode->i_mode = S_IFLNK | vnode->status.mode; + inode->i_op = &page_symlink_inode_operations; + } inode_nohighmem(inode); break; default: @@ -79,18 +92,6 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_generation = vnode->fid.unique; inode->i_version = vnode->status.data_version; inode->i_mapping->a_ops = &afs_fs_aops; - - /* check to see whether a symbolic link is really a mountpoint */ - if (vnode->status.type == AFS_FTYPE_SYMLINK) { - afs_mntpt_check_symlink(vnode, key); - - if (test_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags)) { - inode->i_mode = S_IFDIR | vnode->status.mode; - inode->i_op = &afs_mntpt_inode_operations; - inode->i_fop = &afs_mntpt_file_operations; - } - } - return 0; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index af1d91ec7f2c..39de154fb42e 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -559,7 +559,6 @@ extern const struct inode_operations afs_autocell_inode_operations; extern const struct file_operations afs_mntpt_file_operations; extern struct vfsmount *afs_d_automount(struct path *); -extern int afs_mntpt_check_symlink(struct afs_vnode *, struct key *); extern void afs_mntpt_kill_timer(void); /* diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index d4fb0afc0097..bd3b65cde282 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -46,59 +46,6 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); static unsigned long afs_mntpt_expiry_timeout = 10 * 60; -/* - * check a symbolic link to see whether it actually encodes a mountpoint - * - sets the AFS_VNODE_MOUNTPOINT flag on the vnode appropriately - */ -int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key) -{ - struct page *page; - size_t size; - char *buf; - int ret; - - _enter("{%x:%u,%u}", - vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique); - - /* read the contents of the symlink into the pagecache */ - page = read_cache_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, - afs_page_filler, key); - if (IS_ERR(page)) { - ret = PTR_ERR(page); - goto out; - } - - ret = -EIO; - if (PageError(page)) - goto out_free; - - buf = kmap(page); - - /* examine the symlink's contents */ - size = vnode->status.size; - _debug("symlink to %*.*s", (int) size, (int) size, buf); - - if (size > 2 && - (buf[0] == '%' || buf[0] == '#') && - buf[size - 1] == '.' - ) { - _debug("symlink is a mountpoint"); - spin_lock(&vnode->lock); - set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); - vnode->vfs_inode.i_flags |= S_AUTOMOUNT; - spin_unlock(&vnode->lock); - } - - ret = 0; - - kunmap(page); -out_free: - put_page(page); -out: - _leave(" = %d", ret); - return ret; -} - /* * no valid lookup procedure on this sort of dir */ From 1d7e4ebf291d3103466006926329e39aa355944f Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 306/384] afs: inode: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: David Howells --- fs/afs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ade6ec3873cf..e083e086b7ca 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -445,7 +445,7 @@ void afs_evict_inode(struct inode *inode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) call_rcu(&permits->rcu, afs_zap_permits); From df8a09d1b8f9e693ec3f6b7e0162fc817f2cf0db Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Thu, 16 Mar 2017 16:27:45 +0000 Subject: [PATCH 307/384] afs: security: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: David Howells --- fs/afs/security.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/security.c b/fs/afs/security.c index bfa9d3428383..ecb86a670180 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -114,7 +114,7 @@ void afs_clear_permits(struct afs_vnode *vnode) mutex_lock(&vnode->permits_lock); permits = vnode->permits; - rcu_assign_pointer(vnode->permits, NULL); + RCU_INIT_POINTER(vnode->permits, NULL); mutex_unlock(&vnode->permits_lock); if (permits) From 8a79790bf0b7da216627ffb85f52cfb4adbf1e4e Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 308/384] afs: Migrate vlocation fields to 64-bit get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs's vlocation record to use ktime_get_real_seconds() instead, for the fields time_of_death and update_at. Signed-off-by: Tina Ruchandani Signed-off-by: David Howells --- fs/afs/callback.c | 7 ++++--- fs/afs/internal.h | 7 ++++--- fs/afs/server.c | 6 +++--- fs/afs/vlocation.c | 16 +++++++++------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index b29447e03ede..25d404d22cae 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -362,7 +362,7 @@ static void afs_callback_updater(struct work_struct *work) { struct afs_server *server; struct afs_vnode *vnode, *xvnode; - time_t now; + time64_t now; long timeout; int ret; @@ -370,7 +370,7 @@ static void afs_callback_updater(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find the first vnode to update */ spin_lock(&server->cb_lock); @@ -424,7 +424,8 @@ static void afs_callback_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vnode->update_at = get_seconds() + afs_vnode_update_timeout; + vnode->update_at = ktime_get_real_seconds() + + afs_vnode_update_timeout; spin_lock(&server->cb_lock); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 39de154fb42e..97a16ce200be 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -249,7 +250,7 @@ struct afs_cache_vhash { */ struct afs_vlocation { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct list_head link; /* link in cell volume location list */ struct list_head grave; /* link in master graveyard list */ struct list_head update; /* link in master update list */ @@ -260,7 +261,7 @@ struct afs_vlocation { struct afs_cache_vlocation vldb; /* volume information DB record */ struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ wait_queue_head_t waitq; /* status change waitqueue */ - time_t update_at; /* time at which record should be updated */ + time64_t update_at; /* time at which record should be updated */ spinlock_t lock; /* access lock */ afs_vlocation_state_t state; /* volume location state */ unsigned short upd_rej_cnt; /* ENOMEDIUM count during update */ @@ -273,7 +274,7 @@ struct afs_vlocation { */ struct afs_server { atomic_t usage; - time_t time_of_death; /* time at which put reduced usage to 0 */ + time64_t time_of_death; /* time at which put reduced usage to 0 */ struct in_addr addr; /* server address */ struct afs_cell *cell; /* cell in which server resides */ struct list_head link; /* link in cell's server list */ diff --git a/fs/afs/server.c b/fs/afs/server.c index d4066ab7dd55..c001b1f2455f 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -242,7 +242,7 @@ void afs_put_server(struct afs_server *server) spin_lock(&afs_server_graveyard_lock); if (atomic_read(&server->usage) == 0) { list_move_tail(&server->grave, &afs_server_graveyard); - server->time_of_death = get_seconds(); + server->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_server_reaper, afs_server_timeout * HZ); } @@ -277,9 +277,9 @@ static void afs_reap_server(struct work_struct *work) LIST_HEAD(corpses); struct afs_server *server; unsigned long delay, expiry; - time_t now; + time64_t now; - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_server_graveyard_lock); while (!list_empty(&afs_server_graveyard)) { diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index d7d8dd8c0b31..37b7c3b342a6 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -340,7 +340,8 @@ static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) struct afs_vlocation *xvl; /* wait at least 10 minutes before updating... */ - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); @@ -506,7 +507,7 @@ void afs_put_vlocation(struct afs_vlocation *vl) if (atomic_read(&vl->usage) == 0) { _debug("buried"); list_move_tail(&vl->grave, &afs_vlocation_graveyard); - vl->time_of_death = get_seconds(); + vl->time_of_death = ktime_get_real_seconds(); queue_delayed_work(afs_wq, &afs_vlocation_reap, afs_vlocation_timeout * HZ); @@ -543,11 +544,11 @@ static void afs_vlocation_reaper(struct work_struct *work) LIST_HEAD(corpses); struct afs_vlocation *vl; unsigned long delay, expiry; - time_t now; + time64_t now; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); spin_lock(&afs_vlocation_graveyard_lock); while (!list_empty(&afs_vlocation_graveyard)) { @@ -622,13 +623,13 @@ static void afs_vlocation_updater(struct work_struct *work) { struct afs_cache_vlocation vldb; struct afs_vlocation *vl, *xvl; - time_t now; + time64_t now; long timeout; int ret; _enter(""); - now = get_seconds(); + now = ktime_get_real_seconds(); /* find a record to update */ spin_lock(&afs_vlocation_updates_lock); @@ -684,7 +685,8 @@ static void afs_vlocation_updater(struct work_struct *work) /* and then reschedule */ _debug("reschedule"); - vl->update_at = get_seconds() + afs_vlocation_update_timeout; + vl->update_at = ktime_get_real_seconds() + + afs_vlocation_update_timeout; spin_lock(&afs_vlocation_updates_lock); From 56e714312e7dbd6bb83b2f78d3ec19a404c7649f Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 309/384] afs: Prevent callback expiry timer overflow get_seconds() returns real wall-clock seconds. On 32-bit systems this value will overflow in year 2038 and beyond. This patch changes afs_vnode record to use ktime_get_real_seconds() instead, for the fields cb_expires and cb_expires_at. Signed-off-by: Tina Ruchandani Signed-off-by: David Howells --- fs/afs/fsclient.c | 2 +- fs/afs/inode.c | 7 ++++--- fs/afs/internal.h | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 6f917dd1238c..c05452a09398 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -145,7 +145,7 @@ static void xdr_decode_AFSCallBack(const __be32 **_bp, struct afs_vnode *vnode) vnode->cb_version = ntohl(*bp++); vnode->cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + ktime_get_real_seconds(); *_bp = bp; } diff --git a/fs/afs/inode.c b/fs/afs/inode.c index e083e086b7ca..4079c832ff27 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -246,12 +246,13 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, vnode->cb_version = 0; vnode->cb_expiry = 0; vnode->cb_type = 0; - vnode->cb_expires = get_seconds(); + vnode->cb_expires = ktime_get_real_seconds(); } else { vnode->cb_version = cb->version; vnode->cb_expiry = cb->expiry; vnode->cb_type = cb->type; - vnode->cb_expires = vnode->cb_expiry + get_seconds(); + vnode->cb_expires = vnode->cb_expiry + + ktime_get_real_seconds(); } } @@ -324,7 +325,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) !test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags) && !test_bit(AFS_VNODE_MODIFIED, &vnode->flags) && !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) { - if (vnode->cb_expires < get_seconds() + 10) { + if (vnode->cb_expires < ktime_get_real_seconds() + 10) { _debug("callback expired"); set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); } else { diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 97a16ce200be..832555003d03 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -377,8 +377,8 @@ struct afs_vnode { struct rb_node server_rb; /* link in server->fs_vnodes */ struct rb_node cb_promise; /* link in server->cb_promises */ struct work_struct cb_broken_work; /* work to be done on callback break */ - time_t cb_expires; /* time at which callback expires */ - time_t cb_expires_at; /* time used to order cb_promise */ + time64_t cb_expires; /* time at which callback expires */ + time64_t cb_expires_at; /* time used to order cb_promise */ unsigned cb_version; /* callback version */ unsigned cb_expiry; /* callback expiry time */ afs_callback_type_t cb_type; /* type of callback */ From 29f069853287dcb46eaf45a50dbf1232c1444ac6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 310/384] afs: Fix AFS read bug Fix a bug in AFS read whereby the request page afs_read::index isn't incremented after calling ->page_done() if ->remain reaches 0, indicating that the data read is complete. Without this a NULL pointer exception happens when ->page_done() is called twice for the last page because the page clearing loop will call it also and afs_readpages_page_done() clears the current entry in the page list. BUG: unable to handle kernel NULL pointer dereference at (null) IP: afs_readpages_page_done+0x21/0xa4 [kafs] PGD 0 Oops: 0002 [#1] SMP Modules linked in: kafs(E) CPU: 2 PID: 3002 Comm: md5sum Tainted: G E 4.10.0-fscache #485 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff8804017d86c0 task.stack: ffff8803fc1d8000 RIP: 0010:afs_readpages_page_done+0x21/0xa4 [kafs] RSP: 0018:ffff8803fc1db978 EFLAGS: 00010282 RAX: ffff880405d39af8 RBX: 0000000000000000 RCX: ffff880407d83ed4 RDX: 0000000000000000 RSI: ffff880405d39a00 RDI: ffff880405c6f400 RBP: ffff8803fc1db988 R08: 0000000000000000 R09: 0000000000000001 R10: ffff8803fc1db820 R11: ffff88040cf56000 R12: ffff8804088f1780 R13: ffff8804017d86c0 R14: ffff8804088f1780 R15: 0000000000003840 FS: 00007f8154469700(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000004016ec000 CR4: 00000000001406e0 Call Trace: afs_deliver_fs_fetch_data+0x5b9/0x60e [kafs] ? afs_make_call+0x316/0x4e8 [kafs] ? afs_make_call+0x359/0x4e8 [kafs] afs_deliver_to_call+0x173/0x2e8 [kafs] ? afs_make_call+0x316/0x4e8 [kafs] afs_make_call+0x37a/0x4e8 [kafs] ? wake_up_q+0x4f/0x4f ? __init_waitqueue_head+0x36/0x49 afs_fs_fetch_data+0x21c/0x227 [kafs] ? afs_fs_fetch_data+0x21c/0x227 [kafs] afs_vnode_fetch_data+0xf3/0x1d2 [kafs] afs_readpages+0x314/0x3fd [kafs] __do_page_cache_readahead+0x208/0x2c5 ondemand_readahead+0x3a2/0x3b7 ? ondemand_readahead+0x3a2/0x3b7 page_cache_async_readahead+0x5e/0x67 generic_file_read_iter+0x23b/0x70c ? __inode_security_revalidate+0x2f/0x62 __vfs_read+0xc4/0xe8 vfs_read+0xd1/0x15a SyS_read+0x4c/0x89 do_syscall_64+0x80/0x191 entry_SYSCALL64_slow_path+0x25/0x25 Reported-by: Marc Dionne Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/fsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c05452a09398..4314f9e63a2c 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -390,9 +390,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) if (call->offset == PAGE_SIZE) { if (req->page_done) req->page_done(call, req); + req->index++; if (req->remain > 0) { call->offset = 0; - req->index++; if (req->index >= req->nr_pages) { call->unmarshall = 4; goto begin_discard; From 6a0e3999e5cb3daa0468073fcdee0767422a4056 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 311/384] afs: Make struct afs_read::remain 64-bit Make struct afs_read::remain 64-bit so that it can handle huge transfers if we ever request them or the server decides to give us a bit extra data (the other fields there are already 64-bit). Signed-off-by: David Howells Tested-by: Marc Dionne --- fs/afs/fsclient.c | 8 ++++---- fs/afs/internal.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 4314f9e63a2c..0778c5b6b59b 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -321,7 +321,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) void *buffer; int ret; - _enter("{%u,%zu/%u;%u/%llu}", + _enter("{%u,%zu/%u;%llu/%llu}", call->unmarshall, call->offset, call->count, req->remain, req->actual_len); @@ -379,7 +379,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* extract the returned data */ case 3: - _debug("extract data %u/%llu %zu/%u", + _debug("extract data %llu/%llu %zu/%u", req->remain, req->actual_len, call->offset, call->count); buffer = kmap(req->pages[req->index]); @@ -405,9 +405,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* Discard any excess data the server gave us */ begin_discard: case 4: - size = min_t(size_t, sizeof(afs_discard_buffer), req->remain); + size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain); call->count = size; - _debug("extract discard %u/%llu %zu/%u", + _debug("extract discard %llu/%llu %zu/%u", req->remain, req->actual_len, call->offset, call->count); call->offset = 0; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 832555003d03..a6901360fb81 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -133,8 +133,8 @@ struct afs_read { loff_t pos; /* Where to start reading */ loff_t len; /* How much we're asking for */ loff_t actual_len; /* How much we're actually getting */ + loff_t remain; /* Amount remaining */ atomic_t usage; - unsigned int remain; /* Amount remaining */ unsigned int index; /* Which page we're reading into */ unsigned int nr_pages; void (*page_done)(struct afs_call *, struct afs_read *); From 2f5705a5c805e7f761f2228820656bb9363a3d8c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:46 +0000 Subject: [PATCH 312/384] afs: Use a bvec rather than a kvec in afs_send_pages() Use a bvec rather than a kvec in afs_send_pages() as we don't then have to call kmap() in advance. This allows us to pass the array of contiguous pages that we extracted through to rxrpc in one go rather than passing a single page at a time. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 95 +++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 419ef05dcb5e..bf45307ff201 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -259,67 +259,74 @@ void afs_flat_call_destructor(struct afs_call *call) call->buffer = NULL; } +#define AFS_BVEC_MAX 8 + +/* + * Load the given bvec with the next few pages. + */ +static void afs_load_bvec(struct afs_call *call, struct msghdr *msg, + struct bio_vec *bv, pgoff_t first, pgoff_t last, + unsigned offset) +{ + struct page *pages[AFS_BVEC_MAX]; + unsigned int nr, n, i, to, bytes = 0; + + nr = min_t(pgoff_t, last - first + 1, AFS_BVEC_MAX); + n = find_get_pages_contig(call->mapping, first, nr, pages); + ASSERTCMP(n, ==, nr); + + msg->msg_flags |= MSG_MORE; + for (i = 0; i < nr; i++) { + to = PAGE_SIZE; + if (first + i >= last) { + to = call->last_to; + msg->msg_flags &= ~MSG_MORE; + } + bv[i].bv_page = pages[i]; + bv[i].bv_len = to - offset; + bv[i].bv_offset = offset; + bytes += to - offset; + offset = 0; + } + + iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes); +} + /* * attach the data from a bunch of pages on an inode to a call */ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) { - struct page *pages[8]; - unsigned count, n, loop, offset, to; + struct bio_vec bv[AFS_BVEC_MAX]; + unsigned int bytes, nr, loop, offset; pgoff_t first = call->first, last = call->last; int ret; - _enter(""); - offset = call->first_offset; call->first_offset = 0; do { - _debug("attach %lx-%lx", first, last); + afs_load_bvec(call, msg, bv, first, last, offset); + offset = 0; + bytes = msg->msg_iter.count; + nr = msg->msg_iter.nr_segs; - count = last - first + 1; - if (count > ARRAY_SIZE(pages)) - count = ARRAY_SIZE(pages); - n = find_get_pages_contig(call->mapping, first, count, pages); - ASSERTCMP(n, ==, count); - - loop = 0; - do { - struct bio_vec bvec = {.bv_page = pages[loop], - .bv_offset = offset}; - msg->msg_flags = 0; - to = PAGE_SIZE; - if (first + loop >= last) - to = call->last_to; - else - msg->msg_flags = MSG_MORE; - bvec.bv_len = to - offset; - offset = 0; - - _debug("- range %u-%u%s", - offset, to, msg->msg_flags ? " [more]" : ""); - iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, - &bvec, 1, to - offset); - - /* have to change the state *before* sending the last - * packet as RxRPC might give us the reply before it - * returns from sending the request */ - if (first + loop >= last) - call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, - msg, to - offset); - if (ret < 0) - break; - } while (++loop < count); - first += count; - - for (loop = 0; loop < count; loop++) - put_page(pages[loop]); + /* Have to change the state *before* sending the last + * packet as RxRPC might give us the reply before it + * returns from sending the request. + */ + if (first + nr >= last) + call->state = AFS_CALL_AWAIT_REPLY; + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, bytes); + for (loop = 0; loop < nr; loop++) + put_page(bv[loop].bv_page); if (ret < 0) break; + + first += nr; } while (first <= last); - _leave(" = %d", ret); return ret; } From 146a1192783697810b63a1e41c4d59fc93387340 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 313/384] afs: Fix the maths in afs_fs_store_data() afs_fs_store_data() works out of the size of the write it's going to make, but it uses 32-bit unsigned subtraction in one place that gets automatically cast to loff_t. However, if to < offset, then the number goes negative, but as the result isn't signed, this doesn't get sign-extended to 64-bits when placed in a loff_t. Fix by casting the operands to loff_t. Signed-off-by: David Howells --- fs/afs/fsclient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 0778c5b6b59b..d9234b767287 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1236,7 +1236,7 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, _enter(",%x,{%x:%u},,", key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode); - size = to - offset; + size = (loff_t)to - (loff_t)offset; if (first != last) size += (loff_t)(last - first) << PAGE_SHIFT; pos = (loff_t)first << PAGE_SHIFT; From 1157f153f37a8586765034470e4f00a4a6c4ce6f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 314/384] afs: Invalid op ID should abort with RXGEN_OPCODE When we are given an invalid operation ID, we should abort that with RXGEN_OPCODE rather than RX_INVALID_OPERATION. Also map RXGEN_OPCODE to -ENOTSUPP. Signed-off-by: David Howells --- fs/afs/misc.c | 2 ++ fs/afs/rxrpc.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/afs/misc.c b/fs/afs/misc.c index 91ea1aa0d8b3..100b207efc9e 100644 --- a/fs/afs/misc.c +++ b/fs/afs/misc.c @@ -84,6 +84,8 @@ int afs_abort_to_error(u32 abort_code) case RXKADDATALEN: return -EKEYREJECTED; case RXKADILLEGALLEVEL: return -EKEYREJECTED; + case RXGEN_OPCODE: return -ENOTSUPP; + default: return -EREMOTEIO; } } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bf45307ff201..bf7761fe6ef5 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -465,7 +465,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code, -ret, "KNC"); goto do_abort; case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; + abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); goto do_abort; From 70af0e3bd65142f9e674961c975451638a7ce1d5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 315/384] afs: Better abort and net error handling If we receive a network error, a remote abort or a protocol error whilst we're still transmitting data, make sure we return an appropriate error to the caller rather than ESHUTDOWN or ECONNABORTED. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index bf7761fe6ef5..22d26b369070 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -340,6 +340,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct rxrpc_call *rxcall; struct msghdr msg; struct kvec iov[1]; + size_t offset; + u32 abort_code; int ret; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -388,9 +390,11 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, msg.msg_controllen = 0; msg.msg_flags = (call->send_pages ? MSG_MORE : 0); - /* have to change the state *before* sending the last packet as RxRPC - * might give us the reply before it returns from sending the - * request */ + /* We have to change the state *before* sending the last packet as + * rxrpc might give us the reply before it returns from sending the + * request. Further, if the send fails, we may already have been given + * a notification and may have collected it. + */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, rxcall, @@ -412,7 +416,17 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return afs_wait_for_call_to_complete(call); error_do_abort: - rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); + call->state = AFS_CALL_COMPLETE; + if (ret != -ECONNABORTED) { + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, + -ret, "KSD"); + } else { + abort_code = 0; + offset = 0; + rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset, + false, &abort_code); + ret = call->type->abort_to_error(abort_code); + } error_kill_call: afs_put_call(call); _leave(" = %d", ret); @@ -459,16 +473,18 @@ static void afs_deliver_to_call(struct afs_call *call) case -EINPROGRESS: case -EAGAIN: goto out; + case -ECONNABORTED: + goto call_complete; case -ENOTCONN: abort_code = RX_CALL_DEAD; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KNC"); - goto do_abort; + goto save_error; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, -ret, "KIV"); - goto do_abort; + goto save_error; case -ENODATA: case -EBADMSG: case -EMSGSIZE: @@ -478,7 +494,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RXGEN_SS_UNMARSHAL; rxrpc_kernel_abort_call(afs_socket, call->rxcall, abort_code, EBADMSG, "KUM"); - goto do_abort; + goto save_error; } } @@ -489,8 +505,9 @@ static void afs_deliver_to_call(struct afs_call *call) _leave(""); return; -do_abort: +save_error: call->error = ret; +call_complete: call->state = AFS_CALL_COMPLETE; goto done; } @@ -538,6 +555,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) _debug("call incomplete"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, RX_CALL_DEAD, -ret, abort_why); + } else if (call->error < 0) { + ret = call->error; } _debug("call complete"); From ab94f5d0dd6fd82e7eeca5e7c8096eaea0a0261f Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Thu, 16 Mar 2017 16:27:47 +0000 Subject: [PATCH 316/384] afs: Populate and use client modification time The inode timestamps should be set from the client time in the status received from the server, rather than the server time which is meant for internal server use. Set AFS_SET_MTIME and populate the mtime for operations that take an input status, such as file/dir creation and StoreData. If an input time is not provided the server will set the vnode times based on the current server time. In a situation where the server has some skew with the client, this could lead to the client seeing a timestamp in the future for a file that it just created or wrote. Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/fsclient.c | 18 +++++++++--------- fs/afs/inode.c | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index d9234b767287..19f76ae36982 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -111,7 +111,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_mode = mode; } - vnode->vfs_inode.i_ctime.tv_sec = status->mtime_server; + vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_version = data_version; @@ -734,8 +734,8 @@ int afs_fs_create(struct afs_server *server, memset(bp, 0, padsz); bp = (void *) bp + padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(mode & S_IALLUGO); /* unix mode */ @@ -1003,8 +1003,8 @@ int afs_fs_symlink(struct afs_server *server, memset(bp, 0, c_padsz); bp = (void *) bp + c_padsz; } - *bp++ = htonl(AFS_SET_MODE); - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MODE | AFS_SET_MTIME); + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = htonl(S_IRWXUGO); /* unix mode */ @@ -1203,8 +1203,8 @@ static int afs_fs_store_data64(struct afs_server *server, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ @@ -1280,8 +1280,8 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, *bp++ = htonl(vnode->fid.vnode); *bp++ = htonl(vnode->fid.unique); - *bp++ = 0; /* mask */ - *bp++ = 0; /* mtime */ + *bp++ = htonl(AFS_SET_MTIME); /* mask */ + *bp++ = htonl(vnode->vfs_inode.i_mtime.tv_sec); /* mtime */ *bp++ = 0; /* owner */ *bp++ = 0; /* group */ *bp++ = 0; /* unix mode */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 4079c832ff27..aae55dd15108 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -85,7 +85,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_uid = vnode->status.owner; inode->i_gid = vnode->status.group; inode->i_size = vnode->status.size; - inode->i_ctime.tv_sec = vnode->status.mtime_server; + inode->i_ctime.tv_sec = vnode->status.mtime_client; inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; From 68ae849d7e674b83610bc7fdf74b21621a09b9ac Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 317/384] afs: Don't set PG_error on local EINTR or ENOMEM when filling a page Don't set PG_error on a page if we get local EINTR or ENOMEM when filling a page for writing. Signed-off-by: David Howells --- fs/afs/file.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/afs/file.c b/fs/afs/file.c index b5829443ff69..0d5b8508869b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -212,7 +212,13 @@ int afs_page_filler(void *data, struct page *page) fscache_uncache_page(vnode->cache, page); #endif BUG_ON(PageFsCache(page)); - goto error; + + if (ret == -EINTR || + ret == -ENOMEM || + ret == -ERESTARTSYS || + ret == -EAGAIN) + goto error; + goto io_error; } SetPageUptodate(page); @@ -231,10 +237,12 @@ int afs_page_filler(void *data, struct page *page) _leave(" = 0"); return 0; +io_error: + SetPageError(page); + goto error; enomem: ret = -ENOMEM; error: - SetPageError(page); unlock_page(page); _leave(" = %d", ret); return ret; From 6d06b0d25209c80e99c1e89700f1e09694a3766b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 318/384] afs: Fix page leak in afs_write_begin() afs_write_begin() leaks a ref and a lock on a page if afs_fill_page() fails. Fix the leak by unlocking and releasing the page in the error path. Signed-off-by: David Howells --- fs/afs/write.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index f1450ea09406..6e13e96c3db0 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -154,12 +154,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping, kfree(candidate); return -ENOMEM; } - *pagep = page; - /* page won't leak in error case: it eventually gets cleaned off LRU */ if (!PageUptodate(page) && len != PAGE_SIZE) { ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); if (ret < 0) { + unlock_page(page); + put_page(page); kfree(candidate); _leave(" = %d [prep]", ret); return ret; @@ -167,6 +167,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping, SetPageUptodate(page); } + /* page won't leak in error case: it eventually gets cleaned off LRU */ + *pagep = page; + try_again: spin_lock(&vnode->writeback_lock); From 7286a35e893176169b09715096a4aca557e2ccd2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 319/384] afs: Fix afs_kill_pages() Fix afs_kill_pages() in two ways: (1) If a writeback has been partially flushed, then if we try and kill the pages it contains, some of them may no longer be undergoing writeback and end_page_writeback() will assert. Fix this by checking to see whether the page in question is actually undergoing writeback before ending that writeback. (2) The loop that scans for pages to kill doesn't increase the first page index, and so the loop may not terminate, but it will try to process the same pages over and over again. Fix this by increasing the first page index to one after the last page we processed. Signed-off-by: David Howells --- fs/afs/write.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 6e13e96c3db0..134de0667898 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -321,10 +321,14 @@ static void afs_kill_pages(struct afs_vnode *vnode, bool error, ASSERTCMP(pv.nr, ==, count); for (loop = 0; loop < count; loop++) { - ClearPageUptodate(pv.pages[loop]); + struct page *page = pv.pages[loop]; + ClearPageUptodate(page); if (error) - SetPageError(pv.pages[loop]); - end_page_writeback(pv.pages[loop]); + SetPageError(page); + if (PageWriteback(page)) + end_page_writeback(page); + if (page->index >= first) + first = page->index + 1; } __pagevec_release(&pv); From 445783d0ec173a52bef2e9b129de7d716a19b9fa Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:48 +0000 Subject: [PATCH 320/384] afs: Fix an off-by-one error in afs_send_pages() afs_send_pages() should only put the call into the AFS_CALL_AWAIT_REPLY state if it has sent all the pages - but the check it makes is incorrect and sometimes it will finish the loop early. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 22d26b369070..b12da6aa5412 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -315,7 +315,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg) * packet as RxRPC might give us the reply before it * returns from sending the request. */ - if (first + nr >= last) + if (first + nr - 1 >= last) call->state = AFS_CALL_AWAIT_REPLY; ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, msg, bytes); From 954cd6dc02a65065aecb7150962c0870c5b0e322 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: [PATCH 321/384] afs: Fix abort on signal while waiting for call completion Fix the way in which a call that's in progress and being waited for is aborted in the case that EINTR is detected. We should be sending RX_USER_ABORT rather than RX_CALL_DEAD as the abort code. Note that since the only two ways out of the loop are if the call completes or if a signal happens, the kill-the-call clause after the loop has finished can only happen in the case of EINTR. This means that we only have one abort case to deal with, not two, and the "KWC" case can never happen and so can be deleted. Note further that simply aborting the call isn't necessarily the best thing here since at this point: the request has been entirely sent and it's likely the server will do the operation anyway - whether we abort it or not. In future, we should punt the handling of the remainder of the call off to a background thread. Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/rxrpc.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index b12da6aa5412..8f76b13d5549 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -517,7 +517,6 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -536,13 +535,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) continue; } - abort_why = "KWC"; - ret = call->error; - if (call->state == AFS_CALL_COMPLETE) - break; - abort_why = "KWI"; - ret = -EINTR; - if (signal_pending(current)) + if (call->state == AFS_CALL_COMPLETE || + signal_pending(current)) break; schedule(); } @@ -550,15 +544,14 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); - /* kill the call */ + /* Kill off the call if it's still live. */ if (call->state < AFS_CALL_COMPLETE) { - _debug("call incomplete"); + _debug("call interrupted"); rxrpc_kernel_abort_call(afs_socket, call->rxcall, - RX_CALL_DEAD, -ret, abort_why); - } else if (call->error < 0) { - ret = call->error; + RX_USER_ABORT, -EINTR, "KWI"); } + ret = call->error; _debug("call complete"); afs_put_call(call); _leave(" = %d", ret); From 65a151094edeb04e8f5f6f1502028e2383e81bb8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: [PATCH 322/384] afs: ->writepage() shouldn't call clear_page_dirty_for_io() The ->writepage() op shouldn't call clear_page_dirty_for_io() as that has already been called by the caller. Fix afs_writepage() by moving the call out of afs_write_back_from_locked_page() to afs_writepages_region() where it is needed. Signed-off-by: David Howells --- fs/afs/write.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index 134de0667898..e5f150bccfb5 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -231,7 +231,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping, if (wb->state == AFS_WBACK_PENDING) wb->state = AFS_WBACK_CONFLICTING; spin_unlock(&vnode->writeback_lock); - if (PageDirty(page)) { + if (clear_page_dirty_for_io(page)) { ret = afs_write_back_from_locked_page(wb, page); if (ret < 0) { afs_put_writeback(candidate); @@ -353,8 +353,6 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, _enter(",%lx", primary_page->index); count = 1; - if (!clear_page_dirty_for_io(primary_page)) - BUG(); if (test_set_page_writeback(primary_page)) BUG(); @@ -542,6 +540,8 @@ static int afs_writepages_region(struct address_space *mapping, wb->state = AFS_WBACK_WRITING; spin_unlock(&wb->vnode->writeback_lock); + if (!clear_page_dirty_for_io(page)) + BUG(); ret = afs_write_back_from_locked_page(wb, page); unlock_page(page); put_page(page); From c5051c7bc777dffa5661569dec5997f432b9a34a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 16 Mar 2017 16:27:49 +0000 Subject: [PATCH 323/384] afs: Don't wait for page writeback with the page lock held Drop the page lock before waiting for page writeback. Signed-off-by: David Howells --- fs/afs/write.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/afs/write.c b/fs/afs/write.c index e5f150bccfb5..2d2fccd5044b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -518,17 +518,16 @@ static int afs_writepages_region(struct address_space *mapping, */ lock_page(page); - if (page->mapping != mapping) { + if (page->mapping != mapping || !PageDirty(page)) { unlock_page(page); put_page(page); continue; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || !PageDirty(page)) { + if (PageWriteback(page)) { unlock_page(page); + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); put_page(page); continue; } From 271df90e4e530c17f237b27034d6341cb2c2f536 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Thu, 16 Mar 2017 16:40:19 -0700 Subject: [PATCH 324/384] z3fold: fix spinlock unlocking in page reclaim Commmit 5a27aa822029 ("z3fold: add kref refcounting") introduced a bug in z3fold_reclaim_page() with function exit that may leave pool->lock spinlock held. Here comes the trivial fix. Fixes: 5a27aa822029 ("z3fold: add kref refcounting") Link: http://lkml.kernel.org/r/20170311222239.7b83d8e7ef1914e05497649f@gmail.com Reported-by: Alexey Khoroshilov Signed-off-by: Vitaly Wool Cc: Dan Streetman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/z3fold.c b/mm/z3fold.c index 8970a2fd3b1a..f9492bccfd79 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -667,6 +667,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) z3fold_page_unlock(zhdr); spin_lock(&pool->lock); if (kref_put(&zhdr->refcount, release_z3fold_page)) { + spin_unlock(&pool->lock); atomic64_dec(&pool->pages_nr); return 0; } From 5be9b730b09c45c358bbfe7f51d254e306cccc07 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 16 Mar 2017 16:40:21 -0700 Subject: [PATCH 325/384] kasan: add a prototype of task_struct to avoid warning Add a prototype of task_struct to fix below warning on arm64. In file included from arch/arm64/kernel/probes/kprobes.c:19:0: include/linux/kasan.h:81:132: error: 'struct task_struct' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] static inline void kasan_unpoison_task_stack(struct task_struct *task) {} As same as other types (kmem_cache, page, and vm_struct) this adds a prototype of task_struct data structure on top of kasan.h. [arnd] A related warning was fixed before, but now appears in a different line in the same file in v4.11-rc2. The patch from Masami Hiramatsu still seems appropriate, so let's take his version. Fixes: 71af2ed5eeea ("kasan, sched/headers: Remove from ") Link: https://patchwork.kernel.org/patch/9569839/ Link: http://lkml.kernel.org/r/20170313141517.3397802-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Masami Hiramatsu Acked-by: Alexander Potapenko Acked-by: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 1c823bef4c15..5734480c9590 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -6,6 +6,7 @@ struct kmem_cache; struct page; struct vm_struct; +struct task_struct; #ifdef CONFIG_KASAN From d0f33ac9ae7b2a727fb678235ae37baf1d0608d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 16 Mar 2017 16:40:24 -0700 Subject: [PATCH 326/384] mm, x86: fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Ackedy-by: Dave Jiang Cc: Matthew Wilcox Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Garnier Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif From 171012f561274784160f666f8398af8b42216e1f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 16 Mar 2017 16:40:27 -0700 Subject: [PATCH 327/384] mm: don't warn when vmalloc() fails due to a fatal signal When vmalloc() fails it prints a very lengthy message with all the details about memory consumption assuming that it happened due to OOM. However, vmalloc() can also fail due to fatal signal pending. In such case the message is quite confusing because it suggests that it is OOM but the numbers suggest otherwise. The messages can also pollute console considerably. Don't warn when vmalloc() fails due to fatal signal pending. Link: http://lkml.kernel.org/r/20170313114425.72724-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Reviewed-by: Matthew Wilcox Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0dd80222b20b..0b057628a7ba 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1683,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, if (fatal_signal_pending(current)) { area->nr_pages = i; - goto fail; + goto fail_no_warn; } if (node == NUMA_NO_NODE) @@ -1709,6 +1709,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, warn_alloc(gfp_mask, NULL, "vmalloc: allocation failure, allocated %ld of %ld bytes", (area->nr_pages*PAGE_SIZE), area->size); +fail_no_warn: vfree(area->addr); return NULL; } From 55adc1d05dca9e949cdf46c747cb1e91c0e9143d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:30 -0700 Subject: [PATCH 328/384] mm: add private lock to serialize memory hotplug operations Commit bfc8c90139eb ("mem-hotplug: implement get/put_online_mems") introduced new functions get/put_online_mems() and mem_hotplug_begin/end() in order to allow similar semantics for memory hotplug like for cpu hotplug. The corresponding functions for cpu hotplug are get/put_online_cpus() and cpu_hotplug_begin/done() for cpu hotplug. The commit however missed to introduce functions that would serialize memory hotplug operations like they are done for cpu hotplug with cpu_maps_update_begin/done(). This basically leaves mem_hotplug.active_writer unprotected and allows concurrent writers to modify it, which may lead to problems as outlined by commit f931ab479dd2 ("mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done}"). That commit was extended again with commit b5d24fda9c3d ("mm, devm_memremap_pages: hold device_hotplug lock over mem_hotplug_{begin, done}") which serializes memory hotplug operations for some call sites by using the device_hotplug lock. In addition with commit 3fc21924100b ("mm: validate device_hotplug is held for memory hotplug") a sanity check was added to mem_hotplug_begin() to verify that the device_hotplug lock is held. This in turn triggers the following warning on s390: WARNING: CPU: 6 PID: 1 at drivers/base/core.c:643 assert_held_device_hotplug+0x4a/0x58 Call Trace: assert_held_device_hotplug+0x40/0x58) mem_hotplug_begin+0x34/0xc8 add_memory_resource+0x7e/0x1f8 add_memory+0xda/0x130 add_memory_merged+0x15c/0x178 sclp_detect_standby_memory+0x2ae/0x2f8 do_one_initcall+0xa2/0x150 kernel_init_freeable+0x228/0x2d8 kernel_init+0x2a/0x140 kernel_thread_starter+0x6/0xc One possible fix would be to add more lock_device_hotplug() and unlock_device_hotplug() calls around each call site of mem_hotplug_begin/end(). But that would give the device_hotplug lock additional semantics it better should not have (serialize memory hotplug operations). Instead add a new memory_add_remove_lock which has the similar semantics like cpu_add_remove_lock for cpu hotplug. To keep things hopefully a bit easier the lock will be locked and unlocked within the mem_hotplug_begin/end() functions. Link: http://lkml.kernel.org/r/20170314125226.16779-2-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Reported-by: Sebastian Ott Acked-by: Dan Williams Acked-by: Rafael J. Wysocki Cc: Michal Hocko Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/memremap.c | 4 ---- mm/memory_hotplug.c | 6 +++++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 06123234f118..07e85e5229da 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -247,11 +247,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data) align_start = res->start & ~(SECTION_SIZE - 1); align_size = ALIGN(resource_size(res), SECTION_SIZE); - lock_device_hotplug(); mem_hotplug_begin(); arch_remove_memory(align_start, align_size); mem_hotplug_done(); - unlock_device_hotplug(); untrack_pfn(NULL, PHYS_PFN(align_start), align_size); pgmap_radix_release(res); @@ -364,11 +362,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, if (error) goto err_pfn_remap; - lock_device_hotplug(); mem_hotplug_begin(); error = arch_add_memory(nid, align_start, align_size, true); mem_hotplug_done(); - unlock_device_hotplug(); if (error) goto err_add_memory; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 295479b792ec..6fa7208bcd56 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -125,9 +125,12 @@ void put_online_mems(void) } +/* Serializes write accesses to mem_hotplug.active_writer. */ +static DEFINE_MUTEX(memory_add_remove_lock); + void mem_hotplug_begin(void) { - assert_held_device_hotplug(); + mutex_lock(&memory_add_remove_lock); mem_hotplug.active_writer = current; @@ -147,6 +150,7 @@ void mem_hotplug_done(void) mem_hotplug.active_writer = NULL; mutex_unlock(&mem_hotplug.lock); memhp_lock_release(); + mutex_unlock(&memory_add_remove_lock); } /* add this memory to iomem resource */ From 15c9e10d9ad4d41d076148bbff1de7f659f68852 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 16 Mar 2017 16:40:33 -0700 Subject: [PATCH 329/384] drivers core: remove assert_held_device_hotplug() The last caller of assert_held_device_hotplug() is gone, so remove it again. Link: http://lkml.kernel.org/r/20170314125226.16779-3-heiko.carstens@de.ibm.com Signed-off-by: Heiko Carstens Acked-by: Dan Williams Cc: Michal Hocko Cc: "Rafael J. Wysocki" Cc: Vladimir Davydov Cc: Ben Hutchings Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Sebastian Ott Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/core.c | 5 ----- include/linux/device.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 684bda4d14a1..6bb60fb6a30b 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -639,11 +639,6 @@ int lock_device_hotplug_sysfs(void) return restart_syscall(); } -void assert_held_device_hotplug(void) -{ - lockdep_assert_held(&device_hotplug_lock); -} - #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { diff --git a/include/linux/device.h b/include/linux/device.h index 30c4570e928d..9ef518af5515 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1140,7 +1140,6 @@ static inline bool device_supports_offline(struct device *dev) extern void lock_device_hotplug(void); extern void unlock_device_hotplug(void); extern int lock_device_hotplug_sysfs(void); -void assert_held_device_hotplug(void); extern int device_offline(struct device *dev); extern int device_online(struct device *dev); extern void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); From 8971e1c79d3f6c9a5e6f7a65c50c41f434a4dae6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 17 Mar 2017 16:02:35 +1100 Subject: [PATCH 330/384] powerpc/pseries: Don't give a warning when HPT resizing isn't available As of commit 438cc81a41e8 ("powerpc/pseries: Automatically resize HPT for memory hot add/remove"), when running on the pseries platform, we always attempt to use the PAPR extension to resize the hashed page table (HPT) when we add or remove memory. This is fine, but when the extension is not available we'll give a harmless, but scary warning. Instead check if the firmware supports HPT resizing before populating the mmu_hash_ops.resize_hpt pointer. Signed-off-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/lpar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 251060cf1713..8b1fe895daa3 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -751,7 +751,9 @@ void __init hpte_init_pseries(void) mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; - mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; + + if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) + mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; } void radix_init_pseries(void) From 5dc855d44c2ad960a86f593c60461f1ae1566b6d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:39 -0700 Subject: [PATCH 331/384] x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm If one thread mmaps a perf event while another thread in the same mm is in some context where active_mm != mm (which can happen in the scheduler, for example), refresh_pce() would write the wrong value to CR4.PCE. This broke some PAPI tests. Reported-and-tested-by: Vince Weaver Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 7911d3f7af14 ("perf/x86: Only allow rdpmc if a perf_event is mapped") Link: http://lkml.kernel.org/r/0c5b38a76ea50e405f9abe07a13dfaef87c173a1.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23..e07b36c5588a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2100,8 +2100,8 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->mm) - load_mm_cr4(current->mm); + if (current->active_mm) + load_mm_cr4(current->active_mm); } static void x86_pmu_event_mapped(struct perf_event *event) From 4b07372a32c0c1505a7634ad7e607d83340ef645 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:40 -0700 Subject: [PATCH 332/384] x86/perf: Clarify why x86_pmu_event_mapped() isn't racy Naively, it looks racy, but ->mmap_sem saves it. Add a comment and a lockdep assertion. Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/03a1e629063899168dfc4707f3bb6e581e21f5c6.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e07b36c5588a..183a972f9210 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2109,6 +2109,18 @@ static void x86_pmu_event_mapped(struct perf_event *event) if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; + /* + * This function relies on not being called concurrently in two + * tasks in the same mm. Otherwise one task could observe + * perf_rdpmc_allowed > 1 and return all the way back to + * userspace with CR4.PCE clear while another task is still + * doing on_each_cpu_mask() to propagate CR4.PCE. + * + * For now, this can't happen because all callers hold mmap_sem + * for write. If this changes, we'll need a different solution. + */ + lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); } From 2cd29f2387be70de9feb4c9f8dbc7c0bd55748ce Mon Sep 17 00:00:00 2001 From: Robert Middleton Date: Wed, 15 Mar 2017 16:56:47 -0400 Subject: [PATCH 333/384] gpio:mcp23s08 Fixed missing interrupts When an interrupt occurs on an MCP23S08 chip, the INTF register will only contain one bit as causing the interrupt. If more than two pins change at the same time on the chip, this causes one of the pins to not be reported. This patch fixes the logic for checking if a pin has changed, so that multiple pins will always cause more than one change. Cc: stable@vger.kernel.org Signed-off-by: Robert Middleton Tested-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mcp23s08.c | 65 +++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index bdb692345428..2a57d024481d 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -270,8 +270,10 @@ mcp23s08_direction_output(struct gpio_chip *chip, unsigned offset, int value) static irqreturn_t mcp23s08_irq(int irq, void *data) { struct mcp23s08 *mcp = data; - int intcap, intf, i; + int intcap, intf, i, gpio, gpio_orig, intcap_mask; unsigned int child_irq; + bool intf_set, intcap_changed, gpio_bit_changed, + defval_changed, gpio_set; mutex_lock(&mcp->lock); if (mcp_read(mcp, MCP_INTF, &intf) < 0) { @@ -287,14 +289,67 @@ static irqreturn_t mcp23s08_irq(int irq, void *data) } mcp->cache[MCP_INTCAP] = intcap; + + /* This clears the interrupt(configurable on S18) */ + if (mcp_read(mcp, MCP_GPIO, &gpio) < 0) { + mutex_unlock(&mcp->lock); + return IRQ_HANDLED; + } + gpio_orig = mcp->cache[MCP_GPIO]; + mcp->cache[MCP_GPIO] = gpio; mutex_unlock(&mcp->lock); + if (mcp->cache[MCP_INTF] == 0) { + /* There is no interrupt pending */ + return IRQ_HANDLED; + } + + dev_dbg(mcp->chip.parent, + "intcap 0x%04X intf 0x%04X gpio_orig 0x%04X gpio 0x%04X\n", + intcap, intf, gpio_orig, gpio); for (i = 0; i < mcp->chip.ngpio; i++) { - if ((BIT(i) & mcp->cache[MCP_INTF]) && - ((BIT(i) & intcap & mcp->irq_rise) || - (mcp->irq_fall & ~intcap & BIT(i)) || - (BIT(i) & mcp->cache[MCP_INTCON]))) { + /* We must check all of the inputs on the chip, + * otherwise we may not notice a change on >=2 pins. + * + * On at least the mcp23s17, INTCAP is only updated + * one byte at a time(INTCAPA and INTCAPB are + * not written to at the same time - only on a per-bank + * basis). + * + * INTF only contains the single bit that caused the + * interrupt per-bank. On the mcp23s17, there is + * INTFA and INTFB. If two pins are changed on the A + * side at the same time, INTF will only have one bit + * set. If one pin on the A side and one pin on the B + * side are changed at the same time, INTF will have + * two bits set. Thus, INTF can't be the only check + * to see if the input has changed. + */ + + intf_set = BIT(i) & mcp->cache[MCP_INTF]; + if (i < 8 && intf_set) + intcap_mask = 0x00FF; + else if (i >= 8 && intf_set) + intcap_mask = 0xFF00; + else + intcap_mask = 0x00; + + intcap_changed = (intcap_mask & + (BIT(i) & mcp->cache[MCP_INTCAP])) != + (intcap_mask & (BIT(i) & gpio_orig)); + gpio_set = BIT(i) & mcp->cache[MCP_GPIO]; + gpio_bit_changed = (BIT(i) & gpio_orig) != + (BIT(i) & mcp->cache[MCP_GPIO]); + defval_changed = (BIT(i) & mcp->cache[MCP_INTCON]) && + ((BIT(i) & mcp->cache[MCP_GPIO]) != + (BIT(i) & mcp->cache[MCP_DEFVAL])); + + if (((gpio_bit_changed || intcap_changed) && + (BIT(i) & mcp->irq_rise) && gpio_set) || + ((gpio_bit_changed || intcap_changed) && + (BIT(i) & mcp->irq_fall) && !gpio_set) || + defval_changed) { child_irq = irq_find_mapping(mcp->chip.irqdomain, i); handle_nested_irq(child_irq); } From e7ede72a6d40cb3a30c087142d79381ca8a31dab Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 15 Mar 2017 22:53:37 +0100 Subject: [PATCH 334/384] perf symbols: Fix symbols__fixup_end heuristic for corner cases The current symbols__fixup_end() heuristic for the last entry in the rb tree is suboptimal as it leads to not being able to recognize the symbol in the call graph in a couple of corner cases, for example: i) If the symbol has a start address (f.e. exposed via kallsyms) that is at a page boundary, then the roundup(curr->start, 4096) for the last entry will result in curr->start == curr->end with a symbol length of zero. ii) If the symbol has a start address that is shortly before a page boundary, then also here, curr->end - curr->start will just be very few bytes, where it's unrealistic that we could perform a match against. Instead, change the heuristic to roundup(curr->start, 4096) + 4096, so that we can catch such corner cases and have a better chance to find that specific symbol. It's still just best effort as the real end of the symbol is unknown to us (and could even be at a larger offset than the current range), but better than the current situation. Alexei reported that he recently run into case i) with a JITed eBPF program (these are all page aligned) as the last symbol which wasn't properly shown in the call graph (while other eBPF program symbols in the rb tree were displayed correctly). Since this is a generic issue, lets try to improve the heuristic a bit. Reported-and-Tested-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Fixes: 2e538c4a1847 ("perf tools: Improve kernel/modules symbol lookup") Link: http://lkml.kernel.org/r/bb5c80d27743be6f12afc68405f1956a330e1bc9.1489614365.git.daniel@iogearbox.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 70e389bc4af7..9b4d8ba22fed 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols) /* Last entry */ if (curr->end == curr->start) - curr->end = roundup(curr->start, 4096); + curr->end = roundup(curr->start, 4096) + 4096; } void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) From 38a33101dd5fb8f07244e7e6dd04747a6cd2e3fb Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Thu, 9 Mar 2017 11:36:36 +0800 Subject: [PATCH 335/384] NFS: fix the fault nrequests decreasing for nfs_inode COPY The nfs_commit_file for NFSv4.2's COPY operation goes through the commit path for normal WRITE, but without increase nrequests, so, the nrequests decreased in nfs_commit_release_pages is fault. After that, the nrequests will be wrong. [ 5670.299881] ------------[ cut here ]------------ [ 5670.300295] WARNING: CPU: 0 PID: 27656 at fs/nfs/inode.c:127 nfs_clear_inode+0x66/0x90 [nfs] [ 5670.300558] Modules linked in: nfsv4(E) nfs(E) fscache(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock snd_seq_midi snd_seq_midi_event ppdev f2fs coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_ens1371 intel_rapl_perf gameport snd_ac97_codec vmw_balloon ac97_bus snd_seq snd_pcm joydev snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c vmwgfx drm_kms_helper ttm drm e1000 crc32c_intel mptspi scsi_transport_spi serio_raw mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: fscache] [ 5670.302925] CPU: 0 PID: 27656 Comm: umount.nfs4 Tainted: G W E 4.11.0-rc1+ #519 [ 5670.303292] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 5670.304094] Call Trace: [ 5670.304510] dump_stack+0x63/0x86 [ 5670.304917] __warn+0xcb/0xf0 [ 5670.305276] warn_slowpath_null+0x1d/0x20 [ 5670.305661] nfs_clear_inode+0x66/0x90 [nfs] [ 5670.306093] nfs4_evict_inode+0x61/0x70 [nfsv4] [ 5670.306480] evict+0xbb/0x1c0 [ 5670.306888] dispose_list+0x4d/0x70 [ 5670.307233] evict_inodes+0x178/0x1a0 [ 5670.307579] generic_shutdown_super+0x44/0xf0 [ 5670.307985] nfs_kill_super+0x21/0x40 [nfs] [ 5670.308325] deactivate_locked_super+0x43/0x70 [ 5670.308698] deactivate_super+0x5a/0x60 [ 5670.309036] cleanup_mnt+0x3f/0x90 [ 5670.309407] __cleanup_mnt+0x12/0x20 [ 5670.309837] task_work_run+0x80/0xa0 [ 5670.310162] exit_to_usermode_loop+0x89/0x90 [ 5670.310497] syscall_return_slowpath+0xaa/0xb0 [ 5670.310875] entry_SYSCALL_64_fastpath+0xa7/0xa9 [ 5670.311197] RIP: 0033:0x7f1bb3617fe7 [ 5670.311545] RSP: 002b:00007ffecbabb828 EFLAGS: 00000206 ORIG_RAX: 00000000000000a6 [ 5670.311906] RAX: 0000000000000000 RBX: 0000000001dca1f0 RCX: 00007f1bb3617fe7 [ 5670.312239] RDX: 000000000000000c RSI: 0000000000000001 RDI: 0000000001dc83c0 [ 5670.312653] RBP: 0000000001dc83c0 R08: 0000000000000001 R09: 0000000000000000 [ 5670.312998] R10: 0000000000000755 R11: 0000000000000206 R12: 00007ffecbabc66a [ 5670.313335] R13: 0000000001dc83a0 R14: 0000000000000000 R15: 0000000000000000 [ 5670.313758] ---[ end trace bf4bfe7764e4eb40 ]--- Cc: linux-kernel@vger.kernel.org Fixes: 67911c8f18 ("NFS: Add nfs_commit_file()") Signed-off-by: Kinglong Mee Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e75b056f46f4..abb2c8a3be42 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1784,7 +1784,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { nfs_context_set_write_error(req->wb_context, status); - nfs_inode_remove_request(req); + if (req->wb_page) + nfs_inode_remove_request(req); dprintk_cont(", error = %d\n", status); goto next; } @@ -1793,7 +1794,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) * returned by the server against all stored verfs. */ if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { /* We have a match */ - nfs_inode_remove_request(req); + if (req->wb_page) + nfs_inode_remove_request(req); dprintk_cont(" OK\n"); goto next; } From eed50879d64ab1b9f76445dbab822e43a098b309 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 11 Mar 2017 15:52:47 -0500 Subject: [PATCH 336/384] xprtrdma: Squelch kbuild sparse complaint New complaint from kbuild for 4.9.y: net/sunrpc/xprtrdma/verbs.c:489:19: sparse: incompatible types in comparison expression (different type sizes) verbs.c: 489 max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); I can't reproduce this running sparse here. Likewise, "make W=1 net/sunrpc/xprtrdma/verbs.o" never indicated any issue. A little poking suggests that because the range of its values is small, gcc can make the actual width of RPCRDMA_MAX_SEND_SGES smaller than the width of an unsigned integer. Fixes: 16f906d66cd7 ("xprtrdma: Reduce required number of send SGEs") Signed-off-by: Chuck Lever Cc: stable@kernel.org Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 81cd31acf690..3b332b395045 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -503,7 +503,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct ib_cq *sendcq, *recvcq; int rc; - max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES); + max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge, + RPCRDMA_MAX_SEND_SGES); if (max_sge < RPCRDMA_MIN_SEND_SGES) { pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); return -ENOMEM; From 05fae7bbc237bc7de0ee9c3dcf85b2572a80e3b5 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 10 Mar 2017 10:48:13 +0800 Subject: [PATCH 337/384] nfs: make nfs4_cb_sv_ops static Fixes the following sparse warning: fs/nfs/callback.c:235:21: warning: symbol 'nfs4_cb_sv_ops' was not declared. Should it be static? Signed-off-by: Jason Yan Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca..5c8a096d763e 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -231,12 +231,12 @@ static struct svc_serv_ops nfs41_cb_sv_ops = { .svo_module = THIS_MODULE, }; -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; From 63513232f8cd219dcaa5eafae028740ed3067d83 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 13 Mar 2017 10:36:19 -0400 Subject: [PATCH 338/384] NFS prevent double free in async nfs4_exchange_id Since rpc_task is async, the release function should be called which will free the impl_id, scope, and owner. Trond pointed at 2 more problems: -- use of client pointer after free in the nfs4_exchangeid_release() function -- cl_count mismatch if rpc_run_task() isn't run Fixes: 8d89bd70bc9 ("NFS setup async exchange_id") Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org # 4.9 Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c1f5369cd339..c780d98035cc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7425,11 +7425,11 @@ static void nfs4_exchange_id_release(void *data) struct nfs41_exchange_id_data *cdata = (struct nfs41_exchange_id_data *)data; - nfs_put_client(cdata->args.client); if (cdata->xprt) { xprt_put(cdata->xprt); rpc_clnt_xprt_switch_put(cdata->args.client->cl_rpcclient); } + nfs_put_client(cdata->args.client); kfree(cdata->res.impl_id); kfree(cdata->res.server_scope); kfree(cdata->res.server_owner); @@ -7536,10 +7536,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, task_setup_data.callback_data = calldata; task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - status = PTR_ERR(task); - goto out_impl_id; - } + if (IS_ERR(task)) + return PTR_ERR(task); if (!xprt) { status = rpc_wait_for_completion_task(task); @@ -7567,6 +7565,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, kfree(calldata->res.server_owner); out_calldata: kfree(calldata); + nfs_put_client(clp); goto out; } From 033853325fe3bdc70819a8b97915bd3bca41d3af Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 8 Mar 2017 14:39:15 -0500 Subject: [PATCH 339/384] NFSv4.1 respect server's max size in CREATE_SESSION Currently client doesn't respect max sizes server returns in CREATE_SESSION. nfs4_session_set_rwsize() gets called and server->rsize, server->wsize are 0 so they never get set to the sizes returned by the server. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 5ae9d64ea08b..8346ccbf2d52 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1023,9 +1023,9 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (server->rsize > server_resp_sz) + if (!server->rsize || server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (server->wsize > server_rqst_sz) + if (!server->wsize || server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } From a33e4b036d4612f62220f37a9fa29d273b6fd0ca Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 9 Mar 2017 12:56:48 -0500 Subject: [PATCH 340/384] pNFS: return status from nfs4_pnfs_ds_connect The nfs4_pnfs_ds_connect path can call rpc_create which can fail or it can wait on another context to reach the same failure. This checks that the rpc_create succeeded and returns the error to the caller. When an error is returned, both the files and flexfiles layouts will return NULL from _prepare_ds(). The flexfiles layout will also return the layout with the error NFS4ERR_NXIO. Signed-off-by: Weston Andros Adamson Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 25 ++++++++++++++++++++++- fs/nfs/filelayout/filelayoutdev.c | 7 ++++++- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 ++- fs/nfs/internal.h | 2 ++ fs/nfs/pnfs.h | 2 +- fs/nfs/pnfs_nfs.c | 15 ++++++++++++-- 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 91a8d610ba0f..390ada8741bc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -325,10 +325,33 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat return NULL; } -static bool nfs_client_init_is_complete(const struct nfs_client *clp) +/* + * Return true if @clp is done initializing, false if still working on it. + * + * Use nfs_client_init_status to check if it was successful. + */ +bool nfs_client_init_is_complete(const struct nfs_client *clp) { return clp->cl_cons_state <= NFS_CS_READY; } +EXPORT_SYMBOL_GPL(nfs_client_init_is_complete); + +/* + * Return 0 if @clp was successfully initialized, -errno otherwise. + * + * This must be called *after* nfs_client_init_is_complete() returns true, + * otherwise it will pop WARN_ON_ONCE and return -EINVAL + */ +int nfs_client_init_status(const struct nfs_client *clp) +{ + /* called without checking nfs_client_init_is_complete */ + if (clp->cl_cons_state > NFS_CS_READY) { + WARN_ON_ONCE(1); + return -EINVAL; + } + return clp->cl_cons_state; +} +EXPORT_SYMBOL_GPL(nfs_client_init_status); int nfs_wait_client_init_complete(const struct nfs_client *clp) { diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index f956ca20a8a3..188120626179 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -266,6 +266,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs4_pnfs_ds *ret = ds; struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); + int status; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", @@ -277,9 +278,13 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) if (ds->ds_clp) goto out_test_devid; - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, 4, s->nfs_client->cl_minorversion); + if (status) { + ret = NULL; + goto out; + } out_test_devid: if (ret->ds_clp == NULL || diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e5a6f248697b..544e7725e679 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -384,6 +384,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, struct inode *ino = lseg->pls_layout->plh_inode; struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; + int status; if (!ff_layout_mirror_valid(lseg, mirror, true)) { pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", @@ -404,7 +405,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. */ - nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, mirror->mirror_ds->ds_versions[0].version, mirror->mirror_ds->ds_versions[0].minor_version); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 09ca5095c04e..7b38fedb7e03 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -186,6 +186,8 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); +extern bool nfs_client_init_is_complete(const struct nfs_client *clp); +extern int nfs_client_init_status(const struct nfs_client *clp); extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 63f77b49a586..590e1e35781f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -367,7 +367,7 @@ void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_v3_ds_connect_unload(void); -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 9414b492439f..a7691b927af6 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -745,9 +745,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, /* * Create an rpc connection to the nfs4_pnfs_ds data server. * Currently only supports IPv4 and IPv6 addresses. - * If connection fails, make devid unavailable. + * If connection fails, make devid unavailable and return a -errno. */ -void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, +int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version) { @@ -772,6 +772,17 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, } else { nfs4_wait_ds_connect(ds); } + + /* + * At this point the ds->ds_clp should be ready, but it might have + * hit an error. + */ + if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { + WARN_ON_ONCE(1); + return -EINVAL; + } + + return nfs_client_init_status(ds->ds_clp); } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); From da066f3f039eba3e72e97b2ccad0dd8b45ba84bd Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 9 Mar 2017 12:56:49 -0500 Subject: [PATCH 341/384] pNFS/flexfiles: never nfs4_mark_deviceid_unavailable The flexfiles layout should never mark a device unavailable. Move nfs4_mark_deviceid_unavailable out of nfs4_pnfs_ds_connect and call directly from files layout where it's still needed. The flexfiles driver still handles marked devices in error paths, but will now print a rate limited warning. Signed-off-by: Weston Andros Adamson Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayoutdev.c | 1 + fs/nfs/flexfilelayout/flexfilelayout.h | 14 ++++++++++++- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- fs/nfs/pnfs_nfs.c | 24 +++++++++++++++-------- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 188120626179..d913e818858f 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -282,6 +282,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) dataserver_retrans, 4, s->nfs_client->cl_minorversion); if (status) { + nfs4_mark_deviceid_unavailable(devid); ret = NULL; goto out; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index f4f39b0ab09b..98b34c9b0564 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -175,7 +175,19 @@ ff_layout_no_read_on_rw(struct pnfs_layout_segment *lseg) static inline bool ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) { - return nfs4_test_deviceid_unavailable(node); + /* + * Flexfiles should never mark a DS unavailable, but if it does + * print a (ratelimited) warning as this can affect performance. + */ + if (nfs4_test_deviceid_unavailable(node)) { + u32 *p = (u32 *)node->deviceid.data; + + pr_warn_ratelimited("NFS: flexfiles layout referencing an " + "unavailable device [%x%x%x%x]\n", + p[0], p[1], p[2], p[3]); + return true; + } + return false; } static inline int diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 544e7725e679..85fde93dff77 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -421,11 +421,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror->mirror_ds->ds_versions[0].wsize = max_payload; goto out; } +out_fail: ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), mirror, lseg->pls_range.offset, lseg->pls_range.length, NFS4ERR_NXIO, OP_ILLEGAL, GFP_NOIO); -out_fail: if (fail_return || !ff_layout_has_available_ds(lseg)) pnfs_error_mark_layout_for_return(ino, lseg); ds = NULL; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index a7691b927af6..7250b95549ec 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -751,9 +751,11 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, unsigned int retrans, u32 version, u32 minor_version) { - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - int err = 0; + int err; +again: + err = 0; + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { if (version == 3) { err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans); @@ -766,23 +768,29 @@ int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, err = -EPROTONOSUPPORT; } - if (err) - nfs4_mark_deviceid_unavailable(devid); nfs4_clear_ds_conn_bit(ds); } else { nfs4_wait_ds_connect(ds); + + /* what was waited on didn't connect AND didn't mark unavail */ + if (!ds->ds_clp && !nfs4_test_deviceid_unavailable(devid)) + goto again; } /* * At this point the ds->ds_clp should be ready, but it might have * hit an error. */ - if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { - WARN_ON_ONCE(1); - return -EINVAL; + if (!err) { + if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) { + WARN_ON_ONCE(ds->ds_clp || + !nfs4_test_deviceid_unavailable(devid)); + return -EINVAL; + } + err = nfs_client_init_status(ds->ds_clp); } - return nfs_client_init_status(ds->ds_clp); + return err; } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); From 74e3f6e63da6c8e8246fba1689e040bc926b4a1a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 14 Mar 2017 15:24:51 +0530 Subject: [PATCH 342/384] parisc: perf: Fix potential NULL pointer dereference Fix potential NULL pointer dereference and clean up coding style errors (code indent, trailing whitespaces). Signed-off-by: Arvind Yadav Signed-off-by: Helge Deller --- arch/parisc/kernel/perf.c | 94 ++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 45 deletions(-) diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index e282a5131d77..6017a5af2e6e 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -39,7 +39,7 @@ * the PDC INTRIGUE calls. This is done to eliminate bugs introduced * in various PDC revisions. The code is much more maintainable * and reliable this way vs having to debug on every version of PDC - * on every box. + * on every box. */ #include @@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr); static int perf_release(struct inode *inode, struct file *file); static int perf_open(struct inode *inode, struct file *file); static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos); -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos); +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static void perf_start_counters(void); static int perf_stop_counters(uint32_t *raddr); @@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void); /* * configure: * - * Configure the cpu with a given data image. First turn off the counters, + * Configure the cpu with a given data image. First turn off the counters, * then download the image, then turn the counters back on. */ static int perf_config(uint32_t *image_ptr) @@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr) error = perf_stop_counters(raddr); if (error != 0) { printk("perf_config: perf_stop_counters = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to write image\n"); @@ -242,7 +242,7 @@ printk("Preparing to write image\n"); error = perf_write_image((uint64_t *)image_ptr); if (error != 0) { printk("perf_config: DOWNLOAD = %ld\n", error); - return -EINVAL; + return -EINVAL; } printk("Preparing to start counters\n"); @@ -254,7 +254,7 @@ printk("Preparing to start counters\n"); } /* - * Open the device and initialize all of its memory. The device is only + * Open the device and initialize all of its memory. The device is only * opened once, but can be "queried" by multiple processes that know its * file descriptor. */ @@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t * called on the processor that the download should happen * on. */ -static ssize_t perf_write(struct file *file, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t perf_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { size_t image_size; uint32_t image_type; uint32_t interface_type; uint32_t test; - if (perf_processor_interface == ONYX_INTF) + if (perf_processor_interface == ONYX_INTF) image_size = PCXU_IMAGE_SIZE; - else if (perf_processor_interface == CUDA_INTF) + else if (perf_processor_interface == CUDA_INTF) image_size = PCXW_IMAGE_SIZE; - else + else return -EFAULT; if (!capable(CAP_SYS_ADMIN)) @@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun /* First check the machine type is correct for the requested image */ - if (((perf_processor_interface == CUDA_INTF) && - (interface_type != CUDA_INTF)) || - ((perf_processor_interface == ONYX_INTF) && - (interface_type != ONYX_INTF))) + if (((perf_processor_interface == CUDA_INTF) && + (interface_type != CUDA_INTF)) || + ((perf_processor_interface == ONYX_INTF) && + (interface_type != ONYX_INTF))) return -EINVAL; /* Next check to make sure the requested image is valid */ - if (((interface_type == CUDA_INTF) && + if (((interface_type == CUDA_INTF) && (test >= MAX_CUDA_IMAGES)) || - ((interface_type == ONYX_INTF) && - (test >= MAX_ONYX_IMAGES))) + ((interface_type == ONYX_INTF) && + (test >= MAX_ONYX_IMAGES))) return -EINVAL; /* Copy the image into the processor */ - if (interface_type == CUDA_INTF) + if (interface_type == CUDA_INTF) return perf_config(cuda_images[test]); else return perf_config(onyx_images[test]); @@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun static void perf_patch_images(void) { #if 0 /* FIXME!! */ -/* +/* * NOTE: this routine is VERY specific to the current TLB image. * If the image is changed, this routine might also need to be changed. */ @@ -367,9 +367,9 @@ static void perf_patch_images(void) extern void $i_dtlb_miss_2_0(); extern void PA2_0_iva(); - /* + /* * We can only use the lower 32-bits, the upper 32-bits should be 0 - * anyway given this is in the kernel + * anyway given this is in the kernel */ uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0); uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0); @@ -377,21 +377,21 @@ static void perf_patch_images(void) if (perf_processor_interface == ONYX_INTF) { /* clear last 2 bytes */ - onyx_images[TLBMISS][15] &= 0xffffff00; + onyx_images[TLBMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[TLBHANDMISS][15] &= 0xffffff00; + onyx_images[TLBHANDMISS][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00; onyx_images[TLBHANDMISS][17] = itlb_addr; /* clear last 2 bytes */ - onyx_images[BIG_CPI][15] &= 0xffffff00; + onyx_images[BIG_CPI][15] &= 0xffffff00; /* set 2 bytes */ onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24)); onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00; @@ -404,24 +404,24 @@ static void perf_patch_images(void) } else if (perf_processor_interface == CUDA_INTF) { /* Cuda interface */ - cuda_images[TLBMISS][16] = + cuda_images[TLBMISS][16] = (cuda_images[TLBMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBMISS][17] = + cuda_images[TLBMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[TLBHANDMISS][16] = + cuda_images[TLBHANDMISS][16] = (cuda_images[TLBHANDMISS][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[TLBHANDMISS][17] = + cuda_images[TLBHANDMISS][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000; - cuda_images[BIG_CPI][16] = + cuda_images[BIG_CPI][16] = (cuda_images[BIG_CPI][16]&0xffff0000) | ((dtlb_addr >> 8)&0x0000ffff); - cuda_images[BIG_CPI][17] = + cuda_images[BIG_CPI][17] = ((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff); cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000; } else { @@ -433,7 +433,7 @@ static void perf_patch_images(void) /* * ioctl routine - * All routines effect the processor that they are executed on. Thus you + * All routines effect the processor that they are executed on. Thus you * must be running on the processor that you wish to change. */ @@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) } /* copy out the Counters */ - if (copy_to_user((void __user *)arg, raddr, + if (copy_to_user((void __user *)arg, raddr, sizeof (raddr)) != 0) { error = -EFAULT; break; @@ -487,7 +487,7 @@ static const struct file_operations perf_fops = { .open = perf_open, .release = perf_release }; - + static struct miscdevice perf_dev = { MISC_DYNAMIC_MINOR, PA_PERF_DEV, @@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr) /* OR sticky2 (bit 1496) to counter2 bit 32 */ tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000; raddr[2] = (uint32_t)tmp64; - + /* Counter3 is bits 1497 to 1528 */ tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff; /* OR sticky3 (bit 1529) to counter3 bit 32 */ @@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr) userbuf[22] = 0; userbuf[23] = 0; - /* + /* * Write back the zeroed bytes + the image given * the read was destructive. */ @@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr) } else { /* - * Read RDR-15 which contains the counters and sticky bits + * Read RDR-15 which contains the counters and sticky bits */ if (!perf_rdr_read_ubuf(15, userbuf)) { return -13; } - /* + /* * Clear out the counters */ perf_rdr_clear(15); @@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr) raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL); raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL); } - + return 0; } @@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer) i = tentry->num_words; while (i--) { buffer[i] = 0; - } + } /* Check for bits an even number of 64 */ if ((xbits = width & 0x03f) != 0) { @@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr) } runway = ioremap_nocache(cpu_device->hpa.start, 4096); + if (!runway) { + pr_err("perf_write_image: ioremap failed!\n"); + return -ENOMEM; + } /* Merge intrigue bits into Runway STATUS 0 */ tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful; - __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), + __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul), runway + RUNWAY_STATUS); - + /* Write RUNWAY DEBUG registers */ for (i = 0; i < 8; i++) { __raw_writeq(*memaddr++, runway + RUNWAY_DEBUG); } - return 0; + return 0; } /* @@ -843,7 +847,7 @@ printk("perf_rdr_write\n"); perf_rdr_shift_out_U(rdr_num, buffer[i]); } else { perf_rdr_shift_out_W(rdr_num, buffer[i]); - } + } } printk("perf_rdr_write done\n"); } From 73580dac7618e4bcd21679f553cf3c97323fec46 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 18 Mar 2017 17:13:27 +0100 Subject: [PATCH 343/384] parisc: Fix system shutdown halt On those parisc machines which don't provide a software power off function, the system currently kills the init process at the end of a shutdown and unexpectedly restarts insteads of halting. Fix it by adding a loop which will not return. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # 4.9+ --- arch/parisc/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 06f7ca7fe70b..b76f503eee4a 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -142,6 +142,8 @@ void machine_power_off(void) printk(KERN_EMERG "System shut down completed.\n" "Please power this system off now."); + + for (;;); } void (*pm_power_off)(void) = machine_power_off; From 9c28ca4ff8bad7486182291a55b4f67a70af718d Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 8 Mar 2017 00:09:59 -0800 Subject: [PATCH 344/384] target: Drop pointless tfo->check_stop_free check All in-tree fabric drivers provide a tfo->check_stop_free(), so there is no need to do the extra check within existing transport_cmd_check_stop_to_fabric() code. Just to be sure, add a check in target_fabric_tf_ops_check() to notify any out-of-tree drivers that might be missing it. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 4 ++++ drivers/target/target_core_transport.c | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 54b36c9835be..38b5025e4c7a 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -421,6 +421,10 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo) pr_err("Missing tfo->aborted_task()\n"); return -EINVAL; } + if (!tfo->check_stop_free) { + pr_err("Missing tfo->check_stop_free()\n"); + return -EINVAL; + } /* * We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn() * tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 434d9d693989..b1a3cdb29468 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -636,8 +636,7 @@ static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) * Fabric modules are expected to return '1' here if the se_cmd being * passed is released at this point, or zero if not being released. */ - return cmd->se_tfo->check_stop_free ? cmd->se_tfo->check_stop_free(cmd) - : 0; + return cmd->se_tfo->check_stop_free(cmd); } static void transport_lun_remove_cmd(struct se_cmd *cmd) From 3abaa2bfdb1e6bb33d38a2e82cf3bb82ec0197bf Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:14:39 -0600 Subject: [PATCH 345/384] tcmu: allow hw_max_sectors greater than 128 tcmu hard codes the hw_max_sectors to 128 which is a litle small. Userspace uses the max_sectors to report the optimal IO size and some initiators perform better with larger IOs (open-iscsi seems to do better with 256 to 512 depending on the test). (Fix do not display hw max sectors twice - MNC) Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 54 ++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c3adefe95e50..24e8580f07b8 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -960,7 +960,8 @@ static int tcmu_configure_device(struct se_device *dev) if (dev->dev_attrib.hw_block_size == 0) dev->dev_attrib.hw_block_size = 512; /* Other attributes can be configured in userspace */ - dev->dev_attrib.hw_max_sectors = 128; + if (!dev->dev_attrib.hw_max_sectors) + dev->dev_attrib.hw_max_sectors = 128; dev->dev_attrib.hw_queue_depth = 128; ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name, @@ -1031,16 +1032,42 @@ static void tcmu_free_device(struct se_device *dev) } enum { - Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err, + Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, + Opt_err, }; static match_table_t tokens = { {Opt_dev_config, "dev_config=%s"}, {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, + {Opt_hw_max_sectors, "hw_max_sectors=%u"}, {Opt_err, NULL} }; +static int tcmu_set_dev_attrib(substring_t *arg, u32 *dev_attrib) +{ + unsigned long tmp_ul; + char *arg_p; + int ret; + + arg_p = match_strdup(arg); + if (!arg_p) + return -ENOMEM; + + ret = kstrtoul(arg_p, 0, &tmp_ul); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoul() failed for dev attrib\n"); + return ret; + } + if (!tmp_ul) { + pr_err("dev attrib must be nonzero\n"); + return -EINVAL; + } + *dev_attrib = tmp_ul; + return 0; +} + static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, const char *page, ssize_t count) { @@ -1048,7 +1075,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, char *orig, *ptr, *opts, *arg_p; substring_t args[MAX_OPT_ARGS]; int ret = 0, token; - unsigned long tmp_ul; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -1082,22 +1108,12 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, pr_err("kstrtoul() failed for dev_size=\n"); break; case Opt_hw_block_size: - arg_p = match_strdup(&args[0]); - if (!arg_p) { - ret = -ENOMEM; - break; - } - ret = kstrtoul(arg_p, 0, &tmp_ul); - kfree(arg_p); - if (ret < 0) { - pr_err("kstrtoul() failed for hw_block_size=\n"); - break; - } - if (!tmp_ul) { - pr_err("hw_block_size must be nonzero\n"); - break; - } - dev->dev_attrib.hw_block_size = tmp_ul; + ret = tcmu_set_dev_attrib(&args[0], + &(dev->dev_attrib.hw_block_size)); + break; + case Opt_hw_max_sectors: + ret = tcmu_set_dev_attrib(&args[0], + &(dev->dev_attrib.hw_max_sectors)); break; default: break; From 2579325ca0acc598fdf41ba12b2871d3467f28df Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:14:40 -0600 Subject: [PATCH 346/384] tcmu: return on first Opt parse failure We only were returing failure if the last opt to be parsed failed. This has a return failure when we first detect a failure. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 24e8580f07b8..4339ab2133b3 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1118,6 +1118,9 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, default: break; } + + if (ret) + break; } kfree(orig); From 530c6891b1220cba780b6c18f4691d85a3435080 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:24 -0600 Subject: [PATCH 347/384] target: allow ALUA setup for some passthrough backends This patch allows passthrough backends to use the core/base LIO ALUA setup and state checks, but still handle the execution of commands. This will allow the target_core_user module to execute STPG and RTPG in userspace, and not have to duplicate the ALUA state checks, path information (needed so we can check if command is executable on specific paths) and setup (rtslib sets/updates the configfs ALUA interface like it does for iblock or file). For STPG, the target_core_user userspace daemon, tcmu-runner will still execute the STPG, and to update the core/base LIO state it will use the existing configfs interface. For RTPG, tcmu-runner will loop over configfs and/or cache the state. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 9 +++++---- drivers/target/target_core_pscsi.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- include/target/target_core_backend.h | 7 ++++++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index f5e330099bfc..a41bbb8087cf 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -691,7 +691,7 @@ target_alua_state_check(struct se_cmd *cmd) if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE) return 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) return 0; /* @@ -1973,7 +1973,7 @@ ssize_t core_alua_store_tg_pt_gp_info( unsigned char buf[TG_PT_GROUP_NAME_BUF]; int move = 0; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2230,7 +2230,7 @@ ssize_t core_alua_store_offline_bit( unsigned long tmp; int ret; - if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH || + if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA || (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) return -ENODEV; @@ -2316,7 +2316,8 @@ ssize_t core_alua_store_secondary_write_metadata( int core_setup_alua(struct se_device *dev) { - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) { struct t10_alua_lu_gp_member *lu_gp_mem; diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 44d92f23a3f0..94cda7991e80 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1080,7 +1080,8 @@ static void pscsi_req_done(struct request *req, int uptodate) static const struct target_backend_ops pscsi_ops = { .name = "pscsi", .owner = THIS_MODULE, - .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, + .transport_flags = TRANSPORT_FLAG_PASSTHROUGH | + TRANSPORT_FLAG_PASSTHROUGH_ALUA, .attach_hba = pscsi_attach_hba, .detach_hba = pscsi_detach_hba, .pmode_enable_hba = pscsi_pmode_enable_hba, diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c0dbfa016575..6fb191914f45 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -602,7 +602,8 @@ int core_tpg_add_lun( if (ret) goto out_kill_ref; - if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) && + if (!(dev->transport->transport_flags & + TRANSPORT_FLAG_PASSTHROUGH_ALUA) && !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) target_attach_tg_pt_gp(lun, dev->t10_alua.default_tg_pt_gp); diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index b54b98dc2d4a..1b0f447ce850 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -4,7 +4,12 @@ #include #include -#define TRANSPORT_FLAG_PASSTHROUGH 1 +#define TRANSPORT_FLAG_PASSTHROUGH 0x1 +/* + * ALUA commands, state checks and setup operations are handled by the + * backend module. + */ +#define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2 struct request_queue; struct scatterlist; From 0a4145729871ef29afe8b0c57560a1f5bd736416 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:25 -0600 Subject: [PATCH 348/384] target: fail ALUA transitions for pscsi We do not setup the LU group for pscsi devices, so if you write a state to alua_access_state that will cause a transition you will get a NULL pointer dereference. This patch will fail attempts to try and transition the path for backend devices that set the TRANSPORT_FLAG_PASSTHROUGH_ALUA flag. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index a41bbb8087cf..5b5a1e250a65 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1149,6 +1149,9 @@ int core_alua_do_port_transition( struct t10_alua_tg_pt_gp *tg_pt_gp; int primary, valid_states, rc = 0; + if (l_dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH_ALUA) + return -ENODEV; + valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; if (core_alua_check_transition(new_state, valid_states, &primary) != 0) return -EINVAL; From 207ee84133c00a8a2a5bdec94df4a5b37d78881c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Mar 2017 23:13:26 -0600 Subject: [PATCH 349/384] target: Use system workqueue for ALUA transitions If tcmu-runner is processing a STPG and needs to change the kernel's ALUA state then we cannot use the same work queue for task management requests and ALUA transitions, because we could deadlock. The problem occurs when a STPG times out before tcmu-runner is able to call into target_tg_pt_gp_alua_access_state_store-> core_alua_do_port_transition -> core_alua_do_transition_tg_pt -> queue_work. In this case, the tmr is on the work queue waiting for the STPG to complete, but the STPG transition is now queued behind the waiting tmr. Note: This bug will also be fixed by this patch: http://www.spinics.net/lists/target-devel/msg14560.html which switches the tmr code to use the system workqueues. For both, I am not sure if we need a dedicated workqueue since it is not a performance path and I do not think we need WQ_MEM_RECLAIM to make forward progress to free up memory like the block layer does. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 5b5a1e250a65..58bf5e6350ac 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1121,13 +1121,11 @@ static int core_alua_do_transition_tg_pt( unsigned long transition_tmo; transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, + transition_tmo); } else { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - queue_delayed_work(tg_pt_gp->tg_pt_gp_dev->tmr_wq, - &tg_pt_gp->tg_pt_gp_transition_work, 0); + schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } From d7175373f2745ed4abe5b388d5aabd06304f801e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:48 -0600 Subject: [PATCH 350/384] target: fix ALUA transition timeout handling The implicit transition time tells initiators the min time to wait before timing out a transition. We currently schedule the transition to occur in tg_pt_gp_implicit_trans_secs seconds so there is no room for delays. If core_alua_do_transition_tg_pt_work->core_alua_update_tpg_primary_metadata needs to write out info to a remote file, then the initiator can easily time out the operation. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 23 ++++++++--------------- include/target/target_core_base.h | 2 +- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 58bf5e6350ac..594807cd92cb 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1013,7 +1013,7 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp) static void core_alua_do_transition_tg_pt_work(struct work_struct *work) { struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work, - struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work.work); + struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work); struct se_device *dev = tg_pt_gp->tg_pt_gp_dev; bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status == ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG); @@ -1076,13 +1076,12 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs && - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == + if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { /* Just in case */ tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; tg_pt_gp->tg_pt_gp_transition_complete = &wait; - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; return 0; @@ -1117,15 +1116,9 @@ static int core_alua_do_transition_tg_pt( atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) { - unsigned long transition_tmo; - - transition_tmo = tg_pt_gp->tg_pt_gp_implicit_trans_secs * HZ; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, - transition_tmo); - } else { + schedule_work(&tg_pt_gp->tg_pt_gp_transition_work); + if (explicit) { tg_pt_gp->tg_pt_gp_transition_complete = &wait; - schedule_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work, 0); wait_for_completion(&wait); tg_pt_gp->tg_pt_gp_transition_complete = NULL; } @@ -1696,8 +1689,8 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev, mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex); spin_lock_init(&tg_pt_gp->tg_pt_gp_lock); atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0); - INIT_DELAYED_WORK(&tg_pt_gp->tg_pt_gp_transition_work, - core_alua_do_transition_tg_pt_work); + INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work, + core_alua_do_transition_tg_pt_work); tg_pt_gp->tg_pt_gp_dev = dev; atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED); @@ -1805,7 +1798,7 @@ void core_alua_free_tg_pt_gp( dev->t10_alua.alua_tg_pt_gps_counter--; spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - flush_delayed_work(&tg_pt_gp->tg_pt_gp_transition_work); + flush_work(&tg_pt_gp->tg_pt_gp_transition_work); /* * Allow a struct t10_alua_tg_pt_gp_member * referenced by diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 37c274e61acc..4b784b6e21c0 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -299,7 +299,7 @@ struct t10_alua_tg_pt_gp { struct list_head tg_pt_gp_lun_list; struct se_lun *tg_pt_gp_alua_lun; struct se_node_acl *tg_pt_gp_alua_nacl; - struct delayed_work tg_pt_gp_transition_work; + struct work_struct tg_pt_gp_transition_work; struct completion *tg_pt_gp_transition_complete; }; From 1ca4d4fa3bfcbe8964f81e5818a9b90436466eb0 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:49 -0600 Subject: [PATCH 351/384] target: allow userspace to set state to transitioning Userspace target_core_user handlers like tcmu-runner may want to set the ALUA state to transitioning while it does implicit transitions. This patch allows that state when set from configfs. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 37 ++++++++++++++++++------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 594807cd92cb..252d4e4b7b33 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -43,7 +43,7 @@ #include "target_core_ua.h" static sense_reason_t core_alua_check_transition(int state, int valid, - int *primary); + int *primary, int explicit); static int core_alua_set_tg_pt_secondary_state( struct se_lun *lun, int explicit, int offline); @@ -335,8 +335,8 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd) * the state is a primary or secondary target port asymmetric * access state. */ - rc = core_alua_check_transition(alua_access_state, - valid_states, &primary); + rc = core_alua_check_transition(alua_access_state, valid_states, + &primary, 1); if (rc) { /* * If the SET TARGET PORT GROUPS attempts to establish @@ -762,7 +762,7 @@ target_alua_state_check(struct se_cmd *cmd) * Check implicit and explicit ALUA state change request. */ static sense_reason_t -core_alua_check_transition(int state, int valid, int *primary) +core_alua_check_transition(int state, int valid, int *primary, int explicit) { /* * OPTIMIZED, NON-OPTIMIZED, STANDBY and UNAVAILABLE are @@ -804,11 +804,14 @@ core_alua_check_transition(int state, int valid, int *primary) *primary = 0; break; case ALUA_ACCESS_STATE_TRANSITION: - /* - * Transitioning is set internally, and - * cannot be selected manually. - */ - goto not_supported; + if (!(valid & ALUA_T_SUP) || explicit) + /* + * Transitioning is set internally and by tcmu daemon, + * and cannot be selected through a STPG. + */ + goto not_supported; + *primary = 0; + break; default: pr_err("Unknown ALUA access state: 0x%02x\n", state); return TCM_INVALID_PARAMETER_LIST; @@ -1070,7 +1073,7 @@ static int core_alua_do_transition_tg_pt( if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state) return 0; - if (new_state == ALUA_ACCESS_STATE_TRANSITION) + if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) return -EAGAIN; /* @@ -1091,10 +1094,6 @@ static int core_alua_do_transition_tg_pt( * Save the old primary ALUA access state, and set the current state * to ALUA_ACCESS_STATE_TRANSITION. */ - tg_pt_gp->tg_pt_gp_alua_previous_state = - atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state, ALUA_ACCESS_STATE_TRANSITION); tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ? @@ -1103,6 +1102,13 @@ static int core_alua_do_transition_tg_pt( core_alua_queue_state_change_ua(tg_pt_gp); + if (new_state == ALUA_ACCESS_STATE_TRANSITION) + return 0; + + tg_pt_gp->tg_pt_gp_alua_previous_state = + atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state); + tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; + /* * Check for the optional ALUA primary state transition delay */ @@ -1144,7 +1150,8 @@ int core_alua_do_port_transition( return -ENODEV; valid_states = l_tg_pt_gp->tg_pt_gp_alua_supported_states; - if (core_alua_check_transition(new_state, valid_states, &primary) != 0) + if (core_alua_check_transition(new_state, valid_states, &primary, + explicit) != 0) return -EINVAL; local_lu_gp_mem = l_dev->dev_alua_lu_gp_mem; From 760bf578edf8122f2503a3a6a3f4b0de3b6ce0bb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 2 Mar 2017 04:59:50 -0600 Subject: [PATCH 352/384] target: fix race during implicit transition work flushes This fixes the following races: 1. core_alua_do_transition_tg_pt could have read tg_pt_gp_alua_access_state and gone into this if chunk: if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == ALUA_ACCESS_STATE_TRANSITION) { and then core_alua_do_transition_tg_pt_work could update the state. core_alua_do_transition_tg_pt would then only set tg_pt_gp_alua_pending_state and the tg_pt_gp_alua_access_state would not get updated with the second calls state. 2. core_alua_do_transition_tg_pt could be setting tg_pt_gp_transition_complete while the tg_pt_gp_transition_work is already completing. core_alua_do_transition_tg_pt then waits on the completion that will never be called. To handle these issues, we just call flush_work which will return when core_alua_do_transition_tg_pt_work has completed so there is no need to do the complete/wait. And, if core_alua_do_transition_tg_pt_work was running, instead of trying to sneak in the state change, we just schedule up another core_alua_do_transition_tg_pt_work call. Note that this does not handle a possible race where there are multiple threads call core_alua_do_transition_tg_pt at the same time. I think we need a mutex in target_tg_pt_gp_alua_access_state_store. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_alua.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 252d4e4b7b33..fd7c16a7ca6e 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1079,16 +1079,8 @@ static int core_alua_do_transition_tg_pt( /* * Flush any pending transitions */ - if (!explicit && atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == - ALUA_ACCESS_STATE_TRANSITION) { - /* Just in case */ - tg_pt_gp->tg_pt_gp_alua_pending_state = new_state; - tg_pt_gp->tg_pt_gp_transition_complete = &wait; + if (!explicit) flush_work(&tg_pt_gp->tg_pt_gp_transition_work); - wait_for_completion(&wait); - tg_pt_gp->tg_pt_gp_transition_complete = NULL; - return 0; - } /* * Save the old primary ALUA access state, and set the current state From 972c7f167974fa41ea8a2eed4b857cc59f59c42c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 9 Mar 2017 02:42:08 -0600 Subject: [PATCH 353/384] tcmu: add helper to check if dev was configured This adds a helper to check if the dev was configured. It will be used in the next patch to prevent updates to some config settings after the device has been setup. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 4339ab2133b3..892b311e7874 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -998,6 +998,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p) kfree(udev); } +static bool tcmu_dev_configured(struct tcmu_dev *udev) +{ + return udev->uio_info.uio_dev ? true : false; +} + static void tcmu_free_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); @@ -1019,8 +1024,7 @@ static void tcmu_free_device(struct se_device *dev) spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); - /* Device was configured */ - if (udev->uio_info.uio_dev) { + if (tcmu_dev_configured(udev)) { tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name, udev->uio_info.uio_dev->minor); From af980e46a26ac8805685bb70c8572dbc47abb126 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 9 Mar 2017 02:42:09 -0600 Subject: [PATCH 354/384] tcmu: make cmd timeout configurable A single daemon could implement multiple types of devices using multuple types of real devices that may not support restarting from crashes and/or handling tcmu timeouts. This makes the cmd timeout configurable, so handlers that do not support it can turn if off for now. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 41 ++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 892b311e7874..10cc15f0b1fa 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -112,6 +112,7 @@ struct tcmu_dev { spinlock_t commands_lock; struct timer_list timeout; + unsigned int cmd_time_out; char dev_config[TCMU_CONFIG_LEN]; }; @@ -172,7 +173,9 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) tcmu_cmd->se_cmd = se_cmd; tcmu_cmd->tcmu_dev = udev; - tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT); + if (udev->cmd_time_out) + tcmu_cmd->deadline = jiffies + + msecs_to_jiffies(udev->cmd_time_out); idr_preload(GFP_KERNEL); spin_lock_irq(&udev->commands_lock); @@ -451,7 +454,11 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) pr_debug("sleeping for ring space\n"); spin_unlock_irq(&udev->cmdr_lock); - ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); + if (udev->cmd_time_out) + ret = schedule_timeout( + msecs_to_jiffies(udev->cmd_time_out)); + else + ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); finish_wait(&udev->wait_cmdr, &__wait); if (!ret) { pr_warn("tcmu: command timed out\n"); @@ -526,8 +533,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) /* TODO: only if FLUSH and FUA? */ uio_event_notify(&udev->uio_info); - mod_timer(&udev->timeout, - round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT))); + if (udev->cmd_time_out) + mod_timer(&udev->timeout, round_jiffies_up(jiffies + + msecs_to_jiffies(udev->cmd_time_out))); return TCM_NO_SENSE; } @@ -742,6 +750,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) } udev->hba = hba; + udev->cmd_time_out = TCMU_TIME_OUT; init_waitqueue_head(&udev->wait_cmdr); spin_lock_init(&udev->cmdr_lock); @@ -1037,7 +1046,7 @@ static void tcmu_free_device(struct se_device *dev) enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, - Opt_err, + Opt_cmd_time_out, Opt_err, }; static match_table_t tokens = { @@ -1045,6 +1054,7 @@ static match_table_t tokens = { {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_hw_max_sectors, "hw_max_sectors=%u"}, + {Opt_cmd_time_out, "cmd_time_out=%u"}, {Opt_err, NULL} }; @@ -1111,6 +1121,23 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; + case Opt_cmd_time_out: + if (tcmu_dev_configured(udev)) { + pr_err("Can not update cmd_time_out after device has been configured.\n"); + ret = -EINVAL; + break; + } + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; + break; + } + ret = kstrtouint(arg_p, 0, &udev->cmd_time_out); + kfree(arg_p); + if (ret < 0) + pr_err("kstrtouint() failed for cmd_time_out=\n"); + udev->cmd_time_out *= MSEC_PER_SEC; + break; case Opt_hw_block_size: ret = tcmu_set_dev_attrib(&args[0], &(dev->dev_attrib.hw_block_size)); @@ -1138,7 +1165,9 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); + bl += sprintf(b + bl, "Size: %zu ", udev->dev_size); + bl += sprintf(b + bl, "Cmd Time Out: %lu\n", + udev->cmd_time_out / MSEC_PER_SEC); return bl; } From 7d7a743543905a8297dce53b36e793e5307da5d7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 18 Mar 2017 15:04:13 -0700 Subject: [PATCH 355/384] tcmu: Convert cmd_time_out into backend device attribute Instead of putting cmd_time_out under ../target/core/user_0/foo/control, which has historically been used by parameters needed for initial backend device configuration, go ahead and move cmd_time_out into a backend device attribute. In order to do this, tcmu_module_init() has been updated to create a local struct configfs_attribute **tcmu_attrs, that is based upon the existing passthrough_attrib_attrs along with the new cmd_time_out attribute. Once **tcm_attrs has been setup, go ahead and point it at tcmu_ops->tb_dev_attrib_attrs so it's picked up by target-core. Also following MNC's previous change, ->cmd_time_out is stored in milliseconds but exposed via configfs in seconds. Also, note this patch restricts the modification of ->cmd_time_out to before + after the TCMU device has been configured, but not while it has active fabric exports. Cc: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 94 ++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 26 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 10cc15f0b1fa..c6874c38a10b 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1046,7 +1047,7 @@ static void tcmu_free_device(struct se_device *dev) enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, - Opt_cmd_time_out, Opt_err, + Opt_err, }; static match_table_t tokens = { @@ -1054,7 +1055,6 @@ static match_table_t tokens = { {Opt_dev_size, "dev_size=%u"}, {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_hw_max_sectors, "hw_max_sectors=%u"}, - {Opt_cmd_time_out, "cmd_time_out=%u"}, {Opt_err, NULL} }; @@ -1121,23 +1121,6 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoul() failed for dev_size=\n"); break; - case Opt_cmd_time_out: - if (tcmu_dev_configured(udev)) { - pr_err("Can not update cmd_time_out after device has been configured.\n"); - ret = -EINVAL; - break; - } - arg_p = match_strdup(&args[0]); - if (!arg_p) { - ret = -ENOMEM; - break; - } - ret = kstrtouint(arg_p, 0, &udev->cmd_time_out); - kfree(arg_p); - if (ret < 0) - pr_err("kstrtouint() failed for cmd_time_out=\n"); - udev->cmd_time_out *= MSEC_PER_SEC; - break; case Opt_hw_block_size: ret = tcmu_set_dev_attrib(&args[0], &(dev->dev_attrib.hw_block_size)); @@ -1165,9 +1148,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu ", udev->dev_size); - bl += sprintf(b + bl, "Cmd Time Out: %lu\n", - udev->cmd_time_out / MSEC_PER_SEC); + bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); return bl; } @@ -1186,7 +1167,48 @@ tcmu_parse_cdb(struct se_cmd *cmd) return passthrough_parse_cdb(cmd, tcmu_queue_cmd); } -static const struct target_backend_ops tcmu_ops = { +static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = container_of(da->da_dev, + struct tcmu_dev, se_dev); + + return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC); +} + +static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page, + size_t count) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = container_of(da->da_dev, + struct tcmu_dev, se_dev); + u32 val; + int ret; + + if (da->da_dev->export_count) { + pr_err("Unable to set tcmu cmd_time_out while exports exist\n"); + return -EINVAL; + } + + ret = kstrtou32(page, 0, &val); + if (ret < 0) + return ret; + + if (!val) { + pr_err("Illegal value for cmd_time_out\n"); + return -EINVAL; + } + + udev->cmd_time_out = val * MSEC_PER_SEC; + return count; +} +CONFIGFS_ATTR(tcmu_, cmd_time_out); + +static struct configfs_attribute **tcmu_attrs; + +static struct target_backend_ops tcmu_ops = { .name = "user", .owner = THIS_MODULE, .transport_flags = TRANSPORT_FLAG_PASSTHROUGH, @@ -1200,12 +1222,12 @@ static const struct target_backend_ops tcmu_ops = { .show_configfs_dev_params = tcmu_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = tcmu_get_blocks, - .tb_dev_attrib_attrs = passthrough_attrib_attrs, + .tb_dev_attrib_attrs = NULL, }; static int __init tcmu_module_init(void) { - int ret; + int ret, i, len = 0; BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); @@ -1227,12 +1249,31 @@ static int __init tcmu_module_init(void) goto out_unreg_device; } + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + len += sizeof(struct configfs_attribute *); + } + len += sizeof(struct configfs_attribute *) * 2; + + tcmu_attrs = kzalloc(len, GFP_KERNEL); + if (!tcmu_attrs) { + ret = -ENOMEM; + goto out_unreg_genl; + } + + for (i = 0; passthrough_attrib_attrs[i] != NULL; i++) { + tcmu_attrs[i] = passthrough_attrib_attrs[i]; + } + tcmu_attrs[i] = &tcmu_attr_cmd_time_out; + tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs; + ret = transport_backend_register(&tcmu_ops); if (ret) - goto out_unreg_genl; + goto out_attrs; return 0; +out_attrs: + kfree(tcmu_attrs); out_unreg_genl: genl_unregister_family(&tcmu_genl_family); out_unreg_device: @@ -1246,6 +1287,7 @@ static int __init tcmu_module_init(void) static void __exit tcmu_module_exit(void) { target_backend_unregister(&tcmu_ops); + kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); root_device_unregister(tcmu_root_device); kmem_cache_destroy(tcmu_cmd_cache); From c4a9b538ab2a109c5f9798bea1f8f4bf93aadfb9 Mon Sep 17 00:00:00 2001 From: Joe Carnuccio Date: Wed, 15 Mar 2017 09:48:43 -0700 Subject: [PATCH 356/384] qla2xxx: Allow vref count to timeout on vport delete. Cc: Signed-off-by: Joe Carnuccio Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_attr.c | 4 +--- drivers/scsi/qla2xxx/qla_def.h | 6 +++++- drivers/scsi/qla2xxx/qla_init.c | 1 + drivers/scsi/qla2xxx/qla_mid.c | 14 ++++++++------ drivers/scsi/qla2xxx/qla_os.c | 1 + 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index f610103994af..435ff7fd6384 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2154,8 +2154,6 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) "Timer for the VP[%d] has stopped\n", vha->vp_idx); } - BUG_ON(atomic_read(&vha->vref_count)); - qla2x00_free_fcports(vha); mutex_lock(&ha->vport_lock); @@ -2166,7 +2164,7 @@ qla24xx_vport_delete(struct fc_vport *fc_vport) dma_free_coherent(&ha->pdev->dev, vha->gnl.size, vha->gnl.l, vha->gnl.ldma); - if (vha->qpair->vp_idx == vha->vp_idx) { + if (vha->qpair && vha->qpair->vp_idx == vha->vp_idx) { if (qla2xxx_delete_qpair(vha, vha->qpair) != QLA_SUCCESS) ql_log(ql_log_warn, vha, 0x7087, "Queue Pair delete failed.\n"); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 625d438e3cce..8662ef4192db 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -4076,6 +4076,7 @@ typedef struct scsi_qla_host { /* Count of active session/fcport */ int fcport_count; wait_queue_head_t fcport_waitQ; + wait_queue_head_t vref_waitq; } scsi_qla_host_t; struct qla27xx_image_status { @@ -4131,14 +4132,17 @@ struct qla2_sgx { mb(); \ if (__vha->flags.delete_progress) { \ atomic_dec(&__vha->vref_count); \ + wake_up(&__vha->vref_waitq); \ __bail = 1; \ } else { \ __bail = 0; \ } \ } while (0) -#define QLA_VHA_MARK_NOT_BUSY(__vha) \ +#define QLA_VHA_MARK_NOT_BUSY(__vha) do { \ atomic_dec(&__vha->vref_count); \ + wake_up(&__vha->vref_waitq); \ +} while (0) \ #define QLA_QPAIR_MARK_BUSY(__qpair, __bail) do { \ atomic_inc(&__qpair->ref_count); \ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 32fb9007f137..9f3740c68cc8 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5148,6 +5148,7 @@ qla2x00_update_fcports(scsi_qla_host_t *base_vha) } } atomic_dec(&vha->vref_count); + wake_up(&vha->vref_waitq); } spin_unlock_irqrestore(&ha->vport_slock, flags); } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index c6d6f0d912ff..09a490c98763 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -74,13 +74,14 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha) * ensures no active vp_list traversal while the vport is removed * from the queue) */ + wait_event_timeout(vha->vref_waitq, atomic_read(&vha->vref_count), + 10*HZ); + spin_lock_irqsave(&ha->vport_slock, flags); - while (atomic_read(&vha->vref_count)) { - spin_unlock_irqrestore(&ha->vport_slock, flags); - - msleep(500); - - spin_lock_irqsave(&ha->vport_slock, flags); + if (atomic_read(&vha->vref_count)) { + ql_dbg(ql_dbg_vport, vha, 0xfffa, + "vha->vref_count=%u timeout\n", vha->vref_count.counter); + vha->vref_count = (atomic_t)ATOMIC_INIT(0); } list_del(&vha->list); qlt_update_vp_map(vha, RESET_VP_IDX); @@ -269,6 +270,7 @@ qla2x00_alert_all_vps(struct rsp_que *rsp, uint16_t *mb) spin_lock_irqsave(&ha->vport_slock, flags); atomic_dec(&vha->vref_count); + wake_up(&vha->vref_waitq); } i++; } diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 1fed235a1b4a..54d4e802bde0 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -4268,6 +4268,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, spin_lock_init(&vha->work_lock); spin_lock_init(&vha->cmd_list_lock); init_waitqueue_head(&vha->fcport_waitQ); + init_waitqueue_head(&vha->vref_waitq); vha->gnl.size = sizeof(struct get_name_list_extended) * (ha->max_loop_id + 1); From ae940f2c472a62904dc18234de5cf3ed28f195ee Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:44 -0700 Subject: [PATCH 357/384] qla2xxx: Fix memory leak for abts processing Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 45f5077684f0..ecf97c5993e8 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6642,6 +6642,8 @@ qlt_handle_abts_recv_work(struct work_struct *work) spin_lock_irqsave(&ha->hardware_lock, flags); qlt_response_pkt_all_vps(vha, (response_t *)&op->atio); spin_unlock_irqrestore(&ha->hardware_lock, flags); + + kfree(op); } void From 8b666809e10cda9814af3e8be339d35b83909056 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:45 -0700 Subject: [PATCH 358/384] qla2xxx: Fix request queue corruption. When FW notify driver or driver detects low FW resource, driver tries to send out Busy SCSI Status to tell Initiator side to back off. During the send process, the lock was not held. Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ecf97c5993e8..a463bcc57902 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5079,16 +5079,22 @@ qlt_send_busy(struct scsi_qla_host *vha, static int qlt_chk_qfull_thresh_hold(struct scsi_qla_host *vha, - struct atio_from_isp *atio) + struct atio_from_isp *atio, bool ha_locked) { struct qla_hw_data *ha = vha->hw; uint16_t status; + unsigned long flags; if (ha->tgt.num_pend_cmds < Q_FULL_THRESH_HOLD(ha)) return 0; + if (!ha_locked) + spin_lock_irqsave(&ha->hardware_lock, flags); status = temp_sam_status; qlt_send_busy(vha, atio, status); + if (!ha_locked) + spin_unlock_irqrestore(&ha->hardware_lock, flags); + return 1; } @@ -5133,7 +5139,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, if (likely(atio->u.isp24.fcp_cmnd.task_mgmt_flags == 0)) { - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, ha_locked); if (rc != 0) { tgt->atio_irq_cmd_count--; return; @@ -5256,7 +5262,7 @@ static void qlt_response_pkt(struct scsi_qla_host *vha, response_t *pkt) break; } - rc = qlt_chk_qfull_thresh_hold(vha, atio); + rc = qlt_chk_qfull_thresh_hold(vha, atio, true); if (rc != 0) { tgt->irq_cmd_count--; return; From 8f6fc8d4e7ae2347d6261d11a7eb2b247d2954d8 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:46 -0700 Subject: [PATCH 359/384] qla2xxx: Fix inadequate lock protection for ABTS. Normally, ABTS is sent to Target Core as Task MGMT command. In the case of error, qla2xxx needs to send response, hardware_lock is required to prevent request queue corruption. Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a463bcc57902..a78c3e9bcb57 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -130,6 +130,9 @@ static void qlt_send_term_imm_notif(struct scsi_qla_host *vha, static struct fc_port *qlt_create_sess(struct scsi_qla_host *vha, fc_port_t *fcport, bool local); void qlt_unreg_sess(struct fc_port *sess); +static void qlt_24xx_handle_abts(struct scsi_qla_host *, + struct abts_recv_from_24xx *); + /* * Global Variables */ @@ -389,6 +392,8 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, (struct abts_recv_from_24xx *)atio; struct scsi_qla_host *host = qlt_find_host_by_vp_idx(vha, entry->vp_index); + unsigned long flags; + if (unlikely(!host)) { ql_dbg(ql_dbg_tgt, vha, 0xffff, "qla_target(%d): Response pkt (ABTS_RECV_24XX) " @@ -396,9 +401,12 @@ static bool qlt_24xx_atio_pkt_all_vps(struct scsi_qla_host *vha, vha->vp_idx, entry->vp_index); break; } - qlt_response_pkt(host, (response_t *)atio); + if (!ha_locked) + spin_lock_irqsave(&host->hw->hardware_lock, flags); + qlt_24xx_handle_abts(host, (struct abts_recv_from_24xx *)atio); + if (!ha_locked) + spin_unlock_irqrestore(&host->hw->hardware_lock, flags); break; - } /* case PUREX_IOCB_TYPE: ql2xmvasynctoatio */ From f159b3c7cd45c550d0f73806451a10b6b6bc08ae Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:47 -0700 Subject: [PATCH 360/384] qla2xxx: Fix sess_lock & hardware_lock lock order problem. The main lock that needs to be held for CMD or TMR submission to upper layer is the sess_lock. The sess_lock is used to serialize cmd submission and session deletion. The addition of hardware_lock being held is not necessary. This patch removes hardware_lock dependency from CMD/TMR submission. Use hardware_lock only for error response in this case. Path1 CPU0 CPU1 ---- ---- lock(&(&ha->tgt.sess_lock)->rlock); lock(&(&ha->hardware_lock)->rlock); lock(&(&ha->tgt.sess_lock)->rlock); lock(&(&ha->hardware_lock)->rlock); Path2/deadlock *** DEADLOCK *** Call Trace: dump_stack+0x85/0xc2 print_circular_bug+0x1e3/0x250 __lock_acquire+0x1425/0x1620 lock_acquire+0xbf/0x210 _raw_spin_lock_irqsave+0x53/0x70 qlt_sess_work_fn+0x21d/0x480 [qla2xxx] process_one_work+0x1f4/0x6e0 Cc: Cc: Bart Van Assche Reported-by: Bart Van Assche Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 43 ++++++++++++++----------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index a78c3e9bcb57..989f931af156 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -5727,30 +5727,23 @@ static void qlt_abort_work(struct qla_tgt *tgt, } } - spin_lock_irqsave(&ha->hardware_lock, flags); - - if (tgt->tgt_stop) - goto out_term; - rc = __qlt_24xx_handle_abts(vha, &prm->abts, sess); + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); + if (rc != 0) goto out_term; - spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (sess) - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); return; out_term2: - spin_lock_irqsave(&ha->hardware_lock, flags); - -out_term: - qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (sess) ha->tgt.tgt_ops->put_sess(sess); spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); + +out_term: + spin_lock_irqsave(&ha->hardware_lock, flags); + qlt_24xx_send_abts_resp(vha, &prm->abts, FCP_TMF_REJECTED, false); + spin_unlock_irqrestore(&ha->hardware_lock, flags); } static void qlt_tmr_work(struct qla_tgt *tgt, @@ -5770,7 +5763,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (tgt->tgt_stop) - goto out_term; + goto out_term2; s_id = prm->tm_iocb2.u.isp24.fcp_hdr.s_id; sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id); @@ -5782,11 +5775,11 @@ static void qlt_tmr_work(struct qla_tgt *tgt, spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (!sess) - goto out_term; + goto out_term2; } else { if (sess->deleted) { sess = NULL; - goto out_term; + goto out_term2; } if (!kref_get_unless_zero(&sess->sess_kref)) { @@ -5794,7 +5787,7 @@ static void qlt_tmr_work(struct qla_tgt *tgt, "%s: kref_get fail %8phC\n", __func__, sess->port_name); sess = NULL; - goto out_term; + goto out_term2; } } @@ -5804,17 +5797,19 @@ static void qlt_tmr_work(struct qla_tgt *tgt, unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); rc = qlt_issue_task_mgmt(sess, unpacked_lun, fn, iocb, 0); + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + if (rc != 0) goto out_term; - - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); return; +out_term2: + if (sess) + ha->tgt.tgt_ops->put_sess(sess); + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); out_term: qlt_send_term_exchange(vha, NULL, &prm->tm_iocb2, 1, 0); - ha->tgt.tgt_ops->put_sess(sess); - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); } static void qlt_sess_work_fn(struct work_struct *work) From 5b33469a055c77001fd2c62b0f985c991b0e5b65 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:48 -0700 Subject: [PATCH 361/384] qla2xxx: Allow relogin to proceed if remote login did not finish If the remote port have started the login process, then the PLOGI and PRLI should be back to back. Driver will allow the remote port to complete the process. For the case where the remote port decide to back off from sending PRLI, this local port sets an expiration timer for the PRLI. Once the expiration time passes, the relogin retry logic is allowed to go through and perform login with the remote port. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 2 ++ drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++++-- drivers/scsi/qla2xxx/qla_isr.c | 25 +++++++++++++++++++------ drivers/scsi/qla2xxx/qla_target.c | 1 + 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8662ef4192db..56cd45fc600a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2300,6 +2300,8 @@ typedef struct fc_port { struct ct_sns_desc ct_desc; enum discovery_state disc_state; enum login_state fw_login_state; + unsigned long plogi_nack_done_deadline; + u32 login_gen, last_login_gen; u32 rscn_gen, last_rscn_gen; u32 chip_reset; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 9f3740c68cc8..a7865a5d556d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -876,10 +876,14 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) fcport->login_retry--; if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || - (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || (fcport->fw_login_state == DSC_LS_PRLI_PEND)) return 0; + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) + return 0; + } + /* for pure Target Mode. Login will not be initiated */ if (vha->host->active_mode == MODE_TARGET) return 0; @@ -1041,10 +1045,14 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha, fcport->flags); if ((fcport->fw_login_state == DSC_LS_PLOGI_PEND) || - (fcport->fw_login_state == DSC_LS_PLOGI_COMP) || (fcport->fw_login_state == DSC_LS_PRLI_PEND)) return; + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + if (time_before_eq(jiffies, fcport->plogi_nack_done_deadline)) + return; + } + if (fcport->flags & FCF_ASYNC_SENT) { fcport->login_retry++; set_bit(RELOGIN_NEEDED, &vha->dpc_flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 3c66ea29de27..b2c6da752edd 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1620,9 +1620,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, QLA_LOGIO_LOGIN_RETRIED : 0; if (logio->entry_status) { ql_log(ql_log_warn, fcport->vha, 0x5034, - "Async-%s error entry - hdl=%x" + "Async-%s error entry - %8phC hdl=%x" "portid=%02x%02x%02x entry-status=%x.\n", - type, sp->handle, fcport->d_id.b.domain, + type, fcport->port_name, sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, logio->entry_status); ql_dump_buffer(ql_dbg_async + ql_dbg_buffer, vha, 0x504d, @@ -1633,8 +1633,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, if (le16_to_cpu(logio->comp_status) == CS_COMPLETE) { ql_dbg(ql_dbg_async, fcport->vha, 0x5036, - "Async-%s complete - hdl=%x portid=%02x%02x%02x " - "iop0=%x.\n", type, sp->handle, fcport->d_id.b.domain, + "Async-%s complete - %8phC hdl=%x portid=%02x%02x%02x " + "iop0=%x.\n", type, fcport->port_name, sp->handle, + fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, le32_to_cpu(logio->io_parameter[0])); @@ -1674,6 +1675,17 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, case LSC_SCODE_NPORT_USED: data[0] = MBS_LOOP_ID_USED; break; + case LSC_SCODE_CMD_FAILED: + if (iop[1] == 0x0606) { + /* + * PLOGI/PRLI Completed. We must have Recv PLOGI/PRLI, + * Target side acked. + */ + data[0] = MBS_COMMAND_COMPLETE; + goto logio_done; + } + data[0] = MBS_COMMAND_ERROR; + break; case LSC_SCODE_NOXCB: vha->hw->exch_starvation++; if (vha->hw->exch_starvation > 5) { @@ -1695,8 +1707,9 @@ qla24xx_logio_entry(scsi_qla_host_t *vha, struct req_que *req, } ql_dbg(ql_dbg_async, fcport->vha, 0x5037, - "Async-%s failed - hdl=%x portid=%02x%02x%02x comp=%x " - "iop0=%x iop1=%x.\n", type, sp->handle, fcport->d_id.b.domain, + "Async-%s failed - %8phC hdl=%x portid=%02x%02x%02x comp=%x " + "iop0=%x iop1=%x.\n", type, fcport->port_name, + sp->handle, fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa, le16_to_cpu(logio->comp_status), le32_to_cpu(logio->io_parameter[0]), diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 989f931af156..925d9b858b24 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -562,6 +562,7 @@ void qla2x00_async_nack_sp_done(void *s, int res) sp->fcport->login_gen++; sp->fcport->fw_login_state = DSC_LS_PLOGI_COMP; sp->fcport->logout_on_delete = 1; + sp->fcport->plogi_nack_done_deadline = jiffies + HZ; break; case SRB_NACK_PRLI: From be25152c0d9e236076323abbe9def9714234b761 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:49 -0700 Subject: [PATCH 362/384] qla2xxx: Improve T10-DIF/PI handling in driver. Add routines to support T10 DIF tag. Signed-off-by: Quinn Tran Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_dbg.h | 1 + drivers/scsi/qla2xxx/qla_def.h | 10 + drivers/scsi/qla2xxx/qla_gbl.h | 6 +- drivers/scsi/qla2xxx/qla_iocb.c | 13 +- drivers/scsi/qla2xxx/qla_target.c | 542 +++++++++++++++++------------ drivers/scsi/qla2xxx/qla_target.h | 38 +- drivers/scsi/qla2xxx/tcm_qla2xxx.c | 49 ++- 7 files changed, 407 insertions(+), 252 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index e1fc4e66966a..c6bffe929fe7 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -348,6 +348,7 @@ ql_log_pci(uint32_t, struct pci_dev *pdev, int32_t, const char *fmt, ...); #define ql_dbg_tgt 0x00004000 /* Target mode */ #define ql_dbg_tgt_mgt 0x00002000 /* Target mode management */ #define ql_dbg_tgt_tmr 0x00001000 /* Target mode task management */ +#define ql_dbg_tgt_dif 0x00000800 /* Target mode dif */ extern int qla27xx_dump_mpi_ram(struct qla_hw_data *, uint32_t, uint32_t *, uint32_t, void **); diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 56cd45fc600a..9d1d3dcf1c87 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3127,6 +3127,16 @@ struct bidi_statistics { unsigned long long transfer_bytes; }; +struct qla_tc_param { + struct scsi_qla_host *vha; + uint32_t blk_sz; + uint32_t bufflen; + struct scatterlist *sg; + struct scatterlist *prot_sg; + struct crc_context *ctx; + uint8_t *ctx_dsd_alloced; +}; + /* Multi queue support */ #define MBC_INITIALIZE_MULTIQ 0x1f #define QLA_QUE_PAGE 0X1000 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index b3d6441d1d90..ca6f122e5865 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -256,11 +256,11 @@ extern unsigned long qla2x00_get_async_timeout(struct scsi_qla_host *); extern void *qla2x00_alloc_iocbs(scsi_qla_host_t *, srb_t *); extern int qla2x00_issue_marker(scsi_qla_host_t *, int); extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *, - uint32_t *, uint16_t, struct qla_tgt_cmd *); + uint32_t *, uint16_t, struct qla_tc_param *); extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *); extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *); extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *, diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 535079280288..ea027f6a7fd4 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -889,7 +889,7 @@ qla24xx_get_one_block_sg(uint32_t blk_sz, struct qla2_sgx *sgx, int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, - uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -898,7 +898,6 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, struct scatterlist *sg_prot; uint32_t *cur_dsd = dsd; uint16_t used_dsds = tot_dsds; - uint32_t prot_int; /* protection interval */ uint32_t partial; struct qla2_sgx sgx; @@ -966,7 +965,7 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } @@ -1005,7 +1004,7 @@ qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *ha, srb_t *sp, int qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, - uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -1066,7 +1065,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } /* add new list to cmd iocb or last list */ @@ -1092,7 +1091,7 @@ qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd, int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, - uint32_t *dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc) + uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc) { void *next_dsd; uint8_t avail_dsds = 0; @@ -1158,7 +1157,7 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp, } else { list_add_tail(&dsd_ptr->list, &(tc->ctx->dsd_list)); - tc->ctx_dsd_alloced = 1; + *tc->ctx_dsd_alloced = 1; } /* add new list to cmd iocb or last list */ diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 925d9b858b24..532004981dbd 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -143,6 +143,20 @@ static struct workqueue_struct *qla_tgt_wq; static DEFINE_MUTEX(qla_tgt_mutex); static LIST_HEAD(qla_tgt_glist); +static const char *prot_op_str(u32 prot_op) +{ + switch (prot_op) { + case TARGET_PROT_NORMAL: return "NORMAL"; + case TARGET_PROT_DIN_INSERT: return "DIN_INSERT"; + case TARGET_PROT_DOUT_INSERT: return "DOUT_INSERT"; + case TARGET_PROT_DIN_STRIP: return "DIN_STRIP"; + case TARGET_PROT_DOUT_STRIP: return "DOUT_STRIP"; + case TARGET_PROT_DIN_PASS: return "DIN_PASS"; + case TARGET_PROT_DOUT_PASS: return "DOUT_PASS"; + default: return "UNKNOWN"; + } +} + /* This API intentionally takes dest as a parameter, rather than returning * int value to avoid caller forgetting to issue wmb() after the store */ void qlt_do_generation_tick(struct scsi_qla_host *vha, int *dest) @@ -2022,6 +2036,70 @@ void qlt_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd) } EXPORT_SYMBOL(qlt_free_mcmd); +/* + * ha->hardware_lock supposed to be held on entry. Might drop it, then + * reacquire + */ +void qlt_send_resp_ctio(scsi_qla_host_t *vha, struct qla_tgt_cmd *cmd, + uint8_t scsi_status, uint8_t sense_key, uint8_t asc, uint8_t ascq) +{ + struct atio_from_isp *atio = &cmd->atio; + struct ctio7_to_24xx *ctio; + uint16_t temp; + + ql_dbg(ql_dbg_tgt_dif, vha, 0x3066, + "Sending response CTIO7 (vha=%p, atio=%p, scsi_status=%02x, " + "sense_key=%02x, asc=%02x, ascq=%02x", + vha, atio, scsi_status, sense_key, asc, ascq); + + ctio = (struct ctio7_to_24xx *)qla2x00_alloc_iocbs(vha, NULL); + if (!ctio) { + ql_dbg(ql_dbg_async, vha, 0x3067, + "qla2x00t(%ld): %s failed: unable to allocate request packet", + vha->host_no, __func__); + goto out; + } + + ctio->entry_type = CTIO_TYPE7; + ctio->entry_count = 1; + ctio->handle = QLA_TGT_SKIP_HANDLE; + ctio->nport_handle = cmd->sess->loop_id; + ctio->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); + ctio->vp_index = vha->vp_idx; + ctio->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2]; + ctio->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1]; + ctio->initiator_id[2] = atio->u.isp24.fcp_hdr.s_id[0]; + ctio->exchange_addr = atio->u.isp24.exchange_addr; + ctio->u.status1.flags = (atio->u.isp24.attr << 9) | + cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_1 | CTIO7_FLAGS_SEND_STATUS); + temp = be16_to_cpu(atio->u.isp24.fcp_hdr.ox_id); + ctio->u.status1.ox_id = cpu_to_le16(temp); + ctio->u.status1.scsi_status = + cpu_to_le16(SS_RESPONSE_INFO_LEN_VALID | scsi_status); + ctio->u.status1.response_len = cpu_to_le16(18); + ctio->u.status1.residual = cpu_to_le32(get_datalen_for_atio(atio)); + + if (ctio->u.status1.residual != 0) + ctio->u.status1.scsi_status |= + cpu_to_le16(SS_RESIDUAL_UNDER); + + /* Response code and sense key */ + put_unaligned_le32(((0x70 << 24) | (sense_key << 8)), + (&ctio->u.status1.sense_data)[0]); + /* Additional sense length */ + put_unaligned_le32(0x0a, (&ctio->u.status1.sense_data)[1]); + /* ASC and ASCQ */ + put_unaligned_le32(((asc << 24) | (ascq << 16)), + (&ctio->u.status1.sense_data)[3]); + + /* Memory Barrier */ + wmb(); + + qla2x00_start_iocbs(vha, vha->req); +out: + return; +} + /* callback from target fabric module code */ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) { @@ -2270,7 +2348,7 @@ static int qlt_24xx_build_ctio_pkt(struct qla_tgt_prm *prm, */ return -EAGAIN; } else - ha->tgt.cmds[h-1] = prm->cmd; + ha->tgt.cmds[h - 1] = prm->cmd; pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK; pkt->nport_handle = prm->cmd->loop_id; @@ -2400,6 +2478,50 @@ static inline int qlt_has_data(struct qla_tgt_cmd *cmd) return cmd->bufflen > 0; } +static void qlt_print_dif_err(struct qla_tgt_prm *prm) +{ + struct qla_tgt_cmd *cmd; + struct scsi_qla_host *vha; + + /* asc 0x10=dif error */ + if (prm->sense_buffer && (prm->sense_buffer[12] == 0x10)) { + cmd = prm->cmd; + vha = cmd->vha; + /* ASCQ */ + switch (prm->sense_buffer[13]) { + case 1: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected Guard TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + case 2: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected APP TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + case 3: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected REF TAG ERR: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + default: + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "BE detected Dif ERR: lba[%llx|%lld] len[%x] " + "se_cmd=%p tag[%x]", + cmd->lba, cmd->lba, cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr); + break; + } + ql_dump_buffer(ql_dbg_tgt_dif, vha, 0xffff, cmd->cdb, 16); + } +} + /* * Called without ha->hardware_lock held */ @@ -2521,18 +2643,9 @@ static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, for (i = 0; i < prm->sense_buffer_len/4; i++) ((uint32_t *)ctio->u.status1.sense_data)[i] = cpu_to_be32(((uint32_t *)prm->sense_buffer)[i]); -#if 0 - if (unlikely((prm->sense_buffer_len % 4) != 0)) { - static int q; - if (q < 10) { - ql_dbg(ql_dbg_tgt, vha, 0xe04f, - "qla_target(%d): %d bytes of sense " - "lost", prm->tgt->ha->vp_idx, - prm->sense_buffer_len % 4); - q++; - } - } -#endif + + qlt_print_dif_err(prm); + } else { ctio->u.status1.flags &= ~cpu_to_le16(CTIO7_FLAGS_STATUS_MODE_0); @@ -2546,19 +2659,9 @@ static void qlt_24xx_init_ctio_to_isp(struct ctio7_to_24xx *ctio, /* Sense with len > 24, is it possible ??? */ } - - -/* diff */ static inline int qlt_hba_err_chk_enabled(struct se_cmd *se_cmd) { - /* - * Uncomment when corresponding SCSI changes are done. - * - if (!sp->cmd->prot_chk) - return 0; - * - */ switch (se_cmd->prot_op) { case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_STRIP: @@ -2579,16 +2682,38 @@ qlt_hba_err_chk_enabled(struct se_cmd *se_cmd) return 0; } -/* - * qla24xx_set_t10dif_tags_from_cmd - Extract Ref and App tags from SCSI command - * - */ -static inline void -qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx) +static inline int +qla_tgt_ref_mask_check(struct se_cmd *se_cmd) { - uint32_t lba = 0xffffffff & se_cmd->t_task_lba; + switch (se_cmd->prot_op) { + case TARGET_PROT_DIN_INSERT: + case TARGET_PROT_DOUT_INSERT: + case TARGET_PROT_DIN_STRIP: + case TARGET_PROT_DOUT_STRIP: + case TARGET_PROT_DIN_PASS: + case TARGET_PROT_DOUT_PASS: + return 1; + default: + return 0; + } + return 0; +} - /* wait til Mode Sense/Select cmd, modepage Ah, subpage 2 +/* + * qla_tgt_set_dif_tags - Extract Ref and App tags from SCSI command + */ +static void +qla_tgt_set_dif_tags(struct qla_tgt_cmd *cmd, struct crc_context *ctx, + uint16_t *pfw_prot_opts) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + uint32_t lba = 0xffffffff & se_cmd->t_task_lba; + scsi_qla_host_t *vha = cmd->tgt->vha; + struct qla_hw_data *ha = vha->hw; + uint32_t t32 = 0; + + /* + * wait till Mode Sense/Select cmd, modepage Ah, subpage 2 * have been immplemented by TCM, before AppTag is avail. * Look for modesense_handlers[] */ @@ -2596,65 +2721,73 @@ qlt_set_t10dif_tags(struct se_cmd *se_cmd, struct crc_context *ctx) ctx->app_tag_mask[0] = 0x0; ctx->app_tag_mask[1] = 0x0; + if (IS_PI_UNINIT_CAPABLE(ha)) { + if ((se_cmd->prot_type == TARGET_DIF_TYPE1_PROT) || + (se_cmd->prot_type == TARGET_DIF_TYPE2_PROT)) + *pfw_prot_opts |= PO_DIS_VALD_APP_ESC; + else if (se_cmd->prot_type == TARGET_DIF_TYPE3_PROT) + *pfw_prot_opts |= PO_DIS_VALD_APP_REF_ESC; + } + + t32 = ha->tgt.tgt_ops->get_dif_tags(cmd, pfw_prot_opts); + switch (se_cmd->prot_type) { case TARGET_DIF_TYPE0_PROT: /* - * No check for ql2xenablehba_err_chk, as it would be an - * I/O error if hba tag generation is not done. + * No check for ql2xenablehba_err_chk, as it + * would be an I/O error if hba tag generation + * is not done. */ ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - /* enable ALL bytes of the ref tag */ ctx->ref_tag_mask[0] = 0xff; ctx->ref_tag_mask[1] = 0xff; ctx->ref_tag_mask[2] = 0xff; ctx->ref_tag_mask[3] = 0xff; break; - /* - * For TYpe 1 protection: 16 bit GUARD tag, 32 bit REF tag, and - * 16 bit app tag. - */ case TARGET_DIF_TYPE1_PROT: - ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - - /* enable ALL bytes of the ref tag */ - ctx->ref_tag_mask[0] = 0xff; - ctx->ref_tag_mask[1] = 0xff; - ctx->ref_tag_mask[2] = 0xff; - ctx->ref_tag_mask[3] = 0xff; - break; - /* - * For TYPE 2 protection: 16 bit GUARD + 32 bit REF tag has to - * match LBA in CDB + N - */ + /* + * For TYPE 1 protection: 16 bit GUARD tag, 32 bit + * REF tag, and 16 bit app tag. + */ + ctx->ref_tag = cpu_to_le32(lba); + if (!qla_tgt_ref_mask_check(se_cmd) || + !(ha->tgt.tgt_ops->chk_dif_tags(t32))) { + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + break; + } + /* enable ALL bytes of the ref tag */ + ctx->ref_tag_mask[0] = 0xff; + ctx->ref_tag_mask[1] = 0xff; + ctx->ref_tag_mask[2] = 0xff; + ctx->ref_tag_mask[3] = 0xff; + break; case TARGET_DIF_TYPE2_PROT: - ctx->ref_tag = cpu_to_le32(lba); - - if (!qlt_hba_err_chk_enabled(se_cmd)) - break; - - /* enable ALL bytes of the ref tag */ - ctx->ref_tag_mask[0] = 0xff; - ctx->ref_tag_mask[1] = 0xff; - ctx->ref_tag_mask[2] = 0xff; - ctx->ref_tag_mask[3] = 0xff; - break; - - /* For Type 3 protection: 16 bit GUARD only */ + /* + * For TYPE 2 protection: 16 bit GUARD + 32 bit REF + * tag has to match LBA in CDB + N + */ + ctx->ref_tag = cpu_to_le32(lba); + if (!qla_tgt_ref_mask_check(se_cmd) || + !(ha->tgt.tgt_ops->chk_dif_tags(t32))) { + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + break; + } + /* enable ALL bytes of the ref tag */ + ctx->ref_tag_mask[0] = 0xff; + ctx->ref_tag_mask[1] = 0xff; + ctx->ref_tag_mask[2] = 0xff; + ctx->ref_tag_mask[3] = 0xff; + break; case TARGET_DIF_TYPE3_PROT: - ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] = - ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00; - break; + /* For TYPE 3 protection: 16 bit GUARD only */ + *pfw_prot_opts |= PO_DIS_REF_TAG_VALD; + ctx->ref_tag_mask[0] = ctx->ref_tag_mask[1] = + ctx->ref_tag_mask[2] = ctx->ref_tag_mask[3] = 0x00; + break; } } - static inline int qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) { @@ -2673,6 +2806,7 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) struct se_cmd *se_cmd = &cmd->se_cmd; uint32_t h; struct atio_from_isp *atio = &prm->cmd->atio; + struct qla_tc_param tc; uint16_t t16; ha = vha->hw; @@ -2698,16 +2832,15 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) case TARGET_PROT_DIN_INSERT: case TARGET_PROT_DOUT_STRIP: transfer_length = data_bytes; - data_bytes += dif_bytes; + if (cmd->prot_sg_cnt) + data_bytes += dif_bytes; break; - case TARGET_PROT_DIN_STRIP: case TARGET_PROT_DOUT_INSERT: case TARGET_PROT_DIN_PASS: case TARGET_PROT_DOUT_PASS: transfer_length = data_bytes + dif_bytes; break; - default: BUG(); break; @@ -2743,7 +2876,6 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) break; } - /* ---- PKT ---- */ /* Update entry type to indicate Command Type CRC_2 IOCB */ pkt->entry_type = CTIO_CRC2; @@ -2761,9 +2893,8 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) } else ha->tgt.cmds[h-1] = prm->cmd; - pkt->handle = h | CTIO_COMPLETION_HANDLE_MARK; - pkt->nport_handle = prm->cmd->loop_id; + pkt->nport_handle = cpu_to_le16(prm->cmd->loop_id); pkt->timeout = cpu_to_le16(QLA_TGT_TIMEOUT); pkt->initiator_id[0] = atio->u.isp24.fcp_hdr.s_id[2]; pkt->initiator_id[1] = atio->u.isp24.fcp_hdr.s_id[1]; @@ -2784,12 +2915,10 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) else if (cmd->dma_data_direction == DMA_FROM_DEVICE) pkt->flags = cpu_to_le16(CTIO7_FLAGS_DATA_OUT); - pkt->dseg_count = prm->tot_dsds; /* Fibre channel byte count */ pkt->transfer_length = cpu_to_le32(transfer_length); - /* ----- CRC context -------- */ /* Allocate CRC context from global pool */ @@ -2809,13 +2938,12 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) /* Set handle */ crc_ctx_pkt->handle = pkt->handle; - qlt_set_t10dif_tags(se_cmd, crc_ctx_pkt); + qla_tgt_set_dif_tags(cmd, crc_ctx_pkt, &fw_prot_opts); pkt->crc_context_address[0] = cpu_to_le32(LSD(crc_ctx_dma)); pkt->crc_context_address[1] = cpu_to_le32(MSD(crc_ctx_dma)); pkt->crc_context_len = CRC_CONTEXT_LEN_FW; - if (!bundling) { cur_dsd = (uint32_t *) &crc_ctx_pkt->u.nobundling.data_address; } else { @@ -2836,16 +2964,24 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) crc_ctx_pkt->byte_count = cpu_to_le32(data_bytes); crc_ctx_pkt->guard_seed = cpu_to_le16(0); + memset((uint8_t *)&tc, 0 , sizeof(tc)); + tc.vha = vha; + tc.blk_sz = cmd->blk_sz; + tc.bufflen = cmd->bufflen; + tc.sg = cmd->sg; + tc.prot_sg = cmd->prot_sg; + tc.ctx = crc_ctx_pkt; + tc.ctx_dsd_alloced = &cmd->ctx_dsd_alloced; /* Walks data segments */ pkt->flags |= cpu_to_le16(CTIO7_FLAGS_DSD_PTR); if (!bundling && prm->prot_seg_cnt) { if (qla24xx_walk_and_build_sglist_no_difb(ha, NULL, cur_dsd, - prm->tot_dsds, cmd)) + prm->tot_dsds, &tc)) goto crc_queuing_error; } else if (qla24xx_walk_and_build_sglist(ha, NULL, cur_dsd, - (prm->tot_dsds - prm->prot_seg_cnt), cmd)) + (prm->tot_dsds - prm->prot_seg_cnt), &tc)) goto crc_queuing_error; if (bundling && prm->prot_seg_cnt) { @@ -2854,18 +2990,18 @@ qlt_build_ctio_crc2_pkt(struct qla_tgt_prm *prm, scsi_qla_host_t *vha) cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address; if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd, - prm->prot_seg_cnt, cmd)) + prm->prot_seg_cnt, &tc)) goto crc_queuing_error; } return QLA_SUCCESS; crc_queuing_error: /* Cleanup will be performed by the caller */ + vha->hw->tgt.cmds[h - 1] = NULL; return QLA_FUNCTION_FAILED; } - /* * Callback to setup response of xmit_type of QLA_TGT_XMIT_DATA and * * QLA_TGT_XMIT_STATUS for >= 24xx silicon @@ -3113,139 +3249,113 @@ EXPORT_SYMBOL(qlt_rdy_to_xfer); /* - * Checks the guard or meta-data for the type of error - * detected by the HBA. + * it is assumed either hardware_lock or qpair lock is held. */ -static inline int +static void qlt_handle_dif_error(struct scsi_qla_host *vha, struct qla_tgt_cmd *cmd, - struct ctio_crc_from_fw *sts) + struct ctio_crc_from_fw *sts) { uint8_t *ap = &sts->actual_dif[0]; uint8_t *ep = &sts->expected_dif[0]; - uint32_t e_ref_tag, a_ref_tag; - uint16_t e_app_tag, a_app_tag; - uint16_t e_guard, a_guard; uint64_t lba = cmd->se_cmd.t_task_lba; + uint8_t scsi_status, sense_key, asc, ascq; + unsigned long flags; - a_guard = be16_to_cpu(*(uint16_t *)(ap + 0)); - a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2)); - a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4)); + cmd->trc_flags |= TRC_DIF_ERR; - e_guard = be16_to_cpu(*(uint16_t *)(ep + 0)); - e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2)); - e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4)); + cmd->a_guard = be16_to_cpu(*(uint16_t *)(ap + 0)); + cmd->a_app_tag = be16_to_cpu(*(uint16_t *)(ap + 2)); + cmd->a_ref_tag = be32_to_cpu(*(uint32_t *)(ap + 4)); - ql_dbg(ql_dbg_tgt, vha, 0xe075, - "iocb(s) %p Returned STATUS.\n", sts); + cmd->e_guard = be16_to_cpu(*(uint16_t *)(ep + 0)); + cmd->e_app_tag = be16_to_cpu(*(uint16_t *)(ep + 2)); + cmd->e_ref_tag = be32_to_cpu(*(uint32_t *)(ep + 4)); - ql_dbg(ql_dbg_tgt, vha, 0xf075, - "dif check TGT cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x]\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, a_guard, e_guard); + ql_dbg(ql_dbg_tgt_dif, vha, 0xf075, + "%s: aborted %d state %d\n", __func__, cmd->aborted, cmd->state); - /* - * Ignore sector if: - * For type 3: ref & app tag is all 'f's - * For type 0,1,2: app tag is all 'f's - */ - if ((a_app_tag == 0xffff) && - ((cmd->se_cmd.prot_type != TARGET_DIF_TYPE3_PROT) || - (a_ref_tag == 0xffffffff))) { - uint32_t blocks_done; + scsi_status = sense_key = asc = ascq = 0; - /* 2TB boundary case covered automatically with this */ - blocks_done = e_ref_tag - (uint32_t)lba + 1; - cmd->se_cmd.bad_sector = e_ref_tag; - cmd->se_cmd.pi_err = 0; - ql_dbg(ql_dbg_tgt, vha, 0xf074, - "need to return scsi good\n"); + /* check appl tag */ + if (cmd->e_app_tag != cmd->a_app_tag) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "App Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard [%x|%x] cmd=%p ox_id[%04x]", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); - /* Update protection tag */ - if (cmd->prot_sg_cnt) { - uint32_t i, k = 0, num_ent; - struct scatterlist *sg, *sgl; - - - sgl = cmd->prot_sg; - - /* Patch the corresponding protection tags */ - for_each_sg(sgl, sg, cmd->prot_sg_cnt, i) { - num_ent = sg_dma_len(sg) / 8; - if (k + num_ent < blocks_done) { - k += num_ent; - continue; - } - k = blocks_done; - break; - } - - if (k != blocks_done) { - ql_log(ql_log_warn, vha, 0xf076, - "unexpected tag values tag:lba=%u:%llu)\n", - e_ref_tag, (unsigned long long)lba); - goto out; - } - -#if 0 - struct sd_dif_tuple *spt; - /* TODO: - * This section came from initiator. Is it valid here? - * should ulp be override with actual val??? - */ - spt = page_address(sg_page(sg)) + sg->offset; - spt += j; - - spt->app_tag = 0xffff; - if (cmd->se_cmd.prot_type == SCSI_PROT_DIF_TYPE3) - spt->ref_tag = 0xffffffff; -#endif - } - - return 0; - } - - /* check guard */ - if (e_guard != a_guard) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; - cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba; - - ql_log(ql_log_warn, vha, 0xe076, - "Guard ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); - goto out; + cmd->dif_err_code = DIF_ERR_APP; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x2; } /* check ref tag */ - if (e_ref_tag != a_ref_tag) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; - cmd->se_cmd.bad_sector = e_ref_tag; + if (cmd->e_ref_tag != cmd->a_ref_tag) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Ref Tag ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard[%x|%x] cmd=%p ox_id[%04x] ", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); - ql_log(ql_log_warn, vha, 0xe077, - "Ref Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); + cmd->dif_err_code = DIF_ERR_REF; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x3; goto out; } - /* check appl tag */ - if (e_app_tag != a_app_tag) { - cmd->se_cmd.pi_err = TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; - cmd->se_cmd.bad_sector = cmd->se_cmd.t_task_lba; - - ql_log(ql_log_warn, vha, 0xe078, - "App Tag ERR: cdb 0x%x lba 0x%llx: [Actual|Expected] Ref Tag[0x%x|0x%x], App Tag [0x%x|0x%x], Guard [0x%x|0x%x] cmd=%p\n", - cmd->atio.u.isp24.fcp_cmnd.cdb[0], lba, - a_ref_tag, e_ref_tag, a_app_tag, e_app_tag, - a_guard, e_guard, cmd); - goto out; + /* check guard */ + if (cmd->e_guard != cmd->a_guard) { + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Guard ERR: cdb[%x] lba[%llx %llx] blks[%x] [Actual|Expected] " + "Ref[%x|%x], App[%x|%x], " + "Guard [%x|%x] cmd=%p ox_id[%04x]", + cmd->cdb[0], lba, (lba+cmd->num_blks), cmd->num_blks, + cmd->a_ref_tag, cmd->e_ref_tag, + cmd->a_app_tag, cmd->e_app_tag, + cmd->a_guard, cmd->e_guard, + cmd, cmd->atio.u.isp24.fcp_hdr.ox_id); + cmd->dif_err_code = DIF_ERR_GRD; + scsi_status = SAM_STAT_CHECK_CONDITION; + sense_key = ABORTED_COMMAND; + asc = 0x10; + ascq = 0x1; } out: - return 1; -} + switch (cmd->state) { + case QLA_TGT_STATE_NEED_DATA: + /* handle_data will load DIF error code */ + cmd->state = QLA_TGT_STATE_DATA_IN; + vha->hw->tgt.tgt_ops->handle_data(cmd); + break; + default: + spin_lock_irqsave(&cmd->cmd_lock, flags); + if (cmd->aborted) { + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + vha->hw->tgt.tgt_ops->free_cmd(cmd); + break; + } + spin_unlock_irqrestore(&cmd->cmd_lock, flags); + qlt_send_resp_ctio(vha, cmd, scsi_status, sense_key, asc, ascq); + /* assume scsi status gets out on the wire. + * Will not wait for completion. + */ + vha->hw->tgt.tgt_ops->free_cmd(cmd); + break; + } +} /* If hardware_lock held on entry, might drop it, then reaquire */ /* This function sends the appropriate CTIO to ISP 2xxx or 24xx */ @@ -3552,6 +3662,16 @@ static int qlt_term_ctio_exchange(struct scsi_qla_host *vha, void *ctio, { int term = 0; + if (cmd->se_cmd.prot_op) + ql_dbg(ql_dbg_tgt_dif, vha, 0xffff, + "Term DIF cmd: lba[0x%llx|%lld] len[0x%x] " + "se_cmd=%p tag[%x] op %#x/%s", + cmd->lba, cmd->lba, + cmd->num_blks, &cmd->se_cmd, + cmd->atio.u.isp24.exchange_addr, + cmd->se_cmd.prot_op, + prot_op_str(cmd->se_cmd.prot_op)); + if (ctio != NULL) { struct ctio7_from_24xx *c = (struct ctio7_from_24xx *)ctio; term = !(c->flags & @@ -3769,32 +3889,15 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, struct ctio_crc_from_fw *crc = (struct ctio_crc_from_fw *)ctio; ql_dbg(ql_dbg_tgt_mgt, vha, 0xf073, - "qla_target(%d): CTIO with DIF_ERROR status %x received (state %x, se_cmd %p) actual_dif[0x%llx] expect_dif[0x%llx]\n", + "qla_target(%d): CTIO with DIF_ERROR status %x " + "received (state %x, ulp_cmd %p) actual_dif[0x%llx] " + "expect_dif[0x%llx]\n", vha->vp_idx, status, cmd->state, se_cmd, *((u64 *)&crc->actual_dif[0]), *((u64 *)&crc->expected_dif[0])); - if (qlt_handle_dif_error(vha, cmd, ctio)) { - if (cmd->state == QLA_TGT_STATE_NEED_DATA) { - /* scsi Write/xfer rdy complete */ - goto skip_term; - } else { - /* scsi read/xmit respond complete - * call handle dif to send scsi status - * rather than terminate exchange. - */ - cmd->state = QLA_TGT_STATE_PROCESSED; - ha->tgt.tgt_ops->handle_dif_err(cmd); - return; - } - } else { - /* Need to generate a SCSI good completion. - * because FW did not send scsi status. - */ - status = 0; - goto skip_term; - } - break; + qlt_handle_dif_error(vha, cmd, ctio); + return; } default: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05b, @@ -3817,7 +3920,6 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle, return; } } -skip_term: if (cmd->state == QLA_TGT_STATE_PROCESSED) { cmd->trc_flags |= TRC_CTIO_DONE; diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index a7f90dcaae37..c35f889b94a6 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -378,6 +378,14 @@ static inline void adjust_corrupted_atio(struct atio_from_isp *atio) atio->u.isp24.fcp_cmnd.add_cdb_len = 0; } +static inline int get_datalen_for_atio(struct atio_from_isp *atio) +{ + int len = atio->u.isp24.fcp_cmnd.add_cdb_len; + + return (be32_to_cpu(get_unaligned((uint32_t *) + &atio->u.isp24.fcp_cmnd.add_cdb[len * 4]))); +} + #define CTIO_TYPE7 0x12 /* Continue target I/O entry (for 24xx) */ /* @@ -667,7 +675,6 @@ struct qla_tgt_func_tmpl { int (*handle_cmd)(struct scsi_qla_host *, struct qla_tgt_cmd *, unsigned char *, uint32_t, int, int, int); void (*handle_data)(struct qla_tgt_cmd *); - void (*handle_dif_err)(struct qla_tgt_cmd *); int (*handle_tmr)(struct qla_tgt_mgmt_cmd *, uint32_t, uint16_t, uint32_t); void (*free_cmd)(struct qla_tgt_cmd *); @@ -684,6 +691,8 @@ struct qla_tgt_func_tmpl { void (*clear_nacl_from_fcport_map)(struct fc_port *); void (*put_sess)(struct fc_port *); void (*shutdown_sess)(struct fc_port *); + int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts); + int (*chk_dif_tags)(uint32_t tag); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); @@ -720,8 +729,8 @@ int qla2x00_wait_for_hba_online(struct scsi_qla_host *); #define QLA_TGT_ABORT_ALL 0xFFFE #define QLA_TGT_NEXUS_LOSS_SESS 0xFFFD #define QLA_TGT_NEXUS_LOSS 0xFFFC -#define QLA_TGT_ABTS 0xFFFB -#define QLA_TGT_2G_ABORT_TASK 0xFFFA +#define QLA_TGT_ABTS 0xFFFB +#define QLA_TGT_2G_ABORT_TASK 0xFFFA /* Notify Acknowledge flags */ #define NOTIFY_ACK_RES_COUNT BIT_8 @@ -845,6 +854,7 @@ enum trace_flags { TRC_CMD_FREE = BIT_17, TRC_DATA_IN = BIT_18, TRC_ABORT = BIT_19, + TRC_DIF_ERR = BIT_20, }; struct qla_tgt_cmd { @@ -862,7 +872,6 @@ struct qla_tgt_cmd { unsigned int sg_mapped:1; unsigned int free_sg:1; unsigned int write_data_transferred:1; - unsigned int ctx_dsd_alloced:1; unsigned int q_full:1; unsigned int term_exchg:1; unsigned int cmd_sent_to_fw:1; @@ -885,11 +894,25 @@ struct qla_tgt_cmd { struct list_head cmd_list; struct atio_from_isp atio; - /* t10dif */ + + uint8_t ctx_dsd_alloced; + + /* T10-DIF */ +#define DIF_ERR_NONE 0 +#define DIF_ERR_GRD 1 +#define DIF_ERR_REF 2 +#define DIF_ERR_APP 3 + int8_t dif_err_code; struct scatterlist *prot_sg; uint32_t prot_sg_cnt; - uint32_t blk_sz; + uint32_t blk_sz, num_blks; + uint8_t scsi_status, sense_key, asc, ascq; + struct crc_context *ctx; + uint8_t *cdb; + uint64_t lba; + uint16_t a_guard, e_guard, a_app_tag, e_app_tag; + uint32_t a_ref_tag, e_ref_tag; uint64_t jiffies_at_alloc; uint64_t jiffies_at_free; @@ -1053,4 +1076,7 @@ extern int qlt_free_qfull_cmds(struct scsi_qla_host *); extern void qlt_logo_completion_handler(fc_port_t *, int); extern void qlt_do_generation_tick(struct scsi_qla_host *, int *); +void qlt_send_resp_ctio(scsi_qla_host_t *, struct qla_tgt_cmd *, uint8_t, + uint8_t, uint8_t, uint8_t); + #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 8e8ab0fa9672..7443e4efa3ae 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -531,6 +531,24 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work) return; } + switch (cmd->dif_err_code) { + case DIF_ERR_GRD: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED; + break; + case DIF_ERR_REF: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED; + break; + case DIF_ERR_APP: + cmd->se_cmd.pi_err = + TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED; + break; + case DIF_ERR_NONE: + default: + break; + } + if (cmd->se_cmd.pi_err) transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err); @@ -555,25 +573,23 @@ static void tcm_qla2xxx_handle_data(struct qla_tgt_cmd *cmd) queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work); } -static void tcm_qla2xxx_handle_dif_work(struct work_struct *work) +static int tcm_qla2xxx_chk_dif_tags(uint32_t tag) { - struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work); - - /* take an extra kref to prevent cmd free too early. - * need to wait for SCSI status/check condition to - * finish responding generate by transport_generic_request_failure. - */ - kref_get(&cmd->se_cmd.cmd_kref); - transport_generic_request_failure(&cmd->se_cmd, cmd->se_cmd.pi_err); + return 0; } -/* - * Called from qla_target.c:qlt_do_ctio_completion() - */ -static void tcm_qla2xxx_handle_dif_err(struct qla_tgt_cmd *cmd) +static int tcm_qla2xxx_dif_tags(struct qla_tgt_cmd *cmd, + uint16_t *pfw_prot_opts) { - INIT_WORK(&cmd->work, tcm_qla2xxx_handle_dif_work); - queue_work(tcm_qla2xxx_free_wq, &cmd->work); + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD)) + *pfw_prot_opts |= PO_DISABLE_GUARD_CHECK; + + if (!(se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG)) + *pfw_prot_opts |= PO_DIS_APP_TAG_VALD; + + return 0; } /* @@ -1610,7 +1626,6 @@ static void tcm_qla2xxx_update_sess(struct fc_port *sess, port_id_t s_id, static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .handle_cmd = tcm_qla2xxx_handle_cmd, .handle_data = tcm_qla2xxx_handle_data, - .handle_dif_err = tcm_qla2xxx_handle_dif_err, .handle_tmr = tcm_qla2xxx_handle_tmr, .free_cmd = tcm_qla2xxx_free_cmd, .free_mcmd = tcm_qla2xxx_free_mcmd, @@ -1622,6 +1637,8 @@ static struct qla_tgt_func_tmpl tcm_qla2xxx_template = { .clear_nacl_from_fcport_map = tcm_qla2xxx_clear_nacl_from_fcport_map, .put_sess = tcm_qla2xxx_put_sess, .shutdown_sess = tcm_qla2xxx_shutdown_sess, + .get_dif_tags = tcm_qla2xxx_dif_tags, + .chk_dif_tags = tcm_qla2xxx_chk_dif_tags, }; static int tcm_qla2xxx_init_lport(struct tcm_qla2xxx_lport *lport) From 54b9993c8cf2d77c0f23be828a22e0817f742442 Mon Sep 17 00:00:00 2001 From: Anil Gurumurthy Date: Wed, 15 Mar 2017 09:48:50 -0700 Subject: [PATCH 363/384] qla2xxx: Export DIF stats via debugfs Signed-off-by: Anil Gurumurthy Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 12 ++++++++++++ drivers/scsi/qla2xxx/qla_dfs.c | 15 +++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9d1d3dcf1c87..8228dfac6a31 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3108,6 +3108,16 @@ struct qla_chip_state_84xx { uint32_t gold_fw_version; }; +struct qla_dif_statistics { + uint64_t dif_input_bytes; + uint64_t dif_output_bytes; + uint64_t dif_input_requests; + uint64_t dif_output_requests; + uint32_t dif_guard_err; + uint32_t dif_ref_tag_err; + uint32_t dif_app_tag_err; +}; + struct qla_statistics { uint32_t total_isp_aborts; uint64_t input_bytes; @@ -3120,6 +3130,8 @@ struct qla_statistics { uint32_t stat_max_pend_cmds; uint32_t stat_max_qfull_cmds_alloc; uint32_t stat_max_qfull_cmds_dropped; + + struct qla_dif_statistics qla_dif_stats; }; struct bidi_statistics { diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index b48cce696bac..3b35905619b0 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -114,6 +114,21 @@ qla_dfs_tgt_counters_show(struct seq_file *s, void *unused) seq_printf(s, "num Q full sent = %lld\n", vha->tgt_counters.num_q_full_sent); + /* DIF stats */ + seq_printf(s, "DIF Inp Bytes = %lld\n", + vha->qla_stats.qla_dif_stats.dif_input_bytes); + seq_printf(s, "DIF Outp Bytes = %lld\n", + vha->qla_stats.qla_dif_stats.dif_output_bytes); + seq_printf(s, "DIF Inp Req = %lld\n", + vha->qla_stats.qla_dif_stats.dif_input_requests); + seq_printf(s, "DIF Outp Req = %lld\n", + vha->qla_stats.qla_dif_stats.dif_output_requests); + seq_printf(s, "DIF Guard err = %d\n", + vha->qla_stats.qla_dif_stats.dif_guard_err); + seq_printf(s, "DIF Ref tag err = %d\n", + vha->qla_stats.qla_dif_stats.dif_ref_tag_err); + seq_printf(s, "DIF App tag err = %d\n", + vha->qla_stats.qla_dif_stats.dif_app_tag_err); return 0; } From f1443eebca7792b3b8b41b27652d67ddc5d31fa2 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:51 -0700 Subject: [PATCH 364/384] qla2xxx: Add async new target notification Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_target.c | 6 +++--- drivers/scsi/qla2xxx/qla_target.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 532004981dbd..563116188c43 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -6005,13 +6005,13 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX; tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX; - if (base_vha->fc_vport) - return 0; - mutex_lock(&qla_tgt_mutex); list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist); mutex_unlock(&qla_tgt_mutex); + if (ha->tgt.tgt_ops && ha->tgt.tgt_ops->add_target) + ha->tgt.tgt_ops->add_target(base_vha); + return 0; } diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index c35f889b94a6..d64420251194 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -693,6 +693,7 @@ struct qla_tgt_func_tmpl { void (*shutdown_sess)(struct fc_port *); int (*get_dif_tags)(struct qla_tgt_cmd *cmd, uint16_t *pfw_prot_opts); int (*chk_dif_tags)(uint32_t tag); + void (*add_target)(struct scsi_qla_host *); }; int qla2x00_wait_for_hba_online(struct scsi_qla_host *); From 15f30a5752287f20c7de428423c34bc51cfbe465 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:52 -0700 Subject: [PATCH 365/384] qla2xxx: Use IOCB interface to submit non-critical MBX. The Mailbox interface is currently over subscribed. We like to reserve the Mailbox interface for the chip managment and link initialization. Any non essential Mailbox command will be routed through the IOCB interface. The IOCB interface is able to absorb more commands. Following commands are being routed through IOCB interface - Get ID List (007Ch) - Get Port DB (0064h) - Get Link Priv Stats (006Dh) Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 12 +- drivers/scsi/qla2xxx/qla_gbl.h | 10 +- drivers/scsi/qla2xxx/qla_init.c | 46 +---- drivers/scsi/qla2xxx/qla_isr.c | 2 +- drivers/scsi/qla2xxx/qla_mbx.c | 270 ++++++++++++++++++++++++++++-- drivers/scsi/qla2xxx/qla_target.c | 4 +- 6 files changed, 279 insertions(+), 65 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 8228dfac6a31..ae38b7a789b1 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -395,11 +395,15 @@ struct srb_iocb { struct completion comp; } abt; struct ct_arg ctarg; +#define MAX_IOCB_MB_REG 28 +#define SIZEOF_IOCB_MB_REG (MAX_IOCB_MB_REG * sizeof(uint16_t)) struct { - __le16 in_mb[28]; /* fr fw */ - __le16 out_mb[28]; /* to fw */ + __le16 in_mb[MAX_IOCB_MB_REG]; /* from FW */ + __le16 out_mb[MAX_IOCB_MB_REG]; /* to FW */ void *out, *in; dma_addr_t out_dma, in_dma; + struct completion comp; + int rc; } mbx; struct { struct imm_ntfy_from_isp *ntfy; @@ -437,7 +441,7 @@ typedef struct srb { uint32_t handle; uint16_t flags; uint16_t type; - char *name; + const char *name; int iocbs; struct qla_qpair *qpair; u32 gen1; /* scratch */ @@ -3364,6 +3368,8 @@ struct qla_hw_data { uint32_t exlogins_enabled:1; uint32_t exchoffld_enabled:1; /* 35 bits */ + + uint32_t fw_started:1; } flags; /* This spinlock is used to protect "io transactions", you must diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index ca6f122e5865..323b912b47f7 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -193,6 +193,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *); void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, uint16_t *); int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); +int qla24xx_async_abort_cmd(srb_t *); /* * Global Functions in qla_mid.c source file. @@ -368,7 +369,7 @@ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *, extern int qla24xx_get_isp_stats(scsi_qla_host_t *, struct link_statistics *, - dma_addr_t, uint); + dma_addr_t, uint16_t); extern int qla24xx_abort_command(srb_t *); extern int qla24xx_async_abort_command(srb_t *); @@ -472,6 +473,13 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *, dma_addr_t, uint32_t, uint32_t); extern int qla26xx_dport_diagnostics(scsi_qla_host_t *, void *, uint, uint); +int qla24xx_send_mb_cmd(struct scsi_qla_host *, mbx_cmd_t *); +int qla24xx_gpdb_wait(struct scsi_qla_host *, fc_port_t *, u8); +int qla24xx_gidlist_wait(struct scsi_qla_host *, void *, dma_addr_t, + uint16_t *); +int __qla24xx_parse_gpdb(struct scsi_qla_host *, fc_port_t *, + struct port_database_24xx *); + /* * Global Function Prototypes in qla_isr.c source file. */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index a7865a5d556d..b1bfa63f7d4e 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -629,7 +629,6 @@ void qla24xx_async_gpdb_sp_done(void *s, int res) struct srb *sp = s; struct scsi_qla_host *vha = sp->vha; struct qla_hw_data *ha = vha->hw; - uint64_t zero = 0; struct port_database_24xx *pd; fc_port_t *fcport = sp->fcport; u16 *mb = sp->u.iocb_cmd.u.mbx.in_mb; @@ -649,48 +648,7 @@ void qla24xx_async_gpdb_sp_done(void *s, int res) pd = (struct port_database_24xx *)sp->u.iocb_cmd.u.mbx.in; - /* Check for logged in state. */ - if (pd->current_login_state != PDS_PRLI_COMPLETE && - pd->last_login_state != PDS_PRLI_COMPLETE) { - ql_dbg(ql_dbg_mbx, vha, 0xffff, - "Unable to verify login-state (%x/%x) for " - "loop_id %x.\n", pd->current_login_state, - pd->last_login_state, fcport->loop_id); - rval = QLA_FUNCTION_FAILED; - goto gpd_error_out; - } - - if (fcport->loop_id == FC_NO_LOOP_ID || - (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && - memcmp(fcport->port_name, pd->port_name, 8))) { - /* We lost the device mid way. */ - rval = QLA_NOT_LOGGED_IN; - goto gpd_error_out; - } - - /* Names are little-endian. */ - memcpy(fcport->node_name, pd->node_name, WWN_SIZE); - - /* Get port_id of device. */ - fcport->d_id.b.domain = pd->port_id[0]; - fcport->d_id.b.area = pd->port_id[1]; - fcport->d_id.b.al_pa = pd->port_id[2]; - fcport->d_id.b.rsvd_1 = 0; - - /* If not target must be initiator or unknown type. */ - if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) - fcport->port_type = FCT_INITIATOR; - else - fcport->port_type = FCT_TARGET; - - /* Passback COS information. */ - fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? - FC_COS_CLASS2 : FC_COS_CLASS3; - - if (pd->prli_svc_param_word_3[0] & BIT_7) { - fcport->flags |= FCF_CONF_COMP_SUPPORTED; - fcport->conf_compl_supported = 1; - } + rval = __qla24xx_parse_gpdb(vha, fcport, pd); gpd_error_out: memset(&ea, 0, sizeof(ea)); @@ -1266,7 +1224,7 @@ qla24xx_abort_sp_done(void *ptr, int res) complete(&abt->u.abt.comp); } -static int +int qla24xx_async_abort_cmd(srb_t *cmd_sp) { scsi_qla_host_t *vha = cmd_sp->vha; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index b2c6da752edd..3953c8d6af69 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2692,7 +2692,7 @@ qla24xx_abort_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, return; abt = &sp->u.iocb_cmd; - abt->u.abt.comp_status = le32_to_cpu(pkt->nport_handle); + abt->u.abt.comp_status = le16_to_cpu(pkt->nport_handle); sp->done(sp, 0); } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 35079f417417..e40ed570d3c1 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -10,6 +10,28 @@ #include #include +static struct mb_cmd_name { + uint16_t cmd; + const char *str; +} mb_str[] = { + {MBC_GET_PORT_DATABASE, "GPDB"}, + {MBC_GET_ID_LIST, "GIDList"}, + {MBC_GET_LINK_PRIV_STATS, "Stats"}, +}; + +static const char *mb_to_str(uint16_t cmd) +{ + int i; + struct mb_cmd_name *e; + + for (i = 0; i < ARRAY_SIZE(mb_str); i++) { + e = mb_str + i; + if (cmd == e->cmd) + return e->str; + } + return "unknown"; +} + static struct rom_cmd { uint16_t cmd; } rom_cmds[] = { @@ -2818,7 +2840,7 @@ qla2x00_get_link_status(scsi_qla_host_t *vha, uint16_t loop_id, int qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, - dma_addr_t stats_dma, uint options) + dma_addr_t stats_dma, uint16_t options) { int rval; mbx_cmd_t mc; @@ -2828,19 +2850,17 @@ qla24xx_get_isp_stats(scsi_qla_host_t *vha, struct link_statistics *stats, ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1088, "Entered %s.\n", __func__); - mcp->mb[0] = MBC_GET_LINK_PRIV_STATS; - mcp->mb[2] = MSW(stats_dma); - mcp->mb[3] = LSW(stats_dma); - mcp->mb[6] = MSW(MSD(stats_dma)); - mcp->mb[7] = LSW(MSD(stats_dma)); - mcp->mb[8] = sizeof(struct link_statistics) / 4; - mcp->mb[9] = vha->vp_idx; - mcp->mb[10] = options; - mcp->out_mb = MBX_10|MBX_9|MBX_8|MBX_7|MBX_6|MBX_3|MBX_2|MBX_0; - mcp->in_mb = MBX_2|MBX_1|MBX_0; - mcp->tov = MBX_TOV_SECONDS; - mcp->flags = IOCTL_CMD; - rval = qla2x00_mailbox_command(vha, mcp); + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_LINK_PRIV_STATS; + mc.mb[2] = MSW(stats_dma); + mc.mb[3] = LSW(stats_dma); + mc.mb[6] = MSW(MSD(stats_dma)); + mc.mb[7] = LSW(MSD(stats_dma)); + mc.mb[8] = sizeof(struct link_statistics) / 4; + mc.mb[9] = cpu_to_le16(vha->vp_idx); + mc.mb[10] = cpu_to_le16(options); + + rval = qla24xx_send_mb_cmd(vha, &mc); if (rval == QLA_SUCCESS) { if (mcp->mb[0] != MBS_COMMAND_COMPLETE) { @@ -5827,3 +5847,225 @@ qla26xx_dport_diagnostics(scsi_qla_host_t *vha, return rval; } + +static void qla2x00_async_mb_sp_done(void *s, int res) +{ + struct srb *sp = s; + + sp->u.iocb_cmd.u.mbx.rc = res; + + complete(&sp->u.iocb_cmd.u.mbx.comp); + /* don't free sp here. Let the caller do the free */ +} + +/* + * This mailbox uses the iocb interface to send MB command. + * This allows non-critial (non chip setup) command to go + * out in parrallel. + */ +int qla24xx_send_mb_cmd(struct scsi_qla_host *vha, mbx_cmd_t *mcp) +{ + int rval = QLA_FUNCTION_FAILED; + srb_t *sp; + struct srb_iocb *c; + + if (!vha->hw->flags.fw_started) + goto done; + + sp = qla2x00_get_sp(vha, NULL, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_MB_IOCB; + sp->name = mb_to_str(mcp->mb[0]); + + qla2x00_init_timer(sp, qla2x00_get_async_timeout(vha) + 2); + + memcpy(sp->u.iocb_cmd.u.mbx.out_mb, mcp->mb, SIZEOF_IOCB_MB_REG); + + c = &sp->u.iocb_cmd; + c->timeout = qla2x00_async_iocb_timeout; + init_completion(&c->u.mbx.comp); + + sp->done = qla2x00_async_mb_sp_done; + + rval = qla2x00_start_sp(sp); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: %s Failed submission. %x.\n", + __func__, sp->name, rval); + goto done_free_sp; + } + + ql_dbg(ql_dbg_mbx, vha, 0xffff, "MB:%s hndl %x submitted\n", + sp->name, sp->handle); + + wait_for_completion(&c->u.mbx.comp); + memcpy(mcp->mb, sp->u.iocb_cmd.u.mbx.in_mb, SIZEOF_IOCB_MB_REG); + + rval = c->u.mbx.rc; + switch (rval) { + case QLA_FUNCTION_TIMEOUT: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Timeout. %x.\n", + __func__, sp->name, rval); + break; + case QLA_SUCCESS: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s done.\n", + __func__, sp->name); + sp->free(sp); + break; + default: + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %s Failed. %x.\n", + __func__, sp->name, rval); + sp->free(sp); + break; + } + + return rval; + +done_free_sp: + sp->free(sp); +done: + return rval; +} + +/* + * qla24xx_gpdb_wait + * NOTE: Do not call this routine from DPC thread + */ +int qla24xx_gpdb_wait(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) +{ + int rval = QLA_FUNCTION_FAILED; + dma_addr_t pd_dma; + struct port_database_24xx *pd; + struct qla_hw_data *ha = vha->hw; + mbx_cmd_t mc; + + if (!vha->hw->flags.fw_started) + goto done; + + pd = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &pd_dma); + if (pd == NULL) { + ql_log(ql_log_warn, vha, 0xffff, + "Failed to allocate port database structure.\n"); + goto done_free_sp; + } + memset(pd, 0, max(PORT_DATABASE_SIZE, PORT_DATABASE_24XX_SIZE)); + + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_PORT_DATABASE; + mc.mb[1] = cpu_to_le16(fcport->loop_id); + mc.mb[2] = MSW(pd_dma); + mc.mb[3] = LSW(pd_dma); + mc.mb[6] = MSW(MSD(pd_dma)); + mc.mb[7] = LSW(MSD(pd_dma)); + mc.mb[9] = cpu_to_le16(vha->vp_idx); + mc.mb[10] = cpu_to_le16((uint16_t)opt); + + rval = qla24xx_send_mb_cmd(vha, &mc); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: %8phC fail\n", __func__, fcport->port_name); + goto done_free_sp; + } + + rval = __qla24xx_parse_gpdb(vha, fcport, pd); + + ql_dbg(ql_dbg_mbx, vha, 0xffff, "%s: %8phC done\n", + __func__, fcport->port_name); + +done_free_sp: + if (pd) + dma_pool_free(ha->s_dma_pool, pd, pd_dma); +done: + return rval; +} + +int __qla24xx_parse_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, + struct port_database_24xx *pd) +{ + int rval = QLA_SUCCESS; + uint64_t zero = 0; + + /* Check for logged in state. */ + if (pd->current_login_state != PDS_PRLI_COMPLETE && + pd->last_login_state != PDS_PRLI_COMPLETE) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "Unable to verify login-state (%x/%x) for " + "loop_id %x.\n", pd->current_login_state, + pd->last_login_state, fcport->loop_id); + rval = QLA_FUNCTION_FAILED; + goto gpd_error_out; + } + + if (fcport->loop_id == FC_NO_LOOP_ID || + (memcmp(fcport->port_name, (uint8_t *)&zero, 8) && + memcmp(fcport->port_name, pd->port_name, 8))) { + /* We lost the device mid way. */ + rval = QLA_NOT_LOGGED_IN; + goto gpd_error_out; + } + + /* Names are little-endian. */ + memcpy(fcport->node_name, pd->node_name, WWN_SIZE); + memcpy(fcport->port_name, pd->port_name, WWN_SIZE); + + /* Get port_id of device. */ + fcport->d_id.b.domain = pd->port_id[0]; + fcport->d_id.b.area = pd->port_id[1]; + fcport->d_id.b.al_pa = pd->port_id[2]; + fcport->d_id.b.rsvd_1 = 0; + + /* If not target must be initiator or unknown type. */ + if ((pd->prli_svc_param_word_3[0] & BIT_4) == 0) + fcport->port_type = FCT_INITIATOR; + else + fcport->port_type = FCT_TARGET; + + /* Passback COS information. */ + fcport->supported_classes = (pd->flags & PDF_CLASS_2) ? + FC_COS_CLASS2 : FC_COS_CLASS3; + + if (pd->prli_svc_param_word_3[0] & BIT_7) { + fcport->flags |= FCF_CONF_COMP_SUPPORTED; + fcport->conf_compl_supported = 1; + } + +gpd_error_out: + return rval; +} + +/* + * qla24xx_gidlist__wait + * NOTE: don't call this routine from DPC thread. + */ +int qla24xx_gidlist_wait(struct scsi_qla_host *vha, + void *id_list, dma_addr_t id_list_dma, uint16_t *entries) +{ + int rval = QLA_FUNCTION_FAILED; + mbx_cmd_t mc; + + if (!vha->hw->flags.fw_started) + goto done; + + memset(&mc, 0, sizeof(mc)); + mc.mb[0] = MBC_GET_ID_LIST; + mc.mb[2] = MSW(id_list_dma); + mc.mb[3] = LSW(id_list_dma); + mc.mb[6] = MSW(MSD(id_list_dma)); + mc.mb[7] = LSW(MSD(id_list_dma)); + mc.mb[8] = 0; + mc.mb[9] = cpu_to_le16(vha->vp_idx); + + rval = qla24xx_send_mb_cmd(vha, &mc); + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: fail\n", __func__); + } else { + *entries = mc.mb[1]; + ql_dbg(ql_dbg_mbx, vha, 0xffff, + "%s: done\n", __func__); + } +done: + return rval; +} diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 563116188c43..38bdcd325fa4 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1238,7 +1238,7 @@ static int qla24xx_get_loop_id(struct scsi_qla_host *vha, const uint8_t *s_id, } /* Get list of logged in devices */ - rc = qla2x00_get_id_list(vha, gid_list, gid_list_dma, &entries); + rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, &entries); if (rc != QLA_SUCCESS) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf045, "qla_target(%d): get_id_list() failed: %x\n", @@ -5648,7 +5648,7 @@ static fc_port_t *qlt_get_port_database(struct scsi_qla_host *vha, fcport->loop_id = loop_id; - rc = qla2x00_get_port_database(vha, fcport, 0); + rc = qla24xx_gpdb_wait(vha, fcport, 0); if (rc != QLA_SUCCESS) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf070, "qla_target(%d): Failed to retrieve fcport " From c423437e3ff41b8ca551ab6621baf11538dbfe9d Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 15 Mar 2017 09:48:53 -0700 Subject: [PATCH 366/384] qla2xxx: Add DebugFS node to display Port Database Signed-off-by: Himanshu Madhani Signed-off-by: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 2 + drivers/scsi/qla2xxx/qla_dfs.c | 92 ++++++++++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index ae38b7a789b1..089480280558 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3300,6 +3300,8 @@ struct qlt_hw_data { uint8_t tgt_node_name[WWN_SIZE]; struct dentry *dfs_tgt_sess; + struct dentry *dfs_tgt_port_database; + struct list_head q_full_list; uint32_t num_pend_cmds; uint32_t num_qfull_cmds_alloc; diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 3b35905619b0..989e17b0758c 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -19,11 +19,11 @@ qla2x00_dfs_tgt_sess_show(struct seq_file *s, void *unused) struct qla_hw_data *ha = vha->hw; unsigned long flags; struct fc_port *sess = NULL; - struct qla_tgt *tgt= vha->vha_tgt.qla_tgt; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; - seq_printf(s, "%s\n",vha->host_str); + seq_printf(s, "%s\n", vha->host_str); if (tgt) { - seq_printf(s, "Port ID Port Name Handle\n"); + seq_puts(s, "Port ID Port Name Handle\n"); spin_lock_irqsave(&ha->tgt.sess_lock, flags); list_for_each_entry(sess, &vha->vp_fcports, list) @@ -44,7 +44,6 @@ qla2x00_dfs_tgt_sess_open(struct inode *inode, struct file *file) return single_open(file, qla2x00_dfs_tgt_sess_show, vha); } - static const struct file_operations dfs_tgt_sess_ops = { .open = qla2x00_dfs_tgt_sess_open, .read = seq_read, @@ -52,6 +51,78 @@ static const struct file_operations dfs_tgt_sess_ops = { .release = single_release, }; +static int +qla2x00_dfs_tgt_port_database_show(struct seq_file *s, void *unused) +{ + scsi_qla_host_t *vha = s->private; + struct qla_hw_data *ha = vha->hw; + struct gid_list_info *gid_list; + dma_addr_t gid_list_dma; + fc_port_t fc_port; + char *id_iter; + int rc, i; + uint16_t entries, loop_id; + struct qla_tgt *tgt = vha->vha_tgt.qla_tgt; + + seq_printf(s, "%s\n", vha->host_str); + if (tgt) { + gid_list = dma_alloc_coherent(&ha->pdev->dev, + qla2x00_gid_list_size(ha), + &gid_list_dma, GFP_KERNEL); + if (!gid_list) { + ql_dbg(ql_dbg_user, vha, 0x705c, + "DMA allocation failed for %u\n", + qla2x00_gid_list_size(ha)); + return 0; + } + + rc = qla24xx_gidlist_wait(vha, gid_list, gid_list_dma, + &entries); + if (rc != QLA_SUCCESS) + goto out_free_id_list; + + id_iter = (char *)gid_list; + + seq_puts(s, "Port Name Port ID Loop ID\n"); + + for (i = 0; i < entries; i++) { + struct gid_list_info *gid = + (struct gid_list_info *)id_iter; + loop_id = le16_to_cpu(gid->loop_id); + memset(&fc_port, 0, sizeof(fc_port_t)); + + fc_port.loop_id = loop_id; + + rc = qla24xx_gpdb_wait(vha, &fc_port, 0); + seq_printf(s, "%8phC %02x%02x%02x %d\n", + fc_port.port_name, fc_port.d_id.b.domain, + fc_port.d_id.b.area, fc_port.d_id.b.al_pa, + fc_port.loop_id); + id_iter += ha->gid_list_info_size; + } +out_free_id_list: + dma_free_coherent(&ha->pdev->dev, qla2x00_gid_list_size(ha), + gid_list, gid_list_dma); + } + + return 0; +} + +static int +qla2x00_dfs_tgt_port_database_open(struct inode *inode, struct file *file) +{ + scsi_qla_host_t *vha = inode->i_private; + + return single_open(file, qla2x00_dfs_tgt_port_database_show, vha); +} + +static const struct file_operations dfs_tgt_port_database_ops = { + .open = qla2x00_dfs_tgt_port_database_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int qla_dfs_fw_resource_cnt_show(struct seq_file *s, void *unused) { @@ -296,6 +367,14 @@ qla2x00_dfs_setup(scsi_qla_host_t *vha) goto out; } + ha->tgt.dfs_tgt_port_database = debugfs_create_file("tgt_port_database", + S_IRUSR, ha->dfs_dir, vha, &dfs_tgt_port_database_ops); + if (!ha->tgt.dfs_tgt_port_database) { + ql_log(ql_log_warn, vha, 0xffff, + "Unable to create debugFS tgt_port_database node.\n"); + goto out; + } + ha->dfs_fce = debugfs_create_file("fce", S_IRUSR, ha->dfs_dir, vha, &dfs_fce_ops); if (!ha->dfs_fce) { @@ -326,6 +405,11 @@ qla2x00_dfs_remove(scsi_qla_host_t *vha) ha->tgt.dfs_tgt_sess = NULL; } + if (ha->tgt.dfs_tgt_port_database) { + debugfs_remove(ha->tgt.dfs_tgt_port_database); + ha->tgt.dfs_tgt_port_database = NULL; + } + if (ha->dfs_fw_resource_cnt) { debugfs_remove(ha->dfs_fw_resource_cnt); ha->dfs_fw_resource_cnt = NULL; From 482c9dc79204bb83c3433a59680c787a0b98c000 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:54 -0700 Subject: [PATCH 367/384] qla2xxx: Change scsi host lookup method. For target mode, when new scsi command arrive, driver first performs a look up of the SCSI Host. The current look up method is based on the ALPA portion of the NPort ID. For Cisco switch, the ALPA can not be used as the index. Instead, the new search method is based on the full value of the Nport_ID via btree lib. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/Kconfig | 1 + drivers/scsi/qla2xxx/qla_def.h | 2 + drivers/scsi/qla2xxx/qla_gbl.h | 2 + drivers/scsi/qla2xxx/qla_init.c | 14 ++--- drivers/scsi/qla2xxx/qla_mbx.c | 28 +++------- drivers/scsi/qla2xxx/qla_os.c | 1 + drivers/scsi/qla2xxx/qla_target.c | 92 ++++++++++++++++++++++++++----- 7 files changed, 100 insertions(+), 40 deletions(-) diff --git a/drivers/scsi/qla2xxx/Kconfig b/drivers/scsi/qla2xxx/Kconfig index 67c0d5aa3212..de952935b5d2 100644 --- a/drivers/scsi/qla2xxx/Kconfig +++ b/drivers/scsi/qla2xxx/Kconfig @@ -3,6 +3,7 @@ config SCSI_QLA_FC depends on PCI && SCSI depends on SCSI_FC_ATTRS select FW_LOADER + select BTREE ---help--- This qla2xxx driver supports all QLogic Fibre Channel PCI and PCIe host adapters. diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 089480280558..9251918773b1 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -3311,6 +3312,7 @@ struct qlt_hw_data { spinlock_t sess_lock; int rspq_vector_cpuid; spinlock_t atio_lock ____cacheline_aligned; + struct btree_head32 host_map; }; #define MAX_QFULL_CMDS_ALLOC 8192 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 323b912b47f7..5b2451745e9f 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -854,5 +854,7 @@ extern struct fc_port *qlt_find_sess_invalidate_other(scsi_qla_host_t *, uint64_t wwn, port_id_t port_id, uint16_t loop_id, struct fc_port **); void qla24xx_delete_sess_fn(struct work_struct *); void qlt_unknown_atio_work_fn(struct work_struct *); +void qlt_update_host_map(struct scsi_qla_host *, port_id_t); +void qlt_remove_target_resources(struct qla_hw_data *); #endif /* _QLA_GBL_H */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b1bfa63f7d4e..b0f6ad3020d3 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3340,8 +3340,8 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) uint8_t domain; char connect_type[22]; struct qla_hw_data *ha = vha->hw; - unsigned long flags; scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); + port_id_t id; /* Get host addresses. */ rval = qla2x00_get_adapter_id(vha, @@ -3419,13 +3419,11 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) /* Save Host port and loop ID. */ /* byte order - Big Endian */ - vha->d_id.b.domain = domain; - vha->d_id.b.area = area; - vha->d_id.b.al_pa = al_pa; - - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + id.b.domain = domain; + id.b.area = area; + id.b.al_pa = al_pa; + id.b.rsvd_1 = 0; + qlt_update_host_map(vha, id); if (!vha->flags.init_done) ql_log(ql_log_info, vha, 0x2010, diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index e40ed570d3c1..53d9579acc74 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3623,6 +3623,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, scsi_qla_host_t *vp = NULL; unsigned long flags; int found; + port_id_t id; ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6, "Entered %s.\n", __func__); @@ -3630,6 +3631,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (rptid_entry->entry_status != 0) return; + id.b.domain = rptid_entry->port_id[2]; + id.b.area = rptid_entry->port_id[1]; + id.b.al_pa = rptid_entry->port_id[0]; + id.b.rsvd_1 = 0; + if (rptid_entry->format == 0) { /* loop */ ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, @@ -3641,13 +3647,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); - vha->d_id.b.domain = rptid_entry->port_id[2]; - vha->d_id.b.area = rptid_entry->port_id[1]; - vha->d_id.b.al_pa = rptid_entry->port_id[0]; - - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vha, id); } else if (rptid_entry->format == 1) { /* fabric */ @@ -3673,12 +3673,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, WWN_SIZE); } - vha->d_id.b.domain = rptid_entry->port_id[2]; - vha->d_id.b.area = rptid_entry->port_id[1]; - vha->d_id.b.al_pa = rptid_entry->port_id[0]; - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vha, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vha, id); } fc_host_port_name(vha->host) = @@ -3714,12 +3709,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (!found) return; - vp->d_id.b.domain = rptid_entry->port_id[2]; - vp->d_id.b.area = rptid_entry->port_id[1]; - vp->d_id.b.al_pa = rptid_entry->port_id[0]; - spin_lock_irqsave(&ha->vport_slock, flags); - qlt_update_vp_map(vp, SET_AL_PA); - spin_unlock_irqrestore(&ha->vport_slock, flags); + qlt_update_host_map(vp, id); /* * Cannot configure here as we are still sitting on the diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 54d4e802bde0..344faf59783d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3469,6 +3469,7 @@ qla2x00_remove_one(struct pci_dev *pdev) qla2x00_free_sysfs_attr(base_vha, true); fc_remove_host(base_vha->host); + qlt_remove_target_resources(ha); scsi_remove_host(base_vha->host); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 38bdcd325fa4..7278e046bf87 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -187,21 +187,23 @@ static inline struct scsi_qla_host *qlt_find_host_by_d_id(struct scsi_qla_host *vha, uint8_t *d_id) { - struct qla_hw_data *ha = vha->hw; - uint8_t vp_idx; + struct scsi_qla_host *host; + uint32_t key = 0; - if ((vha->d_id.b.area != d_id[1]) || (vha->d_id.b.domain != d_id[0])) - return NULL; - - if (vha->d_id.b.al_pa == d_id[2]) + if ((vha->d_id.b.area == d_id[1]) && (vha->d_id.b.domain == d_id[0]) && + (vha->d_id.b.al_pa == d_id[2])) return vha; - BUG_ON(ha->tgt.tgt_vp_map == NULL); - vp_idx = ha->tgt.tgt_vp_map[d_id[2]].idx; - if (likely(test_bit(vp_idx, ha->vp_idx_map))) - return ha->tgt.tgt_vp_map[vp_idx].vha; + key = (uint32_t)d_id[0] << 16; + key |= (uint32_t)d_id[1] << 8; + key |= (uint32_t)d_id[2]; - return NULL; + host = btree_lookup32(&vha->hw->tgt.host_map, key); + if (!host) + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "Unable to find host %06x\n", key); + + return host; } static inline @@ -6040,6 +6042,17 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha) return 0; } +void qlt_remove_target_resources(struct qla_hw_data *ha) +{ + struct scsi_qla_host *node; + u32 key = 0; + + btree_for_each_safe32(&ha->tgt.host_map, key, node) + btree_remove32(&ha->tgt.host_map, key); + + btree_destroy32(&ha->tgt.host_map); +} + static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn, unsigned char *b) { @@ -6693,6 +6706,8 @@ qlt_modify_vp_config(struct scsi_qla_host *vha, void qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) { + int rc; + if (!QLA_TGT_MODE_ENABLED()) return; @@ -6712,6 +6727,13 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) qlt_unknown_atio_work_fn); qlt_clear_mode(base_vha); + + rc = btree_init32(&ha->tgt.host_map); + if (rc) + ql_log(ql_log_info, base_vha, 0xffff, + "Unable to initialize ha->host_map btree\n"); + + qlt_update_vp_map(base_vha, SET_VP_IDX); } irqreturn_t @@ -6820,25 +6842,69 @@ qlt_mem_free(struct qla_hw_data *ha) void qlt_update_vp_map(struct scsi_qla_host *vha, int cmd) { + void *slot; + u32 key; + int rc; + if (!QLA_TGT_MODE_ENABLED()) return; + key = vha->d_id.b24; + switch (cmd) { case SET_VP_IDX: vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = vha; break; case SET_AL_PA: - vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = vha->vp_idx; + slot = btree_lookup32(&vha->hw->tgt.host_map, key); + if (!slot) { + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "Save vha in host_map %p %06x\n", vha, key); + rc = btree_insert32(&vha->hw->tgt.host_map, + key, vha, GFP_ATOMIC); + if (rc) + ql_log(ql_log_info, vha, 0xffff, + "Unable to insert s_id into host_map: %06x\n", + key); + return; + } + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "replace existing vha in host_map %p %06x\n", vha, key); + btree_update32(&vha->hw->tgt.host_map, key, vha); break; case RESET_VP_IDX: vha->hw->tgt.tgt_vp_map[vha->vp_idx].vha = NULL; break; case RESET_AL_PA: - vha->hw->tgt.tgt_vp_map[vha->d_id.b.al_pa].idx = 0; + ql_dbg(ql_dbg_tgt_mgt, vha, 0xffff, + "clear vha in host_map %p %06x\n", vha, key); + slot = btree_lookup32(&vha->hw->tgt.host_map, key); + if (slot) + btree_remove32(&vha->hw->tgt.host_map, key); + vha->d_id.b24 = 0; break; } } +void qlt_update_host_map(struct scsi_qla_host *vha, port_id_t id) +{ + unsigned long flags; + struct qla_hw_data *ha = vha->hw; + + if (!vha->d_id.b24) { + spin_lock_irqsave(&ha->vport_slock, flags); + vha->d_id = id; + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } else if (vha->d_id.b24 != id.b24) { + spin_lock_irqsave(&ha->vport_slock, flags); + qlt_update_vp_map(vha, RESET_AL_PA); + vha->d_id = id; + qlt_update_vp_map(vha, SET_AL_PA); + spin_unlock_irqrestore(&ha->vport_slock, flags); + } +} + static int __init qlt_parse_ini_mode(void) { if (strcasecmp(qlini_mode, QLA2XXX_INI_MODE_STR_EXCLUSIVE) == 0) From ec7193e26055112bc824929fd943035f9a30b06f Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 15 Mar 2017 09:48:55 -0700 Subject: [PATCH 368/384] qla2xxx: Fix delayed response to command for loop mode/direct connect. Current driver wait for FW to be in the ready state before processing in-coming commands. For Arbitrated Loop or Point-to- Point (not switch), FW Ready state can take a while. FW will transition to ready state after all Nports have been logged in. In the mean time, certain initiators have completed the login and starts IO. Driver needs to start processing all queues if FW is already started. Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_def.h | 10 ++++++-- drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++++ drivers/scsi/qla2xxx/qla_isr.c | 14 +++++++++++- drivers/scsi/qla2xxx/qla_mbx.c | 6 ++--- drivers/scsi/qla2xxx/qla_os.c | 21 ++++++++++++++++- drivers/scsi/qla2xxx/qla_target.c | 38 ++++++++++++++++++++----------- 6 files changed, 81 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 9251918773b1..ae119018dfaa 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3322,6 +3322,10 @@ struct qlt_hw_data { #define LEAK_EXCHG_THRESH_HOLD_PERCENT 75 /* 75 percent */ +#define QLA_EARLY_LINKUP(_ha) \ + ((_ha->flags.n2n_ae || _ha->flags.lip_ae) && \ + _ha->flags.fw_started && !_ha->flags.fw_init_done) + /* * Qlogic host adapter specific data structure. */ @@ -3371,9 +3375,11 @@ struct qla_hw_data { uint32_t fawwpn_enabled:1; uint32_t exlogins_enabled:1; uint32_t exchoffld_enabled:1; - /* 35 bits */ + uint32_t lip_ae:1; + uint32_t n2n_ae:1; uint32_t fw_started:1; + uint32_t fw_init_done:1; } flags; /* This spinlock is used to protect "io transactions", you must @@ -3466,7 +3472,6 @@ struct qla_hw_data { #define P2P_LOOP 3 uint8_t interrupts_on; uint32_t isp_abort_cnt; - #define PCI_DEVICE_ID_QLOGIC_ISP2532 0x2532 #define PCI_DEVICE_ID_QLOGIC_ISP8432 0x8432 #define PCI_DEVICE_ID_QLOGIC_ISP8001 0x8001 @@ -3947,6 +3952,7 @@ typedef struct scsi_qla_host { struct list_head vp_fcports; /* list of fcports */ struct list_head work_list; spinlock_t work_lock; + struct work_struct iocb_work; /* Commonly used flags and state information. */ struct Scsi_Host *host; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b0f6ad3020d3..f9d2fe7b1ade 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -3178,6 +3178,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) } else { ql_dbg(ql_dbg_init, vha, 0x00d3, "Init Firmware -- success.\n"); + ha->flags.fw_started = 1; } return (rval); @@ -4000,6 +4001,7 @@ qla2x00_configure_loop(scsi_qla_host_t *vha) atomic_set(&vha->loop_state, LOOP_READY); ql_dbg(ql_dbg_disc, vha, 0x2069, "LOOP READY.\n"); + ha->flags.fw_init_done = 1; /* * Process any ATIO queue entries that came in @@ -5491,6 +5493,11 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha) if (!(IS_P3P_TYPE(ha))) ha->isp_ops->reset_chip(vha); + ha->flags.n2n_ae = 0; + ha->flags.lip_ae = 0; + ha->current_topology = 0; + ha->flags.fw_started = 0; + ha->flags.fw_init_done = 0; ha->chip_reset++; atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); @@ -6767,6 +6774,8 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha) return; if (!ha->fw_major_version) return; + if (!ha->flags.fw_started) + return; ret = qla2x00_stop_firmware(vha); for (retries = 5; ret != QLA_SUCCESS && ret != QLA_FUNCTION_TIMEOUT && @@ -6780,6 +6789,9 @@ qla2x00_try_to_stop_firmware(scsi_qla_host_t *vha) "Attempting retry of stop-firmware command.\n"); ret = qla2x00_stop_firmware(vha); } + + ha->flags.fw_started = 0; + ha->flags.fw_init_done = 0; } int diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 3953c8d6af69..3203367a4f42 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -708,6 +708,8 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) "mbx7=%xh.\n", mb[1], mb[2], mb[3], mbx); ha->isp_ops->fw_dump(vha, 1); + ha->flags.fw_init_done = 0; + ha->flags.fw_started = 0; if (IS_FWI2_CAPABLE(ha)) { if (mb[1] == 0 && mb[2] == 0) { @@ -761,6 +763,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) break; case MBA_LIP_OCCURRED: /* Loop Initialization Procedure */ + ha->flags.lip_ae = 1; + ha->flags.n2n_ae = 0; + ql_dbg(ql_dbg_async, vha, 0x5009, "LIP occurred (%x).\n", mb[1]); @@ -797,6 +802,10 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) break; case MBA_LOOP_DOWN: /* Loop Down Event */ + ha->flags.n2n_ae = 0; + ha->flags.lip_ae = 0; + ha->current_topology = 0; + mbx = (IS_QLA81XX(ha) || IS_QLA8031(ha)) ? RD_REG_WORD(®24->mailbox4) : 0; mbx = (IS_P3P_TYPE(ha)) ? RD_REG_WORD(®82->mailbox_out[4]) @@ -866,6 +875,9 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) /* case MBA_DCBX_COMPLETE: */ case MBA_POINT_TO_POINT: /* Point-to-Point */ + ha->flags.lip_ae = 0; + ha->flags.n2n_ae = 1; + if (IS_QLA2100(ha)) break; @@ -2706,7 +2718,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha, struct sts_entry_24xx *pkt; struct qla_hw_data *ha = vha->hw; - if (!vha->flags.online) + if (!ha->flags.fw_started) return; while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) { diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 53d9579acc74..a113ab3592a7 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -3638,11 +3638,11 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, if (rptid_entry->format == 0) { /* loop */ - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b7, + ql_dbg(ql_dbg_async, vha, 0x10b7, "Format 0 : Number of VPs setup %d, number of " "VPs acquired %d.\n", rptid_entry->vp_setup, rptid_entry->vp_acquired); - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b8, + ql_dbg(ql_dbg_async, vha, 0x10b8, "Primary port id %02x%02x%02x.\n", rptid_entry->port_id[2], rptid_entry->port_id[1], rptid_entry->port_id[0]); @@ -3651,7 +3651,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, } else if (rptid_entry->format == 1) { /* fabric */ - ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b9, + ql_dbg(ql_dbg_async, vha, 0x10b9, "Format 1: VP[%d] enabled - status %d - with " "port id %02x%02x%02x.\n", rptid_entry->vp_idx, rptid_entry->vp_status, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 344faf59783d..41d5b09f7326 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -2560,6 +2560,20 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time) return atomic_read(&vha->loop_state) == LOOP_READY; } +static void qla2x00_iocb_work_fn(struct work_struct *work) +{ + struct scsi_qla_host *vha = container_of(work, + struct scsi_qla_host, iocb_work); + int cnt = 0; + + while (!list_empty(&vha->work_list)) { + qla2x00_do_work(vha); + cnt++; + if (cnt > 10) + break; + } +} + /* * PCI driver interface */ @@ -3078,6 +3092,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ qla2xxx_wake_dpc(base_vha); + INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn); INIT_WORK(&ha->board_disable, qla2x00_disable_board_on_pci_error); if (IS_QLA8031(ha) || IS_MCTP_CAPABLE(ha)) { @@ -4321,7 +4336,11 @@ qla2x00_post_work(struct scsi_qla_host *vha, struct qla_work_evt *e) spin_lock_irqsave(&vha->work_lock, flags); list_add_tail(&e->list, &vha->work_list); spin_unlock_irqrestore(&vha->work_lock, flags); - qla2xxx_wake_dpc(vha); + + if (QLA_EARLY_LINKUP(vha->hw)) + schedule_work(&vha->iocb_work); + else + qla2xxx_wake_dpc(vha); return QLA_SUCCESS; } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 7278e046bf87..0e03ca2ab3e5 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -638,6 +638,7 @@ int qla24xx_async_notify_ack(scsi_qla_host_t *vha, fc_port_t *fcport, break; case SRB_NACK_PRLI: fcport->fw_login_state = DSC_LS_PRLI_PEND; + fcport->deleted = 0; c = "PRLI"; break; case SRB_NACK_LOGO: @@ -1576,6 +1577,9 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, request_t *pkt; struct nack_to_isp *nack; + if (!ha->flags.fw_started) + return; + ql_dbg(ql_dbg_tgt, vha, 0xe004, "Sending NOTIFY_ACK (ha=%p)\n", ha); /* Send marker if required */ @@ -3053,7 +3057,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, else vha->tgt_counters.core_qla_que_buf++; - if (!vha->flags.online || cmd->reset_count != ha->chip_reset) { + if (!ha->flags.fw_started || cmd->reset_count != ha->chip_reset) { /* * Either the port is not online or this request was from * previous life, just abort the processing. @@ -3194,7 +3198,7 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) spin_lock_irqsave(&ha->hardware_lock, flags); - if (!vha->flags.online || (cmd->reset_count != ha->chip_reset) || + if (!ha->flags.fw_started || (cmd->reset_count != ha->chip_reset) || (cmd->sess && cmd->sess->deleted)) { /* * Either the port is not online or this request was from @@ -3372,7 +3376,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha, ql_dbg(ql_dbg_tgt_tmr, vha, 0xe01c, "Sending TERM ELS CTIO (ha=%p)\n", ha); - pkt = (request_t *)qla2x00_alloc_iocbs_ready(vha, NULL); + pkt = (request_t *)qla2x00_alloc_iocbs(vha, NULL); if (pkt == NULL) { ql_dbg(ql_dbg_tgt, vha, 0xe080, "qla_target(%d): %s failed: unable to allocate " @@ -4697,7 +4701,8 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } if (sess != NULL) { - if (sess->fw_login_state == DSC_LS_PLOGI_PEND) { + if (sess->fw_login_state != DSC_LS_PLOGI_PEND && + sess->fw_login_state != DSC_LS_PLOGI_COMP) { /* * Impatient initiator sent PRLI before last * PLOGI could finish. Will force him to re-try, @@ -4736,15 +4741,23 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, /* Make session global (not used in fabric mode) */ if (ha->current_topology != ISP_CFG_F) { - set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); + if (sess) { + ql_dbg(ql_dbg_disc, vha, 0xffff, + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); + qla24xx_post_nack_work(vha, sess, iocb, + SRB_NACK_PRLI); + res = 0; + } else { + set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); + set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } } else { if (sess) { ql_dbg(ql_dbg_disc, vha, 0xffff, - "%s %d %8phC post nack\n", - __func__, __LINE__, sess->port_name); - + "%s %d %8phC post nack\n", + __func__, __LINE__, sess->port_name); qla24xx_post_nack_work(vha, sess, iocb, SRB_NACK_PRLI); res = 0; @@ -4752,7 +4765,6 @@ static int qlt_24xx_handle_els(struct scsi_qla_host *vha, } break; - case ELS_TPRLO: if (le16_to_cpu(iocb->u.isp24.flags) & NOTIFY24XX_FLAGS_GLOBAL_TPRLO) { @@ -5222,7 +5234,7 @@ static void qlt_24xx_atio_pkt(struct scsi_qla_host *vha, unsigned long flags; if (unlikely(tgt == NULL)) { - ql_dbg(ql_dbg_io, vha, 0x3064, + ql_dbg(ql_dbg_tgt, vha, 0x3064, "ATIO pkt, but no tgt (ha %p)", ha); return; } @@ -6359,7 +6371,7 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha, uint8_t ha_locked) struct atio_from_isp *pkt; int cnt, i; - if (!vha->flags.online) + if (!ha->flags.fw_started) return; while ((ha->tgt.atio_ring_ptr->signature != ATIO_PROCESSED) || From 6c611d18f386d37cce3afbd921568e2a895bd86e Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Wed, 15 Mar 2017 09:48:56 -0700 Subject: [PATCH 369/384] qla2xxx: Update driver version to 9.00.00.00-k Signed-off-by: Himanshu Madhani signed-off-by: Giridhar Malavali Signed-off-by: Nicholas Bellinger --- drivers/scsi/qla2xxx/qla_version.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 3cb1964b7786..45bc84e8e3bf 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -7,9 +7,9 @@ /* * Driver version */ -#define QLA2XXX_VERSION "8.07.00.38-k" +#define QLA2XXX_VERSION "9.00.00.00-k" -#define QLA_DRIVER_MAJOR_VER 8 -#define QLA_DRIVER_MINOR_VER 7 +#define QLA_DRIVER_MAJOR_VER 9 +#define QLA_DRIVER_MINOR_VER 0 #define QLA_DRIVER_PATCH_VER 0 #define QLA_DRIVER_BETA_VER 0 From 452b94b8c8c7eb7dd0d0fa9a9776e0d02cd73b97 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Mar 2017 19:00:47 -0700 Subject: [PATCH 370/384] mm/swap: don't BUG_ON() due to uninitialized swap slot cache This BUG_ON() triggered for me once at shutdown, and I don't see a reason for the check. The code correctly checks whether the swap slot cache is usable or not, so an uninitialized swap slot cache is not actually problematic afaik. I've temporarily just switched the BUG_ON() to a WARN_ON_ONCE(), since I'm not sure why that seemingly pointless check was there. I suspect the real fix is to just remove it entirely, but for now we'll warn about it but not bring the machine down. Cc: "Huang, Ying" Cc: Tim Chen Cc: Michal Hocko Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_slots.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 9b5bc86f96ad..7ebb23836f68 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -267,7 +267,7 @@ int free_swap_slot(swp_entry_t entry) { struct swap_slots_cache *cache; - BUG_ON(!swap_slot_cache_initialized); + WARN_ON_ONCE(!swap_slot_cache_initialized); cache = &get_cpu_var(swp_slots); if (use_swap_slot_cache && cache->slots_ret) { From 97da3854c526d3a6ee05c849c96e48d21527606c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Mar 2017 19:09:39 -0700 Subject: [PATCH 371/384] Linux 4.11-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b841fb36beb2..b2faa9319372 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Fearless Coyote # *DOCUMENTATION* From 720037f939fa50fc3531035ae61b4cf4b0ff35e5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 6 Mar 2017 11:59:56 -0800 Subject: [PATCH 372/384] f2fs: don't overwrite node block by SSR This patch fixes that SSR can overwrite previous warm node block consisting of a node chain since the last checkpoint. Fixes: 5b6c6be2d878 ("f2fs: use SSR for warm node as well") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4bd7a8b19332..29ef7088c558 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1163,6 +1163,12 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; + + /* don't overwrite by SSR to keep node chain */ + if (se->type == CURSEG_WARM_NODE) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) + se->ckpt_valid_blocks++; + } } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { #ifdef CONFIG_F2FS_CHECK_FS From 9f7e4a2c49fd166f17cf4125766a68dce8716764 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Mar 2017 09:39:57 -0800 Subject: [PATCH 373/384] f2fs: declare static functions This is to avoid build warning reported by kbuild test robot. Signed-off-by: Fengguang Wu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 94967171dee8..a0a060c2979b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1823,7 +1823,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set) +static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, + bool set) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -2383,7 +2384,7 @@ static void __adjust_nat_entry_set(struct nat_entry_set *nes, list_add_tail(&nes->set_list, head); } -void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, +static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, struct page *page) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -2638,7 +2639,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) return 0; } -int init_free_nid_cache(struct f2fs_sb_info *sbi) +static int init_free_nid_cache(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); From 23380b8568b85cd4b7a056891f4dbf131f7b871d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 14:11:06 -0800 Subject: [PATCH 374/384] f2fs: use __set{__clear}_bit_le This patch uses __set{__clear}_bit_le for highter speed. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/node.c | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4650c9b85de7..8d5c62b07b28 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -750,7 +750,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a0a060c2979b..8c81ff614d1a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -339,7 +339,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, __set_nat_cache_dirty(nm_i, e); if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) - clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); + __clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) @@ -1834,9 +1834,9 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, return; if (set) - set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); else - clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1848,7 +1848,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; - set_bit_le(nat_ofs, nm_i->nat_block_bitmap); + __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -2403,16 +2403,16 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, valid++; } if (valid == 0) { - set_bit_le(nat_index, nm_i->empty_nat_bits); - clear_bit_le(nat_index, nm_i->full_nat_bits); + __set_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); return; } - clear_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->empty_nat_bits); if (valid == NAT_ENTRY_PER_BLOCK) - set_bit_le(nat_index, nm_i->full_nat_bits); + __set_bit_le(nat_index, nm_i->full_nat_bits); else - clear_bit_le(nat_index, nm_i->full_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); } static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, From 586d1492f301982e349797cfb05d9f343002ffa2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 1 Mar 2017 17:09:07 +0800 Subject: [PATCH 375/384] f2fs: skip scanning free nid bitmap of full NAT blocks This patch adds to account free nids for each NAT blocks, and while scanning all free nid bitmap, do check count and skip lookuping in full NAT block. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 1 + fs/f2fs/f2fs.h | 2 ++ fs/f2fs/node.c | 33 +++++++++++++++++++++++++++------ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a77df377e2e8..ee2d0a485fc3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -196,6 +196,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE; si->base_mem += NM_I(sbi)->nat_blocks / 8; + si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short); get_cache: si->cache_mem = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e849f83d6114..0a6e115562f6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -561,6 +561,8 @@ struct f2fs_nm_info { struct mutex build_lock; /* lock for build free nids */ unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; unsigned char *nat_block_bitmap; + unsigned short *free_nid_count; /* free nid count of NAT block */ + spinlock_t free_nid_lock; /* protect updating of nid count */ /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8c81ff614d1a..87a2b1f740cc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1824,7 +1824,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set) + bool set, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1837,6 +1837,13 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, __set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); + + spin_lock(&nm_i->free_nid_lock); + if (set) + nm_i->free_nid_count[nat_ofs]++; + else if (!build) + nm_i->free_nid_count[nat_ofs]--; + spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1848,6 +1855,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; + if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap)) + return; + __set_bit_le(nat_ofs, nm_i->nat_block_bitmap); i = start_nid % NAT_ENTRY_PER_BLOCK; @@ -1862,7 +1872,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed); + update_free_nid_bitmap(sbi, start_nid, freed, true); } } @@ -1878,6 +1888,8 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) continue; + if (!nm_i->free_nid_count[i]) + continue; for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) { nid_t nid; @@ -2082,7 +2094,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false); + update_free_nid_bitmap(sbi, *nid, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2138,7 +2150,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&nm_i->nid_list_lock); @@ -2468,11 +2480,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false); + update_free_nid_bitmap(sbi, nid, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2652,6 +2664,14 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) GFP_KERNEL); if (!nm_i->nat_block_bitmap) return -ENOMEM; + + nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks * + sizeof(unsigned short), GFP_KERNEL); + if (!nm_i->free_nid_count) + return -ENOMEM; + + spin_lock_init(&nm_i->free_nid_lock); + return 0; } @@ -2731,6 +2751,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kvfree(nm_i->nat_block_bitmap); kvfree(nm_i->free_nid_bitmap); + kvfree(nm_i->free_nid_count); kfree(nm_i->nat_bitmap); kfree(nm_i->nat_bits); From 7041d5d286fb54635f540c1bb3b43980ed65513a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 8 Mar 2017 20:07:49 +0800 Subject: [PATCH 376/384] f2fs: combine nat_bits and free_nid_bitmap cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both nat_bits cache and free_nid_bitmap cache provide same functionality as a intermediate cache between free nid cache and disk, but with different granularity of indicating free nid range, and different persistence policy. nat_bits cache provides better persistence ability, and free_nid_bitmap provides better granularity. In this patch we combine advantage of both caches, so finally policy of the intermediate cache would be: - init: load free nid status from nat_bits into free_nid_bitmap - lookup: scan free_nid_bitmap before load NAT blocks - update: update free_nid_bitmap in real-time - persistence: udpate and persist nat_bits in checkpoint This patch also resolves performance regression reported by lkp-robot. commit: 4ac912427c4214d8031d9ad6fbc3bc75e71512df ("f2fs: introduce free nid bitmap") d00030cf9cd0bb96fdccc41e33d3c91dcbb672ba ("f2fs: use __set{__clear}_bit_le") 1382c0f3f9d3f936c8bc42ed1591cf7a593ef9f7 ("f2fs: combine nat_bits and free_nid_bitmap cache") 4ac912427c4214d8 d00030cf9cd0bb96fdccc41e33 1382c0f3f9d3f936c8bc42ed15 ---------------- -------------------------- -------------------------- %stddev %change %stddev %change %stddev \ | \ | \ 77863 ± 0% +2.1% 79485 ± 1% +50.8% 117404 ± 0% aim7.jobs-per-min 231.63 ± 0% -2.0% 227.01 ± 1% -33.6% 153.80 ± 0% aim7.time.elapsed_time 231.63 ± 0% -2.0% 227.01 ± 1% -33.6% 153.80 ± 0% aim7.time.elapsed_time.max 896604 ± 0% -0.8% 889221 ± 3% -20.2% 715260 ± 1% aim7.time.involuntary_context_switches 2394 ± 1% +4.6% 2503 ± 1% +3.7% 2481 ± 2% aim7.time.maximum_resident_set_size 6240 ± 0% -1.5% 6145 ± 1% -14.1% 5360 ± 1% aim7.time.system_time 1111357 ± 3% +1.9% 1132509 ± 2% -6.2% 1041932 ± 2% aim7.time.voluntary_context_switches ... Signed-off-by: Chao Yu Tested-by: Xiaolong Ye Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 125 +++++++++++++++++++------------------------------ 1 file changed, 47 insertions(+), 78 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 87a2b1f740cc..481aa8dc79f4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, IS_CHECKPOINTED, false); __set_nat_cache_dirty(nm_i, e); - if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR) - __clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits); - /* update fsync_mark if its inode nat entry is still alive */ if (ni->nid != ni->ino) e = __lookup_nat_cache(nm_i, ni->ino); @@ -1824,7 +1821,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set, bool build) + bool set, bool build, bool locked) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1838,12 +1835,14 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - spin_lock(&nm_i->free_nid_lock); + if (!locked) + spin_lock(&nm_i->free_nid_lock); if (set) nm_i->free_nid_count[nat_ofs]++; else if (!build) nm_i->free_nid_count[nat_ofs]--; - spin_unlock(&nm_i->free_nid_lock); + if (!locked) + spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1872,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed, true); + update_free_nid_bitmap(sbi, start_nid, freed, true, false); } } @@ -1920,58 +1919,6 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) up_read(&nm_i->nat_tree_lock); } -static int scan_nat_bits(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - struct page *page; - unsigned int i = 0; - nid_t nid; - - if (!enabled_nat_bits(sbi, NULL)) - return -EAGAIN; - - down_read(&nm_i->nat_tree_lock); -check_empty: - i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); - if (i >= nm_i->nat_blocks) { - i = 0; - goto check_partial; - } - - for (nid = i * NAT_ENTRY_PER_BLOCK; nid < (i + 1) * NAT_ENTRY_PER_BLOCK; - nid++) { - if (unlikely(nid >= nm_i->max_nid)) - break; - add_free_nid(sbi, nid, true); - } - - if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS) - goto out; - i++; - goto check_empty; - -check_partial: - i = find_next_zero_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); - if (i >= nm_i->nat_blocks) { - disable_nat_bits(sbi, true); - up_read(&nm_i->nat_tree_lock); - return -EINVAL; - } - - nid = i * NAT_ENTRY_PER_BLOCK; - page = get_current_nat_page(sbi, nid); - scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); - - if (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS) { - i++; - goto check_partial; - } -out: - up_read(&nm_i->nat_tree_lock); - return 0; -} - static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -1993,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) if (nm_i->nid_cnt[FREE_NID_LIST]) return; - - /* try to find free nids with nat_bits */ - if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST]) - return; - } - - /* find next valid candidate */ - if (enabled_nat_bits(sbi, NULL)) { - int idx = find_next_zero_bit_le(nm_i->full_nat_bits, - nm_i->nat_blocks, 0); - - if (idx >= nm_i->nat_blocks) - set_sbi_flag(sbi, SBI_NEED_FSCK); - else - nid = idx * NAT_ENTRY_PER_BLOCK; } /* readahead nat pages to be scanned */ @@ -2094,7 +2026,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false, false); + update_free_nid_bitmap(sbi, *nid, false, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2150,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); + update_free_nid_bitmap(sbi, nid, true, false, false); spin_unlock(&nm_i->nid_list_lock); @@ -2480,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false); + update_free_nid_bitmap(sbi, nid, true, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false, false); + update_free_nid_bitmap(sbi, nid, false, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2590,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) return 0; } +inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned int i = 0; + nid_t nid, last_nid; + + if (!enabled_nat_bits(sbi, NULL)) + return; + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + + nid = i * NAT_ENTRY_PER_BLOCK; + last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; + + spin_lock(&nm_i->free_nid_lock); + for (; nid < last_nid; nid++) + update_free_nid_bitmap(sbi, nid, true, true, true); + spin_unlock(&nm_i->free_nid_lock); + } + + for (i = 0; i < nm_i->nat_blocks; i++) { + i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i); + if (i >= nm_i->nat_blocks) + break; + + __set_bit_le(i, nm_i->nat_block_bitmap); + } +} + static int init_node_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi); @@ -2691,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi) if (err) return err; + /* load free nid status from nat_bits table */ + load_free_nid_bitmap(sbi); + build_free_nids(sbi, true, true); return 0; } From c3104aae5d8cc443556f8613466e16737326e215 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Mar 2017 17:36:25 +0100 Subject: [PATCH 377/384] remoteproc: qcom: fix QCOM_SMD dependencies qcom_smd_register_edge() is provided by either QCOM_SMD or RPMSG_QCOM_SMD, and if both of them are disabled, it does nothing. The check for the PIL drivers however only checks for QCOM_SMD, so it breaks with QCOM_SMD=n && RPMSG_QCOM_SMD=m: drivers/remoteproc/built-in.o: In function `smd_subdev_remove': qcom_wcnss_iris.c:(.text+0x231c): undefined reference to `qcom_smd_unregister_edge' drivers/remoteproc/built-in.o: In function `smd_subdev_probe': qcom_wcnss_iris.c:(.text+0x2344): undefined reference to `qcom_smd_register_edge' drivers/remoteproc/built-in.o: In function `smd_subdev_probe': qcom_q6v5_pil.c:(.text+0x3538): undefined reference to `qcom_smd_register_edge' qcom_q6v5_pil.c:(.text+0x3538): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `qcom_smd_register_edge' This clarifies the Kconfig dependency. Fixes: 4b48921a8f74 ("remoteproc: qcom: Use common SMD edge handler") Signed-off-by: Arnd Bergmann Signed-off-by: Bjorn Andersson --- drivers/remoteproc/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 65f86bc24c07..1dc43fc5f65f 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -76,7 +76,7 @@ config QCOM_ADSP_PIL depends on OF && ARCH_QCOM depends on REMOTEPROC depends on QCOM_SMEM - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_RPROC_COMMON @@ -93,7 +93,7 @@ config QCOM_Q6V5_PIL depends on OF && ARCH_QCOM depends on QCOM_SMEM depends on REMOTEPROC - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) select MFD_SYSCON select QCOM_RPROC_COMMON select QCOM_SCM @@ -104,7 +104,7 @@ config QCOM_Q6V5_PIL config QCOM_WCNSS_PIL tristate "Qualcomm WCNSS Peripheral Image Loader" depends on OF && ARCH_QCOM - depends on QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n) + depends on RPMSG_QCOM_SMD || QCOM_SMD || (COMPILE_TEST && QCOM_SMD=n && RPMSG_QCOM_SMD=n) depends on QCOM_SMEM depends on REMOTEPROC select QCOM_MDT_LOADER From 49cc4c217c0dbb7d09c402472d1f85a81c093f9f Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 6 Mar 2017 10:54:57 -0800 Subject: [PATCH 378/384] HID: wacom: Correct Intuos Pro 2 resolution The features struct for the second gen Intuos Pro uses the wrong constant for the resolution. This fix is for commit 4922cd2. Fixes: 4922cd2 ("HID: wacom: Support 2nd-gen Intuos Pro's Bluetooth classic interface") Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4aa3de9f1163..3d864466d473 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -4197,10 +4197,10 @@ static const struct wacom_features wacom_features_0x343 = WACOM_DTU_OFFSET, WACOM_DTU_OFFSET }; static const struct wacom_features wacom_features_0x360 = { "Wacom Intuos Pro M", 44800, 29600, 8191, 63, - INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; + INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; static const struct wacom_features wacom_features_0x361 = { "Wacom Intuos Pro L", 62200, 43200, 8191, 63, - INTUOSP2_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 9, .touch_max = 10 }; + INTUOSP2_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 9, .touch_max = 10 }; static const struct wacom_features wacom_features_HID_ANY_ID = { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; From b6b1f19b06b7d4dcc261a88d74c5fb0a53988b4e Mon Sep 17 00:00:00 2001 From: Aaron Armstrong Skomra Date: Mon, 6 Mar 2017 10:54:58 -0800 Subject: [PATCH 379/384] HID: wacom: don't manually release resources for the EKR Commit 5b779fc introduces the manual release of resources in wacom_remove() as an addition to the driver's use of devm. The EKR resources can only be released through wacom_remote_destroy_one() so we skip the manual release for it. Fixes: 5b779fc ("HID: wacom: release the resources before leaving despite devm") Signed-off-by: Aaron Armstrong Skomra Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index be8f7e2a026f..994bddc55b82 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2579,7 +2579,9 @@ static void wacom_remove(struct hid_device *hdev) /* make sure we don't trigger the LEDs */ wacom_led_groups_release(wacom); - wacom_release_resources(wacom); + + if (wacom->wacom_wac.features.type != REMOTE) + wacom_release_resources(wacom); hid_set_drvdata(hdev, NULL); } From deaba636997557fce46ca7bcb509bff5ea1b0558 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Fri, 10 Feb 2017 18:23:00 +0000 Subject: [PATCH 380/384] HID: corsair: support for K65-K70 Rapidfire and Scimitar Pro RGB Add quirks for several corsair gaming devices to avoid long delays on report initialization Supported devices: - Corsair K65RGB Rapidfire Gaming Keyboard - Corsair K70RGB Rapidfire Gaming Keyboard - Corsair Scimitar Pro RGB Gaming Mouse Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index b3df60da0297..0e2e7c571d22 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -278,6 +278,9 @@ #define USB_DEVICE_ID_CORSAIR_K70RGB 0x1b13 #define USB_DEVICE_ID_CORSAIR_STRAFE 0x1b15 #define USB_DEVICE_ID_CORSAIR_K65RGB 0x1b17 +#define USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE 0x1b38 +#define USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE 0x1b39 +#define USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB 0x1b3e #define USB_VENDOR_ID_CREATIVELABS 0x041e #define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51 0x322c diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index d6847a664446..a69a3c88ab29 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -80,6 +80,9 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_STRAFE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, + { USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB, HID_QUIRK_NO_INIT_REPORTS | HID_QUIRK_ALWAYS_POLL }, { USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET }, { USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT }, From 01adc47e885f1127b29d76d0dfb21d8262f9d6b4 Mon Sep 17 00:00:00 2001 From: Oscar Campos Date: Mon, 6 Mar 2017 21:02:39 +0000 Subject: [PATCH 381/384] HID: corsair: Add driver Scimitar Pro RGB gaming mouse 1b1c:1b3e support to hid-corsair This mouse sold by Corsair as Scimitar PRO RGB defines two consecutive Logical Minimum items in its Application (Consumer.0001) report making it non parseable. This patch fixes the report descriptor overriding byte 77 in rdesc from 0x16 (Logical Minimum with 16 bits value) to 0x26 (Logical Maximum with 16 bits value). Signed-off-by: Oscar Campos Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-core.c | 1 + drivers/hid/hid-corsair.c | 47 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 8eab3200ac9a..8c54cb8f5d6d 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -190,6 +190,7 @@ config HID_CORSAIR Supported devices: - Vengeance K90 + - Scimitar PRO RGB config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index ae01ae601d74..3ceb4a2af381 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1870,6 +1870,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_AK1D) }, { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90) }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, { HID_USB_DEVICE(USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_PRODIKEYS_PCMIDI) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYGNAL, USB_DEVICE_ID_CYGNAL_CP2112) }, { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_BARCODE_1) }, diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index c0303f61c26a..9ba5d98a1180 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -3,8 +3,10 @@ * * Supported devices: * - Vengeance K90 Keyboard + * - Scimitar PRO RGB Gaming Mouse * * Copyright (c) 2015 Clement Vuchener + * Copyright (c) 2017 Oscar Campos */ /* @@ -670,10 +672,51 @@ static int corsair_input_mapping(struct hid_device *dev, return 0; } +/* + * The report descriptor of Corsair Scimitar RGB Pro gaming mouse is + * non parseable as they define two consecutive Logical Minimum for + * the Usage Page (Consumer) in rdescs bytes 75 and 77 being 77 0x16 + * that should be obviousy 0x26 for Logical Magimum of 16 bits. This + * prevents poper parsing of the report descriptor due Logical + * Minimum being larger than Logical Maximum. + * + * This driver fixes the report descriptor for: + * - USB ID b1c:1b3e, sold as Scimitar RGB Pro Gaming mouse + */ + +static __u8 *corsair_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + + if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { + /* + * Corsair Scimitar RGB Pro report descriptor is broken and + * defines two different Logical Minimum for the Consumer + * Application. The byte 77 should be a 0x26 defining a 16 + * bits integer for the Logical Maximum but it is a 0x16 + * instead (Logical Minimum) + */ + switch (hdev->product) { + case USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB: + if (*rsize >= 172 && rdesc[75] == 0x15 && rdesc[77] == 0x16 + && rdesc[78] == 0xff && rdesc[79] == 0x0f) { + hid_info(hdev, "Fixing up report descriptor\n"); + rdesc[77] = 0x26; + } + break; + } + + } + return rdesc; +} + static const struct hid_device_id corsair_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K90), .driver_data = CORSAIR_USE_K90_MACRO | CORSAIR_USE_K90_BACKLIGHT }, + { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, + USB_DEVICE_ID_CORSAIR_SCIMITAR_PRO_RGB) }, {} }; @@ -686,10 +729,14 @@ static struct hid_driver corsair_driver = { .event = corsair_event, .remove = corsair_remove, .input_mapping = corsair_input_mapping, + .report_fixup = corsair_mouse_report_fixup, }; module_hid_driver(corsair_driver); MODULE_LICENSE("GPL"); +/* Original K90 driver author */ MODULE_AUTHOR("Clement Vuchener"); +/* Scimitar PRO RGB driver author */ +MODULE_AUTHOR("Oscar Campos"); MODULE_DESCRIPTION("HID driver for Corsair devices"); From 6e5364f5f472d4ea66d37459e299071b0190362c Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 14 Mar 2017 17:08:16 -0700 Subject: [PATCH 382/384] HID: wacom: generic: Wacom mouse is only provided for opaque tablets Commit f85c9dc ("Support tool ID and additional tool types") introduced mouse and lens cursor tools to generic codepath, which covers both display (direct) and opaque tablets (indirect devices). However, mouse and lens cursor tools are only provided for opaque tablets. This patch ignores mouse and lens cursor tools if the device is a display tablet. Signed-off-by: Ping Cheng Reviewed-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 3d864466d473..94250c293be2 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -1959,8 +1959,10 @@ static void wacom_wac_pen_usage_mapping(struct hid_device *hdev, input_set_capability(input, EV_KEY, BTN_TOOL_BRUSH); input_set_capability(input, EV_KEY, BTN_TOOL_PENCIL); input_set_capability(input, EV_KEY, BTN_TOOL_AIRBRUSH); - input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); - input_set_capability(input, EV_KEY, BTN_TOOL_LENS); + if (!(features->device_type & WACOM_DEVICETYPE_DIRECT)) { + input_set_capability(input, EV_KEY, BTN_TOOL_MOUSE); + input_set_capability(input, EV_KEY, BTN_TOOL_LENS); + } break; case WACOM_HID_WD_FINGERWHEEL: wacom_map_usage(input, usage, field, EV_ABS, ABS_WHEEL, 0); From 093b995e3b55a0ae0670226ddfcb05bfbf0099ae Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 20 Mar 2017 14:26:42 +0800 Subject: [PATCH 383/384] mm, swap: Remove WARN_ON_ONCE() in free_swap_slot() Before commit 452b94b8c8c7 ("mm/swap: don't BUG_ON() due to uninitialized swap slot cache"), the following bug is reported, ------------[ cut here ]------------ kernel BUG at mm/swap_slots.c:270! invalid opcode: 0000 [#1] SMP CPU: 5 PID: 1745 Comm: (sd-pam) Not tainted 4.11.0-rc1-00243-g24c534bb161b #1 Hardware name: System manufacturer System Product Name/Z170-K, BIOS 1803 05/06/2016 RIP: 0010:free_swap_slot+0xba/0xd0 Call Trace: swap_free+0x36/0x40 do_swap_page+0x360/0x6d0 __handle_mm_fault+0x880/0x1080 handle_mm_fault+0xd0/0x240 __do_page_fault+0x232/0x4d0 do_page_fault+0x20/0x70 page_fault+0x22/0x30 ---[ end trace aefc9ede53e0ab21 ]--- This is raised by the BUG_ON(!swap_slot_cache_initialized) in free_swap_slot(). This is incorrect, because even if the swap slots cache fails to be initialized, the swap should operate properly without the swap slots cache. And the use_swap_slot_cache check later in the function will protect the uninitialized swap slots cache case. In commit 452b94b8c8c7, the BUG_ON() is replaced by WARN_ON_ONCE(). In the patch, the WARN_ON_ONCE() is removed too. Reported-by: Linus Torvalds Acked-by: Tim Chen Cc: Michal Hocko Signed-off-by: "Huang, Ying" Signed-off-by: Linus Torvalds --- mm/swap_slots.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/swap_slots.c b/mm/swap_slots.c index 7ebb23836f68..b1ccb58ad397 100644 --- a/mm/swap_slots.c +++ b/mm/swap_slots.c @@ -267,8 +267,6 @@ int free_swap_slot(swp_entry_t entry) { struct swap_slots_cache *cache; - WARN_ON_ONCE(!swap_slot_cache_initialized); - cache = &get_cpu_var(swp_slots); if (use_swap_slot_cache && cache->slots_ret) { spin_lock_irq(&cache->free_lock); From 916cda1aa1b412d7cf2991c3af7479544942d121 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 26 Jan 2016 14:10:34 +0100 Subject: [PATCH 384/384] s390: add a system call for guarded storage This adds a new system call to enable the use of guarded storage for user space processes. The system call takes two arguments, a command and pointer to a guarded storage control block: s390_guarded_storage(int command, struct gs_cb *gs_cb); The second argument is relevant only for the GS_SET_BC_CB command. The commands in detail: 0 - GS_ENABLE Enable the guarded storage facility for the current task. The initial content of the guarded storage control block will be all zeros. After the enablement the user space code can use load-guarded-storage-controls instruction (LGSC) to load an arbitrary control block. While a task is enabled the kernel will save and restore the current content of the guarded storage registers on context switch. 1 - GS_DISABLE Disables the use of the guarded storage facility for the current task. The kernel will cease to save and restore the content of the guarded storage registers, the task specific content of these registers is lost. 2 - GS_SET_BC_CB Set a broadcast guarded storage control block. This is called per thread and stores a specific guarded storage control block in the task struct of the current task. This control block will be used for the broadcast event GS_BROADCAST. 3 - GS_CLEAR_BC_CB Clears the broadcast guarded storage control block. The guarded- storage control block is removed from the task struct that was established by GS_SET_BC_CB. 4 - GS_BROADCAST Sends a broadcast to all thread siblings of the current task. Every sibling that has established a broadcast guarded storage control block will load this control block and will be enabled for guarded storage. The broadcast guarded storage control block is used up, a second broadcast without a refresh of the stored control block with GS_SET_BC_CB will not have any effect. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 1 + arch/s390/include/asm/lowcore.h | 9 +- arch/s390/include/asm/nmi.h | 12 +- arch/s390/include/asm/processor.h | 5 + arch/s390/include/asm/setup.h | 2 + arch/s390/include/asm/switch_to.h | 3 + arch/s390/include/asm/thread_info.h | 12 +- arch/s390/include/uapi/asm/Kbuild | 1 + arch/s390/include/uapi/asm/guarded_storage.h | 77 +++++++++++ arch/s390/include/uapi/asm/unistd.h | 2 +- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/asm-offsets.c | 2 +- arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/early.c | 2 + arch/s390/kernel/entry.S | 26 +++- arch/s390/kernel/entry.h | 2 + arch/s390/kernel/guarded_storage.c | 128 +++++++++++++++++++ arch/s390/kernel/machine_kexec.c | 13 +- arch/s390/kernel/nmi.c | 19 ++- arch/s390/kernel/process.c | 7 +- arch/s390/kernel/processor.c | 2 +- arch/s390/kernel/ptrace.c | 84 ++++++++++-- arch/s390/kernel/setup.c | 18 ++- arch/s390/kernel/smp.c | 43 ++++++- arch/s390/kernel/syscalls.S | 2 +- arch/s390/kvm/interrupt.c | 4 +- include/uapi/linux/elf.h | 1 + 27 files changed, 435 insertions(+), 45 deletions(-) create mode 100644 arch/s390/include/uapi/asm/guarded_storage.h create mode 100644 arch/s390/kernel/guarded_storage.c diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1d48880b3cc1..e8f623041769 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -105,6 +105,7 @@ #define HWCAP_S390_VXRS 2048 #define HWCAP_S390_VXRS_BCD 4096 #define HWCAP_S390_VXRS_EXT 8192 +#define HWCAP_S390_GS 16384 /* Internal bits, not exposed via elf */ #define HWCAP_INT_SIE 1UL diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 61261e0e95c0..8a5b082797f8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -157,8 +157,8 @@ struct lowcore { __u64 stfle_fac_list[32]; /* 0x0f00 */ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */ - /* Pointer to vector register save area */ - __u64 vector_save_area_addr; /* 0x11b0 */ + /* Pointer to the machine check extended save area */ + __u64 mcesad; /* 0x11b0 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ @@ -182,10 +182,7 @@ struct lowcore { /* Transaction abort diagnostic block */ __u8 pgm_tdb[256]; /* 0x1800 */ - __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */ - - /* Software defined save area for vector registers */ - __u8 vector_save_area[1024]; /* 0x1c00 */ + __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */ } __packed; #define S390_lowcore (*((struct lowcore *) 0)) diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h index b75fd910386a..e3e8895f5d3e 100644 --- a/arch/s390/include/asm/nmi.h +++ b/arch/s390/include/asm/nmi.h @@ -58,7 +58,9 @@ union mci { u64 ie : 1; /* 32 indirect storage error */ u64 ar : 1; /* 33 access register validity */ u64 da : 1; /* 34 delayed access exception */ - u64 : 7; /* 35-41 */ + u64 : 1; /* 35 */ + u64 gs : 1; /* 36 guarded storage registers */ + u64 : 5; /* 37-41 */ u64 pr : 1; /* 42 tod programmable register validity */ u64 fc : 1; /* 43 fp control register validity */ u64 ap : 1; /* 44 ancillary report */ @@ -69,6 +71,14 @@ union mci { }; }; +#define MCESA_ORIGIN_MASK (~0x3ffUL) +#define MCESA_LC_MASK (0xfUL) + +struct mcesa { + u8 vector_save_area[1024]; + u8 guarded_storage_save_area[32]; +}; + struct pt_regs; extern void s390_handle_mcck(void); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index e4988710aa86..cc101f9371cb 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -135,6 +135,8 @@ struct thread_struct { struct list_head list; /* cpu runtime instrumentation */ struct runtime_instr_cb *ri_cb; + struct gs_cb *gs_cb; /* Current guarded storage cb */ + struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ /* * Warning: 'fpu' is dynamically-sized. It *MUST* be at @@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); +/* Free guarded storage control block for current */ +void exit_thread_gs(void); + /* * Return saved PC of a blocked thread. */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 30bdb5a027f3..383bd8358a8c 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -31,6 +31,7 @@ #define MACHINE_FLAG_VX _BITUL(13) #define MACHINE_FLAG_CAD _BITUL(14) #define MACHINE_FLAG_NX _BITUL(15) +#define MACHINE_FLAG_GS _BITUL(16) #define LPP_MAGIC _BITUL(31) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) @@ -70,6 +71,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) #define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) +#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) /* * Console mode. Override with conmode= diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 12d45f0cfdd9..f6c2b5814ab0 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -10,6 +10,7 @@ #include #include #include +#include extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs) save_fpu_regs(); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ + save_gs_cb(prev->thread.gs_cb); \ } \ if (next->mm) { \ update_cr_regs(next); \ set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ + restore_gs_cb(next->thread.gs_cb); \ } \ prev = __switch_to(prev,next); \ } while (0) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a5b54a445eb8..f36e6e2b73f0 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define TIF_NOTIFY_RESUME 0 /* callback before returning to user */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */ -#define TIF_SECCOMP 5 /* secure computing */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_UPROBE 7 /* breakpointed or single-stepping */ +#define TIF_UPROBE 3 /* breakpointed or single-stepping */ +#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */ +#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */ +#define TIF_SECCOMP 10 /* secure computing */ +#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */ #define TIF_31BIT 16 /* 32bit process */ #define TIF_MEMDIE 17 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */ @@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define _TIF_UPROBE _BITUL(TIF_UPROBE) #define _TIF_31BIT _BITUL(TIF_31BIT) #define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP) +#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE) #endif /* _ASM_THREAD_INFO_H */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index 6848ba5c1454..86b761e583e3 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -12,6 +12,7 @@ header-y += dasd.h header-y += debug.h header-y += errno.h header-y += fcntl.h +header-y += guarded_storage.h header-y += hypfs.h header-y += ioctl.h header-y += ioctls.h diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h new file mode 100644 index 000000000000..852850e8e17e --- /dev/null +++ b/arch/s390/include/uapi/asm/guarded_storage.h @@ -0,0 +1,77 @@ +#ifndef _GUARDED_STORAGE_H +#define _GUARDED_STORAGE_H + +#include + +struct gs_cb { + __u64 reserved; + __u64 gsd; + __u64 gssm; + __u64 gs_epl_a; +}; + +struct gs_epl { + __u8 pad1; + union { + __u8 gs_eam; + struct { + __u8 : 6; + __u8 e : 1; + __u8 b : 1; + }; + }; + union { + __u8 gs_eci; + struct { + __u8 tx : 1; + __u8 cx : 1; + __u8 : 5; + __u8 in : 1; + }; + }; + union { + __u8 gs_eai; + struct { + __u8 : 1; + __u8 t : 1; + __u8 as : 2; + __u8 ar : 4; + }; + }; + __u32 pad2; + __u64 gs_eha; + __u64 gs_eia; + __u64 gs_eoa; + __u64 gs_eir; + __u64 gs_era; +}; + +#define GS_ENABLE 0 +#define GS_DISABLE 1 +#define GS_SET_BC_CB 2 +#define GS_CLEAR_BC_CB 3 +#define GS_BROADCAST 4 + +static inline void load_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb)); +} + +static inline void store_gs_cb(struct gs_cb *gs_cb) +{ + asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb)); +} + +static inline void save_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + store_gs_cb(gs_cb); +} + +static inline void restore_gs_cb(struct gs_cb *gs_cb) +{ + if (gs_cb) + load_gs_cb(gs_cb); +} + +#endif /* _GUARDED_STORAGE_H */ diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 152de9b796e1..ea42290e7d51 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -313,7 +313,7 @@ #define __NR_copy_file_range 375 #define __NR_preadv2 376 #define __NR_pwritev2 377 -/* Number 378 is reserved for guarded storage */ +#define __NR_s390_guarded_storage 378 #define __NR_statx 379 #define NR_syscalls 380 diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 060ce548fe8b..aa5adbdaf200 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o -obj-y += runtime_instr.o cache.o fpu.o dumpstack.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o obj-y += entry.o reipl.o relocate_kernel.o extra-y += head.o head64.o vmlinux.lds diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index c4b3570ded5b..6bb29633e1f1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -175,7 +175,7 @@ int main(void) /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ - OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr); + OFFSET(__LC_MCESAD, lowcore, mcesad); OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index e89cc2e71db1..986642a3543b 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr, COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len); COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags); COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4e65c79cc5f2..95298a41076f 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } + if (test_facility(133)) + S390_lowcore.machine_flags |= MACHINE_FLAG_GS; } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 6a7d737d514c..fa8b8f28e08b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ - _TIF_UPROBE) + _TIF_UPROBE | _TIF_GUARDED_STORAGE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) _CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \ @@ -332,6 +332,8 @@ ENTRY(system_call) TSTMSK __TI_flags(%r12),_TIF_UPROBE jo .Lsysc_uprobe_notify #endif + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lsysc_guarded_storage TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP jo .Lsysc_singlestep TSTMSK __TI_flags(%r12),_TIF_SIGPENDING @@ -408,6 +410,14 @@ ENTRY(system_call) jg uprobe_notify_resume #endif +# +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lsysc_guarded_storage: + lgr %r2,%r11 # pass pointer to pt_regs + larl %r14,.Lsysc_return + jg gs_load_bc_cb + # # _PIF_PER_TRAP is set, call do_per_trap # @@ -663,6 +673,8 @@ ENTRY(io_int_handler) jo .Lio_sigpending TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume + TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE + jo .Lio_guarded_storage TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lio_vxrs TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY) @@ -696,6 +708,18 @@ ENTRY(io_int_handler) larl %r14,.Lio_return jg load_fpu_regs +# +# _TIF_GUARDED_STORAGE is set, call guarded_storage_load +# +.Lio_guarded_storage: + # TRACE_IRQS_ON already done at .Lio_return + ssm __LC_SVC_NEW_PSW # reenable interrupts + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,gs_load_bc_cb + ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts + TRACE_IRQS_OFF + j .Lio_return + # # _TIF_NEED_RESCHED is set, call schedule # diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 33f901865326..dbf5f7e18246 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -74,12 +74,14 @@ long sys_sigreturn(void); long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); DECLARE_PER_CPU(u64, mt_cycles[8]); void verify_facilities(void); +void gs_load_bc_cb(struct pt_regs *regs); void set_fs_fixup(void); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 000000000000..6f064745c3b1 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,128 @@ +/* + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include "entry.h" + +void exit_thread_gs(void) +{ + kfree(current->thread.gs_cb); + kfree(current->thread.gs_bc_cb); + current->thread.gs_cb = current->thread.gs_bc_cb = NULL; +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + __ctl_clear_bit(2, 4); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!MACHINE_HAS_GS) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 3074c1d83829..db5658daf994 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -27,6 +27,7 @@ #include #include #include +#include typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long); @@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image) */ static noinline void __machine_kdump(void *image) { + struct mcesa *mcesa; + unsigned long cr2_old, cr2_new; int this_cpu, cpu; lgr_info_log(); @@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image) continue; } /* Store status of the boot CPU */ + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (MACHINE_HAS_VX) - save_vx_regs((void *) &S390_lowcore.vector_save_area); + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (MACHINE_HAS_GS) { + __ctl_store(cr2_old, 2, 2); + cr2_new = cr2_old | (1UL << 4); + __ctl_load(cr2_new, 2, 2); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + __ctl_load(cr2_old, 2, 2); + } /* * To create a good backchain for this CPU in the dump store_status * is passed the address of a function. The address is saved into diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 9bf8327154ee..985589523970 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) int kill_task; u64 zero; void *fpt_save_area; + struct mcesa *mcesa; kill_task = 0; zero = 0; @@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode) : : "Q" (S390_lowcore.fpt_creg_save_area)); } + mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); if (!MACHINE_HAS_VX) { /* Validate floating point registers */ asm volatile( @@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode) " la 1,%0\n" " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(struct vx_array *) - &S390_lowcore.vector_save_area) : "1"); + : : "Q" (*(struct vx_array *) mcesa->vector_save_area) + : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Validate access registers */ @@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode) */ kill_task = 1; } + /* Validate guarded storage registers */ + if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) { + if (!mci.gs) + /* + * Guarded storage register can't be restored and + * the current processes uses guarded storage. + * It has to be terminated. + */ + kill_task = 1; + else + load_gs_cb((struct gs_cb *) + mcesa->guarded_storage_save_area); + } /* * We don't even try to validate the TOD register, since we simply * can't write something sensible into that register. diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f29e41c5e2ec..999d7154bbdc 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -73,8 +73,10 @@ extern void kernel_thread_starter(void); */ void exit_thread(struct task_struct *tsk) { - if (tsk == current) + if (tsk == current) { exit_thread_runtime_instr(); + exit_thread_gs(); + } } void flush_thread(void) @@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, /* Don't copy runtime instrumentation info */ p->thread.ri_cb = NULL; frame->childregs.psw.mask &= ~PSW_MASK_RI; + /* Don't copy guarded storage control block */ + p->thread.gs_cb = NULL; + p->thread.gs_bc_cb = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 928b929a6261..c73709869447 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v) { static const char *hwcap_str[] = { "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", - "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe" + "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs" }; static const char * const int_hwcap_str[] = { "sie" diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c14df0a1ec3c..c933e255b5d5 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task) struct pt_regs *regs = task_pt_regs(task); struct thread_struct *thread = &task->thread; struct per_regs old, new; + unsigned long cr0_old, cr0_new; + unsigned long cr2_old, cr2_new; + int cr0_changed, cr2_changed; + __ctl_store(cr0_old, 0, 0); + __ctl_store(cr2_old, 2, 2); + cr0_new = cr0_old; + cr2_new = cr2_old; /* Take care of the enable/disable of transactional execution. */ if (MACHINE_HAS_TE) { - unsigned long cr, cr_new; - - __ctl_store(cr, 0, 0); /* Set or clear transaction execution TXC bit 8. */ - cr_new = cr | (1UL << 55); + cr0_new |= (1UL << 55); if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); - if (cr_new != cr) - __ctl_load(cr_new, 0, 0); + cr0_new &= ~(1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; + cr2_new &= ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; + cr2_new |= 1UL; else - cr_new |= 2UL; + cr2_new |= 2UL; } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); } + /* Take care of enable/disable of guarded storage. */ + if (MACHINE_HAS_GS) { + cr2_new &= ~(1UL << 4); + if (task->thread.gs_cb) + cr2_new |= (1UL << 4); + } + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new != cr0_old; + cr2_changed = cr2_new != cr2_old; + if (cr0_changed) + __ctl_load(cr0_new, 0, 0); + if (cr2_changed) + __ctl_load(cr2_new, 2, 2); /* Copy user specified PER registers */ new.control = thread->per_user.control; new.start = thread->per_user.start; @@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target, data, 0, sizeof(unsigned int)); } +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + static const struct user_regset s390_regsets[] = { { .core_note_type = NT_PRSTATUS, @@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = { .get = s390_vxrs_high_get, .set = s390_vxrs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_view = { @@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = { .get = s390_compat_regs_high_get, .set = s390_compat_regs_high_set, }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, }; static const struct user_regset_view user_s390_compat_view = { diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 911dc0b49be0..3ae756c0db3d 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -339,9 +339,15 @@ static void __init setup_lowcore(void) lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, MAX_FACILITY_BIT/8); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + unsigned long bits, size; + + bits = MACHINE_HAS_GS ? 11 : 10; + size = 1UL << bits; + lc->mcesad = (__u64) memblock_virt_alloc(size, size); + if (MACHINE_HAS_GS) + lc->mcesad |= bits; + } lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; lc->sync_enter_timer = S390_lowcore.sync_enter_timer; lc->async_enter_timer = S390_lowcore.async_enter_timer; @@ -779,6 +785,12 @@ static int __init setup_hwcaps(void) elf_hwcap |= HWCAP_S390_VXRS_BCD; } + /* + * Guarded storage support HWCAP_S390_GS is bit 12. + */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_S390_GS; + get_cpu_id(&cpu_id); add_device_randomness(&cpu_id, sizeof(cpu_id)); switch (cpu_id.machine) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 47a973b5b4f1..286bcee800f4 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include "entry.h" enum { @@ -78,6 +79,8 @@ struct pcpu { static u8 boot_core_type; static struct pcpu pcpu_devices[NR_CPUS]; +static struct kmem_cache *pcpu_mcesa_cache; + unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, panic_stack; + unsigned long mcesa_origin, mcesa_bits; struct lowcore *lc; + mcesa_origin = mcesa_bits = 0; if (pcpu != &pcpu_devices[0]) { pcpu->lowcore = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); @@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) panic_stack = __get_free_page(GFP_KERNEL); if (!pcpu->lowcore || !panic_stack || !async_stack) goto out; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = (unsigned long) + kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL); + if (!mcesa_origin) + goto out; + mcesa_bits = MACHINE_HAS_GS ? 11 : 0; + } } else { async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET; panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET; + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK; } lc = pcpu->lowcore; memcpy(lc, &S390_lowcore, 512); memset((char *) lc + 512, 0, sizeof(*lc) - 512); lc->async_stack = async_stack + ASYNC_FRAME_OFFSET; lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET; + lc->mcesad = mcesa_origin | mcesa_bits; lc->cpu_nr = cpu; lc->spinlock_lockval = arch_spin_lockval(cpu); - if (MACHINE_HAS_VX) - lc->vector_save_area_addr = - (unsigned long) &lc->vector_save_area; if (vdso_alloc_per_cpu(lc)) goto out; lowcore_ptr[cpu] = lc; @@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return 0; out: if (pcpu != &pcpu_devices[0]) { + if (mcesa_origin) + kmem_cache_free(pcpu_mcesa_cache, + (void *) mcesa_origin); free_page(panic_stack); free_pages(async_stack, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -229,11 +244,17 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) static void pcpu_free_lowcore(struct pcpu *pcpu) { + unsigned long mcesa_origin; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); lowcore_ptr[pcpu - pcpu_devices] = NULL; vdso_free_per_cpu(pcpu->lowcore); if (pcpu == &pcpu_devices[0]) return; + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK; + kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin); + } free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET); free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER); free_pages((unsigned long) pcpu->lowcore, LC_ORDER); @@ -550,9 +571,11 @@ int smp_store_status(int cpu) if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; - if (!MACHINE_HAS_VX) + if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) return 0; - pa = __pa(pcpu->lowcore->vector_save_area_addr); + pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK); + if (MACHINE_HAS_GS) + pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK; if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, pa) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void) void __init smp_prepare_cpus(unsigned int max_cpus) { + unsigned long size; + /* request the 0x1201 emergency signal external interrupt */ if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1201"); /* request the 0x1202 external call external interrupt */ if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) panic("Couldn't request external interrupt 0x1202"); + /* create slab cache for the machine-check-extended-save-areas */ + if (MACHINE_HAS_VX || MACHINE_HAS_GS) { + size = 1UL << (MACHINE_HAS_GS ? 11 : 10); + pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas", + size, size, 0, NULL); + if (!pcpu_mcesa_cache) + panic("Couldn't create nmi save area cache"); + } } void __init smp_prepare_boot_cpu(void) diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2659b5cfeddb..54fce7b065de 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2) SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */ SYSCALL(sys_preadv2,compat_sys_preadv2) SYSCALL(sys_pwritev2,compat_sys_pwritev2) -NI_SYSCALL +SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */ SYSCALL(sys_statx,compat_sys_statx) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0f8f14199734..169558dc7daf 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -420,8 +420,8 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, save_access_regs(vcpu->run->s.regs.acrs); /* Extended save area */ - rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr, - sizeof(unsigned long)); + rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, + sizeof(unsigned long)); /* Only bits 0-53 are used for address formation */ ext_sa_addr &= ~0x3ffUL; if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b59ee077a596..8c6d3bdb9a00 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -409,6 +409,7 @@ typedef struct elf64_shdr { #define NT_S390_TDB 0x308 /* s390 transaction diagnostic block */ #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 upper half */ #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ +#define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ #define NT_ARM_TLS 0x401 /* ARM TLS register */ #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */