From 2a596fc9d974bb040eda9ab70bf8756fcaaa6afe Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 10 Jul 2017 16:55:04 +1000 Subject: [PATCH 01/98] drm/sun4i: Implement drm_driver lastclose to restore fbdev console The drm_driver lastclose callback is called when the last userspace DRM client has closed. Call drm_fbdev_cma_restore_mode to restore the fbdev console otherwise the fbdev console will stop working. Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support") Cc: stable@vger.kernel.org Tested-by: Olliver Schinagl Reviewed-by: Chen-Yu Tsai Signed-off-by: Jonathan Liu Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index abc7d8fe06b4..a45a627283a1 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -25,12 +25,20 @@ #include "sun4i_framebuffer.h" #include "sun4i_tcon.h" +static void sun4i_drv_lastclose(struct drm_device *dev) +{ + struct sun4i_drv *drv = dev->dev_private; + + drm_fbdev_cma_restore_mode(drv->fbdev); +} + DEFINE_DRM_GEM_CMA_FOPS(sun4i_drv_fops); static struct drm_driver sun4i_drv_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC, /* Generic Operations */ + .lastclose = sun4i_drv_lastclose, .fops = &sun4i_drv_fops, .name = "sun4i-drm", .desc = "Allwinner sun4i Display Engine", From bc240eec4b074f5dc2753f295e980e66b72c90fb Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 26 Jun 2017 13:50:41 -0600 Subject: [PATCH 02/98] ntb: use correct mw_count function in ntb_tool and ntb_transport After converting to the new API, both ntb_tool and ntb_transport are using ntb_mw_count to iterate through ntb_peer_get_addr when they should be using ntb_peer_mw_count. This probably isn't an issue with the Intel and AMD drivers but this will matter for any future driver with asymmetric memory window counts. 
Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: 443b9a14ecbe ("NTB: Alter MW API to support multi-ports devices") --- drivers/ntb/ntb_transport.c | 2 +- drivers/ntb/test/ntb_tool.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 9a03c5871efe..b29558ddfe95 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1059,7 +1059,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) int node; int rc, i; - mw_count = ntb_mw_count(ndev, PIDX); + mw_count = ntb_peer_mw_count(ndev); if (!ndev->ops->mw_set_trans) { dev_err(&ndev->dev, "Inbound MW based NTB API is required\n"); diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index f002bf48a08d..a69815c45ce6 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -959,7 +959,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) tc->ntb = ntb; init_waitqueue_head(&tc->link_wq); - tc->mw_count = min(ntb_mw_count(tc->ntb, PIDX), MAX_MWS); + tc->mw_count = min(ntb_peer_mw_count(tc->ntb), MAX_MWS); for (i = 0; i < tc->mw_count; i++) { rc = tool_init_mw(tc, i); if (rc) From eb92b4183d93a6f101a6bd3aaae651de404c119a Mon Sep 17 00:00:00 2001 From: Andreas Klinger Date: Tue, 25 Jul 2017 11:11:14 +0200 Subject: [PATCH 03/98] iio: bmp280: properly initialize device for humidity reading If the device is not initialized at least once it happens that the humidity reading is skipped, which means the special value 0x8000 is delivered. To avoid this case, the oversampling of the humidity must be set before the oversampling of the temperature and pressure is set as written in the datasheet of the BME280. Furthermore proper error detection is added in case a skipped value is read from the device. This is done also for pressure and temperature reading. 
In particular, it doesn't make sense to compensate this value and treat it as a regular value. Signed-off-by: Andreas Klinger Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 27 ++++++++++++++++++++++++--- drivers/iio/pressure/bmp280.h | 5 +++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index d82b788374b6..0d2ea3ee371b 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -282,6 +282,11 @@ static int bmp280_read_temp(struct bmp280_data *data, } adc_temp = be32_to_cpu(tmp) >> 12; + if (adc_temp == BMP280_TEMP_SKIPPED) { + /* reading was skipped */ + dev_err(data->dev, "reading temperature skipped\n"); + return -EIO; + } comp_temp = bmp280_compensate_temp(data, adc_temp); /* @@ -317,6 +322,11 @@ static int bmp280_read_press(struct bmp280_data *data, } adc_press = be32_to_cpu(tmp) >> 12; + if (adc_press == BMP280_PRESS_SKIPPED) { + /* reading was skipped */ + dev_err(data->dev, "reading pressure skipped\n"); + return -EIO; + } comp_press = bmp280_compensate_press(data, adc_press); *val = comp_press; @@ -345,6 +355,11 @@ static int bmp280_read_humid(struct bmp280_data *data, int *val, int *val2) } adc_humidity = be16_to_cpu(tmp); + if (adc_humidity == BMP280_HUMIDITY_SKIPPED) { + /* reading was skipped */ + dev_err(data->dev, "reading humidity skipped\n"); + return -EIO; + } comp_humidity = bmp280_compensate_humidity(data, adc_humidity); *val = comp_humidity; @@ -597,14 +612,20 @@ static const struct bmp280_chip_info bmp280_chip_info = { static int bme280_chip_config(struct bmp280_data *data) { - int ret = bmp280_chip_config(data); + int ret; u8 osrs = BMP280_OSRS_HUMIDITIY_X(data->oversampling_humid + 1); + /* + * Oversampling of humidity must be set before oversampling of + * temperature/pressure is set to become effective. 
+ */ + ret = regmap_update_bits(data->regmap, BMP280_REG_CTRL_HUMIDITY, + BMP280_OSRS_HUMIDITY_MASK, osrs); + if (ret < 0) return ret; - return regmap_update_bits(data->regmap, BMP280_REG_CTRL_HUMIDITY, - BMP280_OSRS_HUMIDITY_MASK, osrs); + return bmp280_chip_config(data); } static const struct bmp280_chip_info bme280_chip_info = { diff --git a/drivers/iio/pressure/bmp280.h b/drivers/iio/pressure/bmp280.h index 2c770e13be0e..61347438b779 100644 --- a/drivers/iio/pressure/bmp280.h +++ b/drivers/iio/pressure/bmp280.h @@ -96,6 +96,11 @@ #define BME280_CHIP_ID 0x60 #define BMP280_SOFT_RESET_VAL 0xB6 +/* BMP280 register skipped special values */ +#define BMP280_TEMP_SKIPPED 0x80000 +#define BMP280_PRESS_SKIPPED 0x80000 +#define BMP280_HUMIDITY_SKIPPED 0x8000 + /* Regmap configurations */ extern const struct regmap_config bmp180_regmap_config; extern const struct regmap_config bmp280_regmap_config; From 50b39608efb1e60f334f9b59128fd6d970bfd5a6 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:18:57 +0200 Subject: [PATCH 04/98] iio: trigger: stm32-timer: fix quadrature mode get routine Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device") SMS bitfield is mode + 1. After reset, upon boot, SMS = 0. When reading from sysfs, stm32_get_quadrature_mode() returns -1 (e.g. -EPERM) which is the wrong error code here. So, check that the SMS bitfield matches a valid encoder mode, or return -EINVAL. 
Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index d22bc56dd9fc..6aa73d6b2882 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -571,11 +571,14 @@ static int stm32_get_quadrature_mode(struct iio_dev *indio_dev, { struct stm32_timer_trigger *priv = iio_priv(indio_dev); u32 smcr; + int mode; regmap_read(priv->regmap, TIM_SMCR, &smcr); - smcr &= TIM_SMCR_SMS; + mode = (smcr & TIM_SMCR_SMS) - 1; + if ((mode < 0) || (mode > ARRAY_SIZE(stm32_quadrature_modes))) + return -EINVAL; - return smcr - 1; + return mode; } static const struct iio_enum stm32_quadrature_mode_enum = { From 1987a08cd989fd9e5690e90a04e70046e93315f4 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:18:58 +0200 Subject: [PATCH 05/98] iio: trigger: stm32-timer: fix write_raw return value Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device") IIO core expects zero as return value for write_raw() callback in case of success. 
Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 6aa73d6b2882..107918b3a90b 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -406,9 +406,8 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: - regmap_write(priv->regmap, TIM_CNT, val); + return regmap_write(priv->regmap, TIM_CNT, val); - return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: /* fixed scale */ return -EINVAL; From 06e3fe89988b1c99a3d9953b1d3b1faf3f047017 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:18:59 +0200 Subject: [PATCH 06/98] iio: trigger: stm32-timer: fix get/set down count direction Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device") This fixes two issues: - stm32_set_count_direction: to set down direction - stm32_get_count_direction: to get down direction IIO core provides/expects value to be an index of iio_enum items array. This needs to be turned by these routines into TIM_CR1_DIR (e.g. BIT(4)) value. Also, report error when attempting to write direction, when in encoder mode: in this case, direction is read only (given by encoder inputs). 
Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 107918b3a90b..d28aa02b85e8 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -594,13 +594,20 @@ static const char *const stm32_count_direction_states[] = { static int stm32_set_count_direction(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, - unsigned int mode) + unsigned int dir) { struct stm32_timer_trigger *priv = iio_priv(indio_dev); + u32 val; + int mode; - regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_DIR, mode); + /* In encoder mode, direction is RO (given by TI1/TI2 signals) */ + regmap_read(priv->regmap, TIM_SMCR, &val); + mode = (val & TIM_SMCR_SMS) - 1; + if ((mode >= 0) || (mode < ARRAY_SIZE(stm32_quadrature_modes))) + return -EBUSY; - return 0; + return regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_DIR, + dir ? TIM_CR1_DIR : 0); } static int stm32_get_count_direction(struct iio_dev *indio_dev, @@ -611,7 +618,7 @@ static int stm32_get_count_direction(struct iio_dev *indio_dev, regmap_read(priv->regmap, TIM_CR1, &cr1); - return (cr1 & TIM_CR1_DIR); + return ((cr1 & TIM_CR1_DIR) ? 1 : 0); } static const struct iio_enum stm32_count_direction_enum = { From 90938ca432e6b8f6bb1c22a24984738fc3d906ed Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 27 Jul 2017 18:19:00 +0200 Subject: [PATCH 07/98] iio: trigger: stm32-timer: add enable attribute In order to use encoder mode, timers needs to be enabled (e.g. CEN bit) along with peripheral clock. Add IIO_CHAN_INFO_ENABLE attribute to handle this. Also, in triggered mode, CEN bit is set automatically in hardware. Then clock must be enabled before starting triggered mode. 
Signed-off-by: Fabrice Gasnier Acked-by: Benjamin Gaignard Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 57 +++++++++++++++++------ 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index d28aa02b85e8..14e6eb04bbb0 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -366,34 +366,32 @@ static int stm32_counter_read_raw(struct iio_dev *indio_dev, int *val, int *val2, long mask) { struct stm32_timer_trigger *priv = iio_priv(indio_dev); + u32 dat; switch (mask) { case IIO_CHAN_INFO_RAW: - { - u32 cnt; - - regmap_read(priv->regmap, TIM_CNT, &cnt); - *val = cnt; - + regmap_read(priv->regmap, TIM_CNT, &dat); + *val = dat; return IIO_VAL_INT; - } - case IIO_CHAN_INFO_SCALE: - { - u32 smcr; - regmap_read(priv->regmap, TIM_SMCR, &smcr); - smcr &= TIM_SMCR_SMS; + case IIO_CHAN_INFO_ENABLE: + regmap_read(priv->regmap, TIM_CR1, &dat); + *val = (dat & TIM_CR1_CEN) ? 
1 : 0; + return IIO_VAL_INT; + + case IIO_CHAN_INFO_SCALE: + regmap_read(priv->regmap, TIM_SMCR, &dat); + dat &= TIM_SMCR_SMS; *val = 1; *val2 = 0; /* in quadrature case scale = 0.25 */ - if (smcr == 3) + if (dat == 3) *val2 = 2; return IIO_VAL_FRACTIONAL_LOG2; } - } return -EINVAL; } @@ -403,6 +401,7 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev, int val, int val2, long mask) { struct stm32_timer_trigger *priv = iio_priv(indio_dev); + u32 dat; switch (mask) { case IIO_CHAN_INFO_RAW: @@ -411,6 +410,22 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_SCALE: /* fixed scale */ return -EINVAL; + + case IIO_CHAN_INFO_ENABLE: + if (val) { + regmap_read(priv->regmap, TIM_CR1, &dat); + if (!(dat & TIM_CR1_CEN)) + clk_enable(priv->clk); + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, + TIM_CR1_CEN); + } else { + regmap_read(priv->regmap, TIM_CR1, &dat); + regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, + 0); + if (dat & TIM_CR1_CEN) + clk_disable(priv->clk); + } + return 0; } return -EINVAL; @@ -506,9 +521,19 @@ static int stm32_set_enable_mode(struct iio_dev *indio_dev, { struct stm32_timer_trigger *priv = iio_priv(indio_dev); int sms = stm32_enable_mode2sms(mode); + u32 val; if (sms < 0) return sms; + /* + * Triggered mode sets CEN bit automatically by hardware. So, first + * enable counter clock, so it can use it. Keeps it in sync with CEN. 
+ */ + if (sms == 6) { + regmap_read(priv->regmap, TIM_CR1, &val); + if (!(val & TIM_CR1_CEN)) + clk_enable(priv->clk); + } regmap_update_bits(priv->regmap, TIM_SMCR, TIM_SMCR_SMS, sms); @@ -681,7 +706,9 @@ static const struct iio_chan_spec_ext_info stm32_trigger_count_info[] = { static const struct iio_chan_spec stm32_trigger_channel = { .type = IIO_COUNT, .channel = 0, - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE), + .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | + BIT(IIO_CHAN_INFO_ENABLE) | + BIT(IIO_CHAN_INFO_SCALE), .ext_info = stm32_trigger_count_info, .indexed = 1 }; From ff3aa88a4d61468baece3fc2bb54e2a3bea6360f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Wed, 26 Jul 2017 23:32:06 +0200 Subject: [PATCH 08/98] iio: adc: ina219: Avoid underflow for sleeping time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Proper support for the INA219 lowered the minimum sampling period from 2*140us to 2*84us. Subtracting 200us later leads to an underflow and an almost infinite udelay later. Using a signed int for the sampling period provides sufficient range (at most 2*8640*1024us), but catches the underflow when comparing with buffer_us. 
Fixes: 18edac2e22f4 ("iio: adc: Fix integration time/averaging for INA219/220") Signed-off-by: Stefan Brüns Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ina2xx-adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c index 232c0b80d658..c3f86138cb55 100644 --- a/drivers/iio/adc/ina2xx-adc.c +++ b/drivers/iio/adc/ina2xx-adc.c @@ -644,7 +644,7 @@ static int ina2xx_capture_thread(void *data) { struct iio_dev *indio_dev = data; struct ina2xx_chip_info *chip = iio_priv(indio_dev); - unsigned int sampling_us = SAMPLING_PERIOD(chip); + int sampling_us = SAMPLING_PERIOD(chip); int buffer_us; /* From 50dbe1f4b453b2860ef0e3d48054b9fd24d5ae97 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Mon, 24 Jul 2017 18:10:38 +0200 Subject: [PATCH 09/98] iio: adc: stm32: fix common clock rate ADC clock input is provided to internal prescaler (that decreases its frequency). It's then used as reference clock for conversions. - Fix common clock rate used then by stm32-adc sub-devices. Take common prescaler into account. Currently, rate is used to set "boost" mode. It may unnecessarily be set. This impacts power consumption. - Fix ADC max clock rate on STM32H7 (fADC from datasheet). Currently, prescaler may be set too low. This can result in ADC reference clock used for conversion to exceed max allowed clock frequency. 
Fixes: 95e339b6e85d ("iio: adc: stm32: add support for STM32H7") Signed-off-by: Fabrice Gasnier Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-adc-core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index e09233b03c05..609676384f5e 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -64,7 +64,7 @@ #define STM32H7_CKMODE_MASK GENMASK(17, 16) /* STM32 H7 maximum analog clock rate (from datasheet) */ -#define STM32H7_ADC_MAX_CLK_RATE 72000000 +#define STM32H7_ADC_MAX_CLK_RATE 36000000 /** * stm32_adc_common_regs - stm32 common registers, compatible dependent data @@ -148,14 +148,14 @@ static int stm32f4_adc_clk_sel(struct platform_device *pdev, return -EINVAL; } - priv->common.rate = rate; + priv->common.rate = rate / stm32f4_pclk_div[i]; val = readl_relaxed(priv->common.base + STM32F4_ADC_CCR); val &= ~STM32F4_ADC_ADCPRE_MASK; val |= i << STM32F4_ADC_ADCPRE_SHIFT; writel_relaxed(val, priv->common.base + STM32F4_ADC_CCR); dev_dbg(&pdev->dev, "Using analog clock source at %ld kHz\n", - rate / (stm32f4_pclk_div[i] * 1000)); + priv->common.rate / 1000); return 0; } @@ -250,7 +250,7 @@ static int stm32h7_adc_clk_sel(struct platform_device *pdev, out: /* rate used later by each ADC instance to control BOOST mode */ - priv->common.rate = rate; + priv->common.rate = rate / div; /* Set common clock mode and prescaler */ val = readl_relaxed(priv->common.base + STM32H7_ADC_CCR); @@ -260,7 +260,7 @@ static int stm32h7_adc_clk_sel(struct platform_device *pdev, writel_relaxed(val, priv->common.base + STM32H7_ADC_CCR); dev_dbg(&pdev->dev, "Using %s clock/%d source at %ld kHz\n", - ckmode ? "bus" : "adc", div, rate / (div * 1000)); + ckmode ? 
"bus" : "adc", div, priv->common.rate / 1000); return 0; } From f3fd2afed8eee91620d05b69ab94c14793c849d7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 28 Jul 2017 15:10:48 -0700 Subject: [PATCH 10/98] ntb: transport shouldn't disable link due to bogus values in SPADs It seems that under certain scenarios the SPAD can have bogus values caused by an agent (i.e. BIOS or other software) that is not the kernel driver, and that causes memory window setup failure. This should not cause the link to be disabled because if we do that, the driver will never recover again. We have verified in testing that this issue happens and prevents proper link recovery. Signed-off-by: Dave Jiang Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: 84f766855f61 ("ntb: stop link work when we do not have memory") --- drivers/ntb/ntb_transport.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index b29558ddfe95..f58d8e305323 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -924,10 +924,8 @@ static void ntb_transport_link_work(struct work_struct *work) ntb_free_mw(nt, i); /* if there's an actual failure, we should just bail */ - if (rc < 0) { - ntb_link_disable(ndev); + if (rc < 0) return; - } out: if (ntb_link_is_up(ndev, NULL, NULL) == 1) From 0eb46345364d7318b11068c46e8a68d5dc10f65e Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 25 Jul 2017 14:57:42 -0600 Subject: [PATCH 11/98] ntb: ntb_test: ensure the link is up before trying to configure the mws After the link tests, there is a race on one side of the test for the link coming up. It's possible, in some cases, for the test script to write to the 'peer_trans' files before the link has come up. To fix this, we simply use the link event file to ensure both sides see the link as up before continuning. 
Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem") --- tools/testing/selftests/ntb/ntb_test.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh index 1c12b5855e4f..5fc7ad359e21 100755 --- a/tools/testing/selftests/ntb/ntb_test.sh +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -333,6 +333,10 @@ function ntb_tool_tests() link_test $LOCAL_TOOL $REMOTE_TOOL link_test $REMOTE_TOOL $LOCAL_TOOL + #Ensure the link is up on both sides before continuing + write_file Y $LOCAL_TOOL/link_event + write_file Y $REMOTE_TOOL/link_event + for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do PT=$(basename $PEER_TRANS) write_file $MW_SIZE $LOCAL_TOOL/$PT From 7f5770678b2d0cc8f3ffbf7eb73410f2acba7925 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 30 Jul 2017 21:10:44 +0300 Subject: [PATCH 12/98] dmaengine: tegra210-adma: fix of_irq_get() error check of_irq_get() may return 0 as well as negative error number on failure, while the driver only checks for the negative values. The driver would then call request_irq(0, ...) in tegra_adma_alloc_chan_resources() and never get valid channel interrupt. Check for 'tdc->irq <= 0' instead and return -ENXIO from the driver's probe iff of_irq_get() returned 0. 
Fixes: f46b195799b5 ("dmaengine: tegra-adma: Add support for Tegra210 ADMA") Signed-off-by: Sergei Shtylyov Acked-by: Thierry Reding Acked-by: Jon Hunter Signed-off-by: Vinod Koul --- drivers/dma/tegra210-adma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index b10cbaa82ff5..b26256f23d67 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -717,8 +717,8 @@ static int tegra_adma_probe(struct platform_device *pdev) tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i); tdc->irq = of_irq_get(pdev->dev.of_node, i); - if (tdc->irq < 0) { - ret = tdc->irq; + if (tdc->irq <= 0) { + ret = tdc->irq ?: -ENXIO; goto irq_dispose; } From 0fa375e6bc9023211eead30a6a79963c45a563da Mon Sep 17 00:00:00 2001 From: Jeffy Chen Date: Wed, 9 Aug 2017 18:41:03 +0800 Subject: [PATCH 13/98] drm/rockchip: Fix suspend crash when drm is not bound Currently we are allocating drm_device in rockchip_drm_bind, so if the suspend/resume code access it when drm is not bound, we would hit this crash: [ 253.402836] Unable to handle kernel NULL pointer dereference at virtual address 00000028 [ 253.402837] pgd = ffffffc06c9b0000 [ 253.402841] [00000028] *pgd=0000000000000000, *pud=0000000000000000 [ 253.402844] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 253.402859] Modules linked in: btusb btrtl btbcm btintel bluetooth ath10k_pci ath10k_core ar10k_ath ar10k_mac80211 cfg80211 ip6table_filter asix usbnet mii [ 253.402864] CPU: 4 PID: 1331 Comm: cat Not tainted 4.4.70 #15 [ 253.402865] Hardware name: Google Scarlet (DT) [ 253.402867] task: ffffffc076c0ce00 ti: ffffffc06c2c8000 task.ti: ffffffc06c2c8000 [ 253.402871] PC is at rockchip_drm_sys_suspend+0x20/0x5c Add sanity checks to prevent that. 
Reported-by: Brian Norris Signed-off-by: Jeffy Chen Signed-off-by: Sean Paul Link: https://patchwork.kernel.org/patch/9890297/ --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index c6b1b7f3a2a3..c16bc0a7115b 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -275,11 +275,15 @@ static void rockchip_drm_fb_resume(struct drm_device *drm) static int rockchip_drm_sys_suspend(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); - struct rockchip_drm_private *priv = drm->dev_private; + struct rockchip_drm_private *priv; + + if (!drm) + return 0; drm_kms_helper_poll_disable(drm); rockchip_drm_fb_suspend(drm); + priv = drm->dev_private; priv->state = drm_atomic_helper_suspend(drm); if (IS_ERR(priv->state)) { rockchip_drm_fb_resume(drm); @@ -293,8 +297,12 @@ static int rockchip_drm_sys_suspend(struct device *dev) static int rockchip_drm_sys_resume(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); - struct rockchip_drm_private *priv = drm->dev_private; + struct rockchip_drm_private *priv; + if (!drm) + return 0; + + priv = drm->dev_private; drm_atomic_helper_resume(drm, priv->state); rockchip_drm_fb_resume(drm); drm_kms_helper_poll_enable(drm); From 2406b296a3a80ba1c78707f205556f2388d474ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 21 Jul 2017 22:56:19 +0200 Subject: [PATCH 14/98] gpu: ipu-v3: add DRM dependency The new PRE/PRG driver code causes a link failure when IPUv3 is built-in, but DRM is built as a module: drivers/gpu/ipu-v3/ipu-pre.o: In function `ipu_pre_configure': ipu-pre.c:(.text.ipu_pre_configure+0x18): undefined reference to `drm_format_info' drivers/gpu/ipu-v3/ipu-prg.o: In function `ipu_prg_format_supported': ipu-prg.c:(.text.ipu_prg_format_supported+0x8): undefined reference to 
`drm_format_info' Adding a Kconfig dependency on DRM means we don't run into this problem any more. If DRM is disabled altogether, the IPUv3 driver is built without PRE/PRG support. Fixes: ea9c260514c1 ("gpu: ipu-v3: add driver for Prefetch Resolve Gasket") Link: https://patchwork.kernel.org/patch/9636665/ Signed-off-by: Arnd Bergmann [p.zabel@pengutronix.de: changed the dependency from DRM to DRM || !DRM, since the link failure only happens when DRM=m and IPUV3_CORE=y. Modified the commit message to reflect this.] Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig index 08766c6e7856..87a20b3dcf7a 100644 --- a/drivers/gpu/ipu-v3/Kconfig +++ b/drivers/gpu/ipu-v3/Kconfig @@ -1,6 +1,7 @@ config IMX_IPUV3_CORE tristate "IPUv3 core support" depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM + depends on DRM || !DRM # if DRM=m, this can't be 'y' select GENERIC_IRQ_CHIP help Choose this if you have a i.MX5/6 system and want to use the Image From 5be5dd38d4628fdbff7359f235f7cdf0cf9655f1 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 5 Aug 2016 11:55:18 +0200 Subject: [PATCH 15/98] drm/imx: ipuv3-plane: fix YUV framebuffer scanout on the base plane Historically, only RGB framebuffers could be assigned to the primary plane. This changed with universal plane support. Since no colorspace conversion was set up for the IPUv3 full plane, assigning YUV frame buffers to the primary plane caused incorrect output. Fix this by enabling color space conversion also for the primary plane. 
Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 6276bb834b4f..d3845989a29d 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -545,15 +545,13 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, return; } + ics = ipu_drm_fourcc_to_colorspace(fb->format->format); switch (ipu_plane->dp_flow) { case IPU_DP_FLOW_SYNC_BG: - ipu_dp_setup_channel(ipu_plane->dp, - IPUV3_COLORSPACE_RGB, - IPUV3_COLORSPACE_RGB); + ipu_dp_setup_channel(ipu_plane->dp, ics, IPUV3_COLORSPACE_RGB); ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true); break; case IPU_DP_FLOW_SYNC_FG: - ics = ipu_drm_fourcc_to_colorspace(state->fb->format->format); ipu_dp_setup_channel(ipu_plane->dp, ics, IPUV3_COLORSPACE_UNKNOWN); /* Enable local alpha on partial plane */ From 491ab4700d1b64f5cf2f9055e01613a923df5fab Mon Sep 17 00:00:00 2001 From: Nikhil Mahale Date: Wed, 9 Aug 2017 09:23:01 +0530 Subject: [PATCH 16/98] drm: Fix framebuffer leak Do not leak framebuffer if client provided crtc id found invalid. 
Signed-off-by: Nikhil Mahale Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1502250781-5779-1-git-send-email-nmahale@nvidia.com --- drivers/gpu/drm/drm_plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 5dc8c4350602..e40c12fabbde 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -601,6 +601,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data, crtc = drm_crtc_find(dev, plane_req->crtc_id); if (!crtc) { + drm_framebuffer_put(fb); DRM_DEBUG_KMS("Unknown crtc ID %d\n", plane_req->crtc_id); return -ENOENT; From 7f5d6dac548b983702dd7aac1d463bd88dff50a8 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 14 Aug 2017 12:07:21 +0200 Subject: [PATCH 17/98] drm/atomic: Handle -EDEADLK with out-fences correctly complete_crtc_signaling is freeing fence_state, but when retrying num_fences and fence_state are not zero'd. This caused duplicate fd's in the fence_state array, followed by a BUG_ON in fs/file.c because we reallocate freed memory, and installing over an existing fd, or potential other fun. Zero fence_state and num_fences correctly in the retry loop, which allows kms_atomic_transition to pass. 
Fixes: beaf5af48034 ("drm/fence: add out-fences support") Cc: Gustavo Padovan Cc: Brian Starkey (v10) Cc: Sean Paul Cc: Daniel Vetter Cc: Jani Nikula Cc: David Airlie Signed-off-by: Maarten Lankhorst Cc: # v4.10+ Testcase: kms_atomic_transitions.plane-all-modeset-transition-fencing (with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y) Link: https://patchwork.freedesktop.org/patch/msgid/20170814100721.13340-1-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter #intel-gfx on irc --- drivers/gpu/drm/drm_atomic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c0f336d23f9c..b43939f24812 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -2167,10 +2167,10 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, struct drm_atomic_state *state; struct drm_modeset_acquire_ctx ctx; struct drm_plane *plane; - struct drm_out_fence_state *fence_state = NULL; + struct drm_out_fence_state *fence_state; unsigned plane_mask; int ret = 0; - unsigned int i, j, num_fences = 0; + unsigned int i, j, num_fences; /* disallow for drivers not supporting atomic: */ if (!drm_core_check_feature(dev, DRIVER_ATOMIC)) @@ -2211,6 +2211,8 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, plane_mask = 0; copied_objs = 0; copied_props = 0; + fence_state = NULL; + num_fences = 0; for (i = 0; i < arg->count_objs; i++) { uint32_t obj_id, count_props; From a23318feeff662c8d25d21623daebdd2e55ec221 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 9 Aug 2017 15:28:22 +0200 Subject: [PATCH 18/98] i2c: designware: Fix system suspend The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming during system suspend"), may suggest to the PM core to try out the so called direct_complete path for system sleep. 
In this path, the PM core treats a runtime suspended device as it's already in a proper low power state for system sleep, which makes it skip calling the system sleep callbacks for the device, except for the ->prepare() and the ->complete() callbacks. However, the PM core may unset the direct_complete flag for a parent device, in case its child device are being system suspended before. In this scenario, the PM core invokes the system sleep callbacks, no matter if the device is runtime suspended or not. Particularly in cases of an existing i2c slave device, the above path is triggered, which breaks the assumption that the i2c device is always runtime resumed whenever the dw_i2c_plat_suspend() is being called. More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and clk_core_unprepare(), for an already disabled/unprepared clock, leading to a splat in the log about clocks calls being wrongly balanced and breaking system sleep. To still allow the direct_complete path in cases when it's possible, but also to keep the fix simple, let's runtime resume the i2c device in the ->suspend() callback, before continuing to put the device into low power state. Note, in cases when the i2c device is attached to the ACPI PM domain, this problem doesn't occur, because ACPI's ->suspend() callback, assigned to acpi_subsys_suspend(), already calls pm_runtime_resume() for the device. It should also be noted that this change does not fix commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming during system suspend"). Because for the non-ACPI case, the system sleep support was already broken prior that point. Cc: # v4.4+ Signed-off-by: Ulf Hansson Acked-by: Rafael J. 
Wysocki Tested-by: John Stultz Tested-by: Jarkko Nikula Acked-by: Jarkko Nikula Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 143a8fd582b4..441afc715a90 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -430,7 +430,7 @@ static void dw_i2c_plat_complete(struct device *dev) #endif #ifdef CONFIG_PM -static int dw_i2c_plat_suspend(struct device *dev) +static int dw_i2c_plat_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev); @@ -452,11 +452,21 @@ static int dw_i2c_plat_resume(struct device *dev) return 0; } +#ifdef CONFIG_PM_SLEEP +static int dw_i2c_plat_suspend(struct device *dev) +{ + pm_runtime_resume(dev); + return dw_i2c_plat_runtime_suspend(dev); +} +#endif + static const struct dev_pm_ops dw_i2c_dev_pm_ops = { .prepare = dw_i2c_plat_prepare, .complete = dw_i2c_plat_complete, SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume) - SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL) + SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend, + dw_i2c_plat_resume, + NULL) }; #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops) From 984277a041d5ee4a65aaadf0307d67a7c401e11c Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 11 Aug 2017 14:44:55 +0300 Subject: [PATCH 19/98] i2c: designware: Fix oops from i2c_dw_irq_handler_slave When i2c-designware is initialized in slave mode the i2c-designware-slave.c: i2c_dw_irq_handler_slave() can hit a NULL pointer dereference when I2C slave backend is not registered but code is accessing the struct dw_i2c_dev.slave without testing whether it is NULL.
We might get spurious interrupts from other devices or from IRQ core during unloading the driver when CONFIG_DEBUG_SHIRQ is set. Existing check for enable and IRQ status is not enough since device can be power gated and those bits may read 1. Fix this by handling the interrupt only when struct dw_i2c_dev.slave is also set. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 0548c7ea578c..4b62a3872763 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -272,7 +272,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) slave_activity = ((dw_readl(dev, DW_IC_STATUS) & DW_IC_STATUS_SLAVE_ACTIVITY) >> 6); - if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY)) + if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave) return 0; dev_dbg(dev->dev, From 4e2d93de070ceaca5097f7ee5c311731b83208a0 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Wed, 9 Aug 2017 15:24:44 +0300 Subject: [PATCH 20/98] i2c: designware: Fix standard mode speed when configuring the slave mode Code sets bit DW_IC_CON_SPEED_FAST (0x4) always when configuring the slave mode. This results in the incorrect register value DW_IC_CON_SPEED_HIGH (0x6) when OR'ed together with DW_IC_CON_SPEED_STD (0x2). Remove this and let the code set the speed mode bits according to clock frequency or default to fast mode.
Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-platdrv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 441afc715a90..57248bccadbc 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -198,8 +198,7 @@ static void i2c_dw_configure_slave(struct dw_i2c_dev *dev) dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY; dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL | - DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED | - DW_IC_CON_SPEED_FAST; + DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED; dev->mode = DW_IC_SLAVE; From f4b17a14faeec4160f97ad75ea7534f571f12404 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 9 Aug 2017 11:21:28 +0200 Subject: [PATCH 21/98] i2c: core: Make comment about I2C table requirement to reflect the code I2C drivers were required to have an I2C device ID table even if they were for devices that would only be registered using a specific firmware interface (e.g. OF or ACPI). But commit da10c06a044b ("i2c: Make I2C ID tables non-mandatory for DT'ed devices") changed the I2C core to relax the requirement and allow drivers to avoid defining this table. Unfortunately it only took into account drivers for OF-only devices and forgot about ACPI-only ones, and this was fixed by commit c64ffff7a9d1 ("i2c: core: Allow empty id_table in ACPI case as well"). But the latter didn't update the original comment, so it doesn't reflect what the code does now.
Signed-off-by: Javier Martinez Canillas Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 12822a4b8f8f..56e46581b84b 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -353,8 +353,8 @@ static int i2c_device_probe(struct device *dev) } /* - * An I2C ID table is not mandatory, if and only if, a suitable Device - * Tree match table entry is supplied for the probing device. + * An I2C ID table is not mandatory, if and only if, a suitable OF + * or ACPI ID table is supplied for the probing device. */ if (!driver->id_table && !i2c_acpi_match_device(dev->driver->acpi_match_table, client) && From 42543aeb48e3701b49e0a83654248afc38feb88f Mon Sep 17 00:00:00 2001 From: Anton Vasilyev Date: Thu, 10 Aug 2017 18:15:45 +0300 Subject: [PATCH 22/98] i2c: simtec: use release_mem_region instead of release_resource Use api pair of request_mem_region and release_mem_region instead of release_resource. Found by Linux Driver Verification project (linuxtesting.org). 
Signed-off-by: Anton Vasilyev Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-simtec.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-simtec.c b/drivers/i2c/busses/i2c-simtec.c index b4685bb9b5d7..adca51a99487 100644 --- a/drivers/i2c/busses/i2c-simtec.c +++ b/drivers/i2c/busses/i2c-simtec.c @@ -127,8 +127,7 @@ static int simtec_i2c_probe(struct platform_device *dev) iounmap(pd->reg); err_res: - release_resource(pd->ioarea); - kfree(pd->ioarea); + release_mem_region(pd->ioarea->start, size); err: kfree(pd); @@ -142,8 +141,7 @@ static int simtec_i2c_remove(struct platform_device *dev) i2c_del_adapter(&pd->adap); iounmap(pd->reg); - release_resource(pd->ioarea); - kfree(pd->ioarea); + release_mem_region(pd->ioarea->start, resource_size(pd->ioarea)); kfree(pd); return 0; From f1c0b7e448b9e66dd9a7343bab58a3d3a477e104 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Fri, 28 Jul 2017 18:00:12 -0700 Subject: [PATCH 23/98] i2c: aspeed: fixed potential null pointer dereference Before I skipped null checks when the master is in the STOP state; this fixes that. Signed-off-by: Brendan Higgins Acked-by: Joel Stanley Signed-off-by: Wolfram Sang Fixes: f327c686d3ba ("i2c: aspeed: added driver for Aspeed I2C") --- drivers/i2c/busses/i2c-aspeed.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index f19348328a71..6fdf9231c23c 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -410,10 +410,11 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus) } /* We are in an invalid state; reset bus to a known state. 
*/ - if (!bus->msgs && bus->master_state != ASPEED_I2C_MASTER_STOP) { + if (!bus->msgs) { dev_err(bus->dev, "bus in unknown state"); bus->cmd_err = -EIO; - aspeed_i2c_do_stop(bus); + if (bus->master_state != ASPEED_I2C_MASTER_STOP) + aspeed_i2c_do_stop(bus); goto out_no_complete; } msg = &bus->msgs[bus->msgs_index]; From a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 15 Aug 2017 11:57:06 +0200 Subject: [PATCH 24/98] drm/atomic: If the atomic check fails, return its value first The last part of drm_atomic_check_only is testing whether we need to fail with -EINVAL when modeset is not allowed, but forgets to return the value when atomic_check() fails first. This results in -EDEADLK being replaced by -EINVAL, and the sanity check in drm_modeset_drop_locks kicks in: [ 308.531734] ------------[ cut here ]------------ [ 308.531791] WARNING: CPU: 0 PID: 1886 at drivers/gpu/drm/drm_modeset_lock.c:217 drm_modeset_drop_locks+0x33/0xc0 [drm] [ 308.531828] Modules linked in: [ 308.532050] CPU: 0 PID: 1886 Comm: kms_atomic Tainted: G U W 4.13.0-rc5-patser+ #5225 [ 308.532082] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015 [ 308.532124] task: ffff8800cd9dae00 task.stack: ffff8800ca3b8000 [ 308.532168] RIP: 0010:drm_modeset_drop_locks+0x33/0xc0 [drm] [ 308.532189] RSP: 0018:ffff8800ca3bf980 EFLAGS: 00010282 [ 308.532211] RAX: dffffc0000000000 RBX: ffff8800ca3bfaf8 RCX: 0000000013a171e6 [ 308.532235] RDX: 1ffff10019477f69 RSI: ffffffffa8ba4fa0 RDI: ffff8800ca3bfb48 [ 308.532258] RBP: ffff8800ca3bf998 R08: 0000000000000000 R09: 0000000000000003 [ 308.532281] R10: 0000000079dbe066 R11: 00000000f760b34b R12: 0000000000000001 [ 308.532304] R13: dffffc0000000000 R14: 00000000ffffffea R15: ffff880096889680 [ 308.532328] FS: 00007ff00959cec0(0000) GS:ffff8800d4e00000(0000) knlGS:0000000000000000 [ 308.532359] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 308.532380] CR2: 0000000000000008 CR3: 
00000000ca2e3000 CR4: 00000000003406f0 [ 308.532402] Call Trace: [ 308.532440] drm_mode_atomic_ioctl+0x19fa/0x1c00 [drm] [ 308.532488] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532565] ? avc_has_extended_perms+0xc39/0xff0 [ 308.532593] ? lock_downgrade+0x610/0x610 [ 308.532640] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532680] drm_ioctl_kernel+0x154/0x1a0 [drm] [ 308.532755] drm_ioctl+0x624/0x8f0 [drm] [ 308.532858] ? drm_atomic_set_property+0x1220/0x1220 [drm] [ 308.532976] ? drm_getunique+0x210/0x210 [drm] [ 308.533061] do_vfs_ioctl+0xd92/0xe40 [ 308.533121] ? ioctl_preallocate+0x1b0/0x1b0 [ 308.533160] ? selinux_capable+0x20/0x20 [ 308.533191] ? do_fcntl+0x1b1/0xbf0 [ 308.533219] ? kasan_slab_free+0xa2/0xb0 [ 308.533249] ? f_getown+0x4b/0xa0 [ 308.533278] ? putname+0xcf/0xe0 [ 308.533309] ? security_file_ioctl+0x57/0x90 [ 308.533342] SyS_ioctl+0x4e/0x80 [ 308.533374] entry_SYSCALL_64_fastpath+0x18/0xad [ 308.533405] RIP: 0033:0x7ff00779e4d7 [ 308.533431] RSP: 002b:00007fff66a043d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 308.533481] RAX: ffffffffffffffda RBX: 000000e7c7ca5910 RCX: 00007ff00779e4d7 [ 308.533560] RDX: 00007fff66a04430 RSI: 00000000c03864bc RDI: 0000000000000003 [ 308.533608] RBP: 00007ff007a5fb00 R08: 000000e7c7ca4620 R09: 000000e7c7ca5e60 [ 308.533647] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000070 [ 308.533685] R13: 0000000000000000 R14: 0000000000000000 R15: 000000e7c7ca5930 [ 308.533770] Code: ff df 55 48 89 e5 41 55 41 54 53 48 89 fb 48 83 c7 50 48 89 fa 48 c1 ea 03 80 3c 02 00 74 05 e8 94 d4 16 e7 48 83 7b 50 00 74 02 <0f> ff 4c 8d 6b 58 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1 [ 308.534086] ---[ end trace 77f11e53b1df44ad ]--- Solve this by adding the missing return. This is also a bugfix because we could end up rejecting updates with -EINVAL because of a early -EDEADLK, while if atomic_check ran to completion it might have downgraded the modeset to a fastset. 
Signed-off-by: Maarten Lankhorst Testcase: kms_atomic Link: https://patchwork.freedesktop.org/patch/msgid/20170815095706.23624-1-maarten.lankhorst@linux.intel.com Fixes: d34f20d6e2f2 ("drm: Atomic modeset ioctl") Cc: # v4.0+ Reviewed-by: Daniel Vetter --- drivers/gpu/drm/drm_atomic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index b43939f24812..aed25c4183bb 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1655,6 +1655,9 @@ int drm_atomic_check_only(struct drm_atomic_state *state) if (config->funcs->atomic_check) ret = config->funcs->atomic_check(state->dev, state); + if (ret) + return ret; + if (!state->allow_modeset) { for_each_new_crtc_in_state(state, crtc, crtc_state, i) { if (drm_atomic_crtc_needs_modeset(crtc_state)) { @@ -1665,7 +1668,7 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - return ret; + return 0; } EXPORT_SYMBOL(drm_atomic_check_only); From 2926a2aa5c14fb2add75e6584845b1c03022235f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 14 Aug 2017 17:19:26 +0200 Subject: [PATCH 25/98] iommu: Fix wrong freeing of iommu_device->dev The struct iommu_device has a 'struct device' embedded into it, not as a pointer, but the whole struct. In the conversion of the iommu drivers to use struct iommu_device it was forgotten that the release function for that struct device simply calls kfree() on the pointer. This frees memory that was never allocated and causes memory corruption. To fix this issue, use a pointer to struct device instead of embedding the whole struct. This needs some updates in the iommu sysfs code as well as the Intel VT-d and AMD IOMMU driver.
Reported-by: Sebastian Ott Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Cc: stable@vger.kernel.org # >= v4.11 Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_types.h | 4 +++- drivers/iommu/intel-iommu.c | 4 +++- drivers/iommu/iommu-sysfs.c | 32 ++++++++++++++++++++------------ include/linux/iommu.h | 12 +++++++++++- 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 294a409e283b..d6b873b57054 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -574,7 +574,9 @@ struct amd_iommu { static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev) { - return container_of(dev, struct amd_iommu, iommu.dev); + struct iommu_device *iommu = dev_to_iommu_device(dev); + + return container_of(iommu, struct amd_iommu, iommu); } #define ACPIHID_UID_LEN 256 diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 687f18f65cea..3e8636f1220e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4736,7 +4736,9 @@ static void intel_disable_iommus(void) static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) { - return container_of(dev, struct intel_iommu, iommu.dev); + struct iommu_device *iommu_dev = dev_to_iommu_device(dev); + + return container_of(iommu_dev, struct intel_iommu, iommu); } static ssize_t intel_iommu_show_version(struct device *dev, diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index c58351ed61c1..36d1a7ce7fc4 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -62,32 +62,40 @@ int iommu_device_sysfs_add(struct iommu_device *iommu, va_list vargs; int ret; - device_initialize(&iommu->dev); + iommu->dev = kzalloc(sizeof(*iommu->dev), GFP_KERNEL); + if (!iommu->dev) + return -ENOMEM; - iommu->dev.class = &iommu_class; - iommu->dev.parent = parent; - iommu->dev.groups = groups; + 
device_initialize(iommu->dev); + + iommu->dev->class = &iommu_class; + iommu->dev->parent = parent; + iommu->dev->groups = groups; va_start(vargs, fmt); - ret = kobject_set_name_vargs(&iommu->dev.kobj, fmt, vargs); + ret = kobject_set_name_vargs(&iommu->dev->kobj, fmt, vargs); va_end(vargs); if (ret) goto error; - ret = device_add(&iommu->dev); + ret = device_add(iommu->dev); if (ret) goto error; + dev_set_drvdata(iommu->dev, iommu); + return 0; error: - put_device(&iommu->dev); + put_device(iommu->dev); return ret; } void iommu_device_sysfs_remove(struct iommu_device *iommu) { - device_unregister(&iommu->dev); + dev_set_drvdata(iommu->dev, NULL); + device_unregister(iommu->dev); + iommu->dev = NULL; } /* * IOMMU drivers can indicate a device is managed by a given IOMMU using @@ -102,14 +110,14 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) if (!iommu || IS_ERR(iommu)) return -ENODEV; - ret = sysfs_add_link_to_group(&iommu->dev.kobj, "devices", + ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices", &link->kobj, dev_name(link)); if (ret) return ret; - ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev.kobj, "iommu"); + ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev->kobj, "iommu"); if (ret) - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); return ret; @@ -121,5 +129,5 @@ void iommu_device_unlink(struct iommu_device *iommu, struct device *link) return; sysfs_remove_link(&link->kobj, "iommu"); - sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", dev_name(link)); + sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2cb54adc4a33..176f7569d874 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -240,7 +240,7 @@ struct iommu_device { struct list_head list; const struct iommu_ops *ops; struct fwnode_handle *fwnode; - struct 
device dev; + struct device *dev; }; int iommu_device_register(struct iommu_device *iommu); @@ -265,6 +265,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, iommu->fwnode = fwnode; } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return (struct iommu_device *)dev_get_drvdata(dev); +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -589,6 +594,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu, { } +static inline struct iommu_device *dev_to_iommu_device(struct device *dev) +{ + return NULL; +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } From 211b7aac5443d347ff7b6dabf54702b40228cfaf Mon Sep 17 00:00:00 2001 From: "Balasubramaniam, Hari Chand" Date: Tue, 15 Aug 2017 10:05:46 +0800 Subject: [PATCH 26/98] drm/i915: Initialize 'data' in intel_dsi_dcs_backlight.c variable 'data' may be used uninitialized in this function. thus, 'function dcs_get_backlight' will return unwanted value/fail. Thus, adding NULL initialized to 'data' variable will solve the return failure happening. 
v2: Change commit message to reflect upstream with proper message Fixes: 90198355b83c ("drm/i915/dsi: Add DCS control for Panel PWM") Cc: Jani Nikula Cc: Daniel Vetter Cc: Yetunde Adebisi Cc: Deepak M Cc: Jani Nikula Signed-off-by: Balasubramaniam, Hari Chand Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1502762746-191826-1-git-send-email-hari.chand.balasubramaniam@intel.com (cherry picked from commit d59814a5b4852442e1d03c569a4542f8b08356a7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c index 6e09ceb71500..150a156f3b1e 100644 --- a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c +++ b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c @@ -46,7 +46,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector) struct intel_encoder *encoder = connector->encoder; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); struct mipi_dsi_device *dsi_device; - u8 data; + u8 data = 0; enum port port; /* FIXME: Need to take care of 16 bit brightness level */ From 7c648bde211baeda7a029bd6be4957e8be48d8c9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 11 Aug 2017 14:39:07 +0300 Subject: [PATCH 27/98] drm/i915/vbt: ignore extraneous child devices for a port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since we've parsed VBT child devices, starting from 6acab15a7b0d ("drm/i915: use the HDMI DDI buffer translations from VBT"), we've ignored the child device information if more than one child device references the same port. The rationale for this seems lost in time. Since commit 311a20949f04 ("drm/i915: don't init DP or HDMI when not supported by DDI port") we started using this information more to skip HDMI/DP init if the port wasn't there per VBT child devices. 
However, at the same time it added port defaults without further explanation. Thus, if the child device info was skipped due to multiple child devices referencing the same port, the device info would be retrieved from the somewhat arbitrary defaults. Finally, when commit bb1d132935c2 ("drm/i915/vbt: split out defaults that are set when there is no VBT") stopped initializing the defaults whenever VBT is present, thus trusting the VBT more, we stopped initializing ports which were referenced by more than one child device. Apparently at least Asus UX305UA, UX305U, and UX306U laptops have VBT child device blocks which cause this behaviour. Arguably they were shipped with a broken VBT. Relax the rules for multiple references to the same port, and use the first child device info to reference a port. Retain the logic to debug log about this, though. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101745 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196233 Fixes: bb1d132935c2 ("drm/i915/vbt: split out defaults that are set when there is no VBT") Tested-by: Oliver Weißbarth Reported-by: Oliver Weißbarth Reported-by: Didier G Reported-by: Giles Anderson Cc: Manasi Navare Cc: Ville Syrjälä Cc: Paulo Zanoni Cc: # v4.12+ Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170811113907.6716-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit b5273d72750555a673040070bfb23c454a7cd3ef) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 639d45c1dd2e..7ea7fd1e8856 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1120,8 +1120,8 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, bool is_dvi, is_hdmi, is_dp, is_edp, is_crt; uint8_t aux_channel, 
ddc_pin; /* Each DDI port can have more than one value on the "DVO Port" field, - * so look for all the possible values for each port and abort if more - * than one is found. */ + * so look for all the possible values for each port. + */ int dvo_ports[][3] = { {DVO_PORT_HDMIA, DVO_PORT_DPA, -1}, {DVO_PORT_HDMIB, DVO_PORT_DPB, -1}, @@ -1130,7 +1130,10 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, {DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE}, }; - /* Find the child device to use, abort if more than one found. */ + /* + * Find the first child device to reference the port, report if more + * than one found. + */ for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { it = dev_priv->vbt.child_dev + i; @@ -1140,11 +1143,11 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (it->common.dvo_port == dvo_ports[port][j]) { if (child) { - DRM_DEBUG_KMS("More than one child device for port %c in VBT.\n", + DRM_DEBUG_KMS("More than one child device for port %c in VBT, using the first.\n", port_name(port)); - return; + } else { + child = it; } - child = it; } } } From 31c1a7b8f966470ce136710a95afcf5822fecef8 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 15 Aug 2017 20:04:03 -0700 Subject: [PATCH 28/98] drm/i915/cnl: Fix LSPCON support. When LSPCON support was extended to CNL one part was missed on lspcon_init. So, instead of adding check per platform on lspcon_init let's use HAS_LSPCON that is already there for that purpose. 
Fixes: ff15947e0f02 ("drm/i915/cnl: LSPCON support is gen9+") Cc: Shashank Sharma Cc: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Jani Nikula Reviewed-by: Shashank Sharma Link: https://patchwork.freedesktop.org/patch/msgid/20170816030403.11368-1-rodrigo.vivi@intel.com (cherry picked from commit acf58d4e965d40fc014252292b0911b4c9fe6697) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lspcon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 5abef482eacf..beb9baaf2f2e 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -210,8 +210,8 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (!IS_GEN9(dev_priv)) { - DRM_ERROR("LSPCON is supported on GEN9 only\n"); + if (!HAS_LSPCON(dev_priv)) { + DRM_ERROR("LSPCON is not supported on this platform\n"); return false; } From 47f078339be902e97d0ad828ca1d614a5a95334b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Aug 2017 18:21:40 +0100 Subject: [PATCH 29/98] Revert "staging: fsl-mc: be consistent when checking strcmp() return" The previous fix removed the equal to zero comparisons by the strcmps and now the function always returns true. Revert this change to restore the original correctly functioning code. Detected by CoverityScan, CID#1452267 ("Constant expression result") This reverts commit b93ad9a067e1515af42da7d56bc61f1a25075f94. 
Fixes: b93ad9a067e1 ("staging: fsl-mc: be consistent when checking strcmp() return") Signed-off-by: Colin Ian King Signed-off-by: Greg Kroah-Hartman --- drivers/staging/fsl-mc/bus/fsl-mc-allocator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c b/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c index b37a6f48225f..8ea3920400a0 100644 --- a/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c +++ b/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c @@ -16,9 +16,9 @@ static bool __must_check fsl_mc_is_allocatable(const char *obj_type) { - return strcmp(obj_type, "dpbp") || - strcmp(obj_type, "dpmcp") || - strcmp(obj_type, "dpcon"); + return strcmp(obj_type, "dpbp") == 0 || + strcmp(obj_type, "dpmcp") == 0 || + strcmp(obj_type, "dpcon") == 0; } /** From 733f6563979d96dec180c350abb8ac67cc0367ba Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 15 Aug 2017 17:34:44 +0300 Subject: [PATCH 30/98] i2c: designware: Remove needless pm_runtime_put_noidle() call I guess the pm_runtime_put_noidle() call in i2c_dw_probe_slave() was copied by accident from similar master mode adapter registration code. It is unbalanced due to a missing pm_runtime_get_noresume() but harmless since it doesn't decrease dev->power.usage_count below zero. In theory we can hit a similar needless runtime suspend/resume cycle during slave mode adapter registration that was happening when registering the master mode adapter. See commit cd998ded5c12 ("i2c: designware: Prevent runtime suspend during adapter registration"). However, since we are slave, we can consider it as a wrong configuration if we have other slaves attached under this adapter and can omit the pm_runtime_get_noresume()/pm_runtime_put_noidle() calls for simplicity.
Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 4b62a3872763..25fa33927124 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -382,7 +382,6 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev) ret = i2c_add_numbered_adapter(adap); if (ret) dev_err(dev->dev, "failure adding adapter: %d\n", ret); - pm_runtime_put_noidle(dev->dev); return ret; } From 2a86cdd2e7d3c75580f41f89f9b9211e225573cc Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Tue, 15 Aug 2017 17:34:45 +0300 Subject: [PATCH 31/98] i2c: designware: Fix runtime PM for I2C slave mode I2C slave controller must be powered and active all the time when I2C slave backend is registered in order to let master address and communicate with us. Now if the controller is runtime PM capable it will be suspended after probe and cannot ever respond to the master or generate interrupts. Fix this by resuming the controller when I2C slave backend is registered and let it suspend after unregistering. Signed-off-by: Jarkko Nikula Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-slave.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 25fa33927124..78d8fb73927d 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -177,6 +177,8 @@ static int i2c_dw_reg_slave(struct i2c_client *slave) return -EBUSY; if (slave->flags & I2C_CLIENT_TEN) return -EAFNOSUPPORT; + pm_runtime_get_sync(dev->dev); + /* * Set slave address in the IC_SAR register, * the address to which the DW_apb_i2c responds. 
@@ -205,6 +207,7 @@ static int i2c_dw_unreg_slave(struct i2c_client *slave) dev->disable_int(dev); dev->disable(dev); dev->slave = NULL; + pm_runtime_put(dev->dev); return 0; } From 1a92a80ad386a1a6e3b36d576d52a1a456394b70 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 24 Jul 2017 14:28:00 +1000 Subject: [PATCH 32/98] powerpc/mm: Ensure cpumask update is ordered There is no guarantee that the various isync's involved with the context switch will order the update of the CPU mask with the first TLB entry for the new context being loaded by the HW. Be safe here and add a memory barrier to order any subsequent load/store which may bring entries into the TLB. The corresponding barrier on the other side already exists as pte updates use pte_xchg() which uses __cmpxchg_u64 which has a sync after the atomic operation. Cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Nicholas Piggin [mpe: Add comments in the code] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 18 ++++++++++++++++++ arch/powerpc/include/asm/pgtable-be-types.h | 1 + arch/powerpc/include/asm/pgtable-types.h | 1 + 3 files changed, 20 insertions(+) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 0c76675394c5..35bec1c5bd5a 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -90,6 +90,24 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + + /* + * This full barrier orders the store to the cpumask above vs + * a subsequent operation which allows this CPU to begin loading + * translations for next. + * + * When using the radix MMU that operation is the load of the + * MMU context id, which is then moved to SPRN_PID. 
+ * + * For the hash MMU it is either the first load from slb_cache + * in switch_slb(), and/or the store of paca->mm_ctx_id in + * copy_mm_to_paca(). + * + * On the read side the barrier is in pte_xchg(), which orders + * the store to the PTE vs the load of mm_cpumask. + */ + smp_mb(); + new_on_cpu = true; } diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index 9c0f5db5cf46..67e7e3d990f4 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) unsigned long *p = (unsigned long *)ptep; __be64 prev; + /* See comment in switch_mm_irqs_off() */ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old), (__force unsigned long)pte_raw(new)); diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 8bd3b13fe2fb..369a164b545c 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) { unsigned long *p = (unsigned long *)ptep; + /* See comment in switch_mm_irqs_off() */ return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new)); } #endif From f299aec6ebd747298e35934cff7709c6b119ca52 Mon Sep 17 00:00:00 2001 From: Charles Milette Date: Fri, 18 Aug 2017 16:30:34 -0400 Subject: [PATCH 33/98] staging: rtl8188eu: add RNX-N150NUB support Add support for USB Device Rosewill RNX-N150NUB. 
VendorID: 0x0bda, ProductID: 0xffef Signed-off-by: Charles Milette Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index d283341cfe43..56cd4e5e51b2 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -45,6 +45,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ + {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ }; From 1d2226e45040ed4aee95b633cbd64702bf7fc2a1 Mon Sep 17 00:00:00 2001 From: KT Liao Date: Fri, 18 Aug 2017 16:58:15 -0700 Subject: [PATCH 34/98] Input: elan_i2c - add ELAN0602 ACPI ID to support Lenovo Yoga310 Add ELAN0602 to the list of known ACPI IDs to enable support for ELAN touchpads found in Lenovo Yoga310. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 714cf7f9b138..cfbc8ba4c96c 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1247,6 +1247,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN0000", 0 }, { "ELAN0100", 0 }, { "ELAN0600", 0 }, + { "ELAN0602", 0 }, { "ELAN0605", 0 }, { "ELAN0608", 0 }, { "ELAN0605", 0 }, From ec667683c532c93fb41e100e5d61a518971060e2 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Fri, 18 Aug 2017 12:17:21 -0700 Subject: [PATCH 35/98] Input: trackpoint - add new trackpoint firmware ID Synaptics add new TP firmware ID: 0x2 and 0x3, for now both lower 2 bits are indicated as TP. 
Change the constant to bitwise values. This makes trackpoint to be recognized on Lenovo Carbon X1 Gen5 instead of it being identified as "PS/2 Generic Mouse". Signed-off-by: Aaron Ma Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 3 ++- drivers/input/mouse/trackpoint.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 20b5b21c1bba..0871010f18d5 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -265,7 +265,8 @@ static int trackpoint_start_protocol(struct psmouse *psmouse, unsigned char *fir if (ps2_command(&psmouse->ps2dev, param, MAKE_PS2_CMD(0, 2, TP_READ_ID))) return -1; - if (param[0] != TP_MAGIC_IDENT) + /* add new TP ID. */ + if (!(param[0] & TP_MAGIC_IDENT)) return -1; if (firmware_id) diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h index 5617ed3a7d7a..88055755f82e 100644 --- a/drivers/input/mouse/trackpoint.h +++ b/drivers/input/mouse/trackpoint.h @@ -21,8 +21,9 @@ #define TP_COMMAND 0xE2 /* Commands start with this */ #define TP_READ_ID 0xE1 /* Sent for device identification */ -#define TP_MAGIC_IDENT 0x01 /* Sent after a TP_READ_ID followed */ +#define TP_MAGIC_IDENT 0x03 /* Sent after a TP_READ_ID followed */ /* by the firmware ID */ + /* Firmware ID includes 0x1, 0x2, 0x3 */ /* From 0c264af7be2013266c5b4c644f3f366399ee490a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 20 Aug 2017 15:54:26 +0900 Subject: [PATCH 36/98] ALSA: firewire: fix NULL pointer dereference when releasing uninitialized data of iso-resource When calling 'iso_resource_free()' for uninitialized data, this function causes NULL pointer dereference due to its 'unit' member. This occurs when unplugging audio and music units on IEEE 1394 bus at failure of card registration. This commit fixes the bug. The bug exists since kernel v4.5. 
Fixes: 324540c4e05c ('ALSA: fireface: postpone sound card registration') at v4.12 Fixes: 8865a31e0fd8 ('ALSA: firewire-motu: postpone sound card registration') at v4.12 Fixes: b610386c8afb ('ALSA: firewire-tascam: deleyed registration of sound card') at v4.7 Fixes: 86c8dd7f4da3 ('ALSA: firewire-digi00x: delayed registration of sound card') at v4.7 Fixes: 6c29230e2a5f ('ALSA: oxfw: delayed registration of sound card') at v4.7 Fixes: 7d3c1d5901aa ('ALSA: fireworks: delayed registration of sound card') at v4.7 Fixes: 04a2c73c97eb ('ALSA: bebob: delayed registration of sound card') at v4.7 Fixes: b59fb1900b4f ('ALSA: dice: postpone card registration') at v4.5 Cc: # v4.5+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/iso-resources.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c index f0e4d502d604..066b5df666f4 100644 --- a/sound/firewire/iso-resources.c +++ b/sound/firewire/iso-resources.c @@ -210,9 +210,14 @@ EXPORT_SYMBOL(fw_iso_resources_update); */ void fw_iso_resources_free(struct fw_iso_resources *r) { - struct fw_card *card = fw_parent_device(r->unit)->card; + struct fw_card *card; int bandwidth, channel; + /* Not initialized. */ + if (r->unit == NULL) + return; + card = fw_parent_device(r->unit)->card; + mutex_lock(&r->mutex); if (r->allocated) { From dbd7396b4f24e0c3284fcc05f5def24f52c09884 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 20 Aug 2017 15:55:02 +0900 Subject: [PATCH 37/98] ALSA: firewire-motu: destroy stream data surely at failure of card initialization When failing sound card registration after initializing stream data, this module leaves allocated data in stream data. This commit fixes the bug. 
Fixes: 9b2bb4f2f4a2 ('ALSA: firewire-motu: add stream management functionality') Cc: # v4.12+ Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai --- sound/firewire/motu/motu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c index bf779cfeef0d..59a270406353 100644 --- a/sound/firewire/motu/motu.c +++ b/sound/firewire/motu/motu.c @@ -128,6 +128,7 @@ static void do_registration(struct work_struct *work) return; error: snd_motu_transaction_unregister(motu); + snd_motu_stream_destroy_duplex(motu); snd_card_free(motu->card); dev_info(&motu->unit->device, "Sound card registration failed: %d\n", err); From f00fd7ae4f409abb7b2e5d099248832548199f0c Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 30 Jul 2017 16:14:42 -0600 Subject: [PATCH 38/98] PATCH] iio: Fix some documentation warnings The kerneldoc description for the trig_readonly field of struct iio_dev lacked a colon, leading to this doc build warning: ./include/linux/iio/iio.h:603: warning: No description found for parameter 'trig_readonly' A similar issue for iio_trigger_set_immutable() in trigger.h yielded: ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'indio_dev' ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'trig' Fix the formatting and silence the warnings. 
Signed-off-by: Jonathan Corbet Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 2 +- include/linux/iio/trigger.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d68bec297a45..c380daa40c0e 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -535,7 +535,7 @@ struct iio_buffer_setup_ops { * @scan_timestamp: [INTERN] set if any buffers have requested timestamp * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) - * @trig_readonly [INTERN] mark the current trigger immutable + * @trig_readonly: [INTERN] mark the current trigger immutable * @pollfunc: [DRIVER] function run on trigger being received * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index ea08302f2d7b..7142d8d6e470 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -144,8 +144,8 @@ void devm_iio_trigger_unregister(struct device *dev, /** * iio_trigger_set_immutable() - set an immutable trigger on destination * - * @indio_dev - IIO device structure containing the device - * @trig - trigger to assign to device + * @indio_dev: IIO device structure containing the device + * @trig: trigger to assign to device * **/ int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *trig); From fdd0d32eb95f135041236a6885d9006315aa9a1d Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Fri, 4 Aug 2017 01:37:27 +0300 Subject: [PATCH 39/98] iio: imu: adis16480: Fix acceleration scale factor for adis16480 According to the datasheet, the range of the acceleration is [-10 g, + 10 g], so the scale factor should be 10 instead of 5. 
Signed-off-by: Dragos Bogdan Acked-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis16480.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 8cf84d3488b2..12898424d838 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -696,7 +696,7 @@ static const struct adis16480_chip_info adis16480_chip_info[] = { .gyro_max_val = IIO_RAD_TO_DEGREE(22500), .gyro_max_scale = 450, .accel_max_val = IIO_M_S_2_TO_G(12500), - .accel_max_scale = 5, + .accel_max_scale = 10, }, [ADIS16485] = { .channels = adis16485_channels, From a359bb2a55f384bb93349ddf9d30b20b37e02e8a Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Thu, 3 Aug 2017 11:22:17 +0200 Subject: [PATCH 40/98] iio: trigger: stm32-timer: fix get trigger mode Fix reading trigger mode, when other bit-fields are set. SMCR register value must be masked to read SMS (slave mode selection) only. Fixes: 9eba381 ("iio: make stm32 trigger driver use INDIO_HARDWARE_TRIGGERED mode") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 14e6eb04bbb0..25ad6abfee22 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -485,7 +485,7 @@ static int stm32_get_trigger_mode(struct iio_dev *indio_dev, regmap_read(priv->regmap, TIM_SMCR, &smcr); - return smcr == TIM_SMCR_SMS ? 0 : -EINVAL; + return (smcr & TIM_SMCR_SMS) == TIM_SMCR_SMS ? 
0 : -EINVAL; } static const struct iio_enum stm32_trigger_mode_enum = { From f1664eaacec31035450132c46ed2915fd2b2049a Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Sat, 12 Aug 2017 09:09:21 -0700 Subject: [PATCH 41/98] iio: hid-sensor-trigger: Fix the race with user space powering up sensors It has been reported for a while that with iio-sensor-proxy service the rotation only works after one suspend/resume cycle. This required a wait in the systemd unit file to avoid race. I found a Yoga 900 where I could reproduce this. The problem scenario is: - During sensor driver init, enable run time PM and also set an auto-suspend for 3 seconds. This results in one runtime resume. But there is a check to avoid a powerup in this sequence, but rpm is active - User space iio-sensor-proxy tries to power up the sensor. Since rpm is active it will simply return. But sensors were not actually powered up in the prior sequence, so actually the sensors will not work - After 3 seconds the auto suspend kicks in If we add a wait in systemd service file to fire iio-sensor-proxy after 3 seconds, then now everything will work as the runtime resume will actually powerup the sensor as this is a user request. To avoid this: - Remove the check to match user requested state, this will cause a brief powerup, but if the iio-sensor-proxy starts immediately it will still work as the sensors are ON.
- Also move the autosuspend delay to place when user requested turn off of sensors, like after user finished raw read or buffer disable Signed-off-by: Srinivas Pandruvada Tested-by: Bastien Nocera Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 16ade0a0327b..0e4b379ada45 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -111,8 +111,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) s32 poll_value = 0; if (state) { - if (!atomic_read(&st->user_requested_state)) - return 0; if (sensor_hub_device_open(st->hsdev)) return -EIO; @@ -161,6 +159,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) &report_val); } + pr_debug("HID_SENSOR %s set power_state %d report_state %d\n", + st->pdev->name, state_val, report_val); + sensor_hub_get_feature(st->hsdev, st->power_state.report_id, st->power_state.index, sizeof(state_val), &state_val); @@ -182,6 +183,7 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state) ret = pm_runtime_get_sync(&st->pdev->dev); else { pm_runtime_mark_last_busy(&st->pdev->dev); + pm_runtime_use_autosuspend(&st->pdev->dev); ret = pm_runtime_put_autosuspend(&st->pdev->dev); } if (ret < 0) { @@ -285,8 +287,6 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name, /* Default to 3 seconds, but can be changed from sysfs */ pm_runtime_set_autosuspend_delay(&attrb->pdev->dev, 3000); - pm_runtime_use_autosuspend(&attrb->pdev->dev); - return ret; error_unreg_trigger: iio_trigger_unregister(trig); From 541ee9b24fca587f510fe1bc58508d5cf40707af Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 16 Aug 2017 19:02:50 +0200 Subject: [PATCH 42/98] iio: magnetometer: 
st_magn: fix status register address for LSM303AGR Fixes: 97865fe41322 (iio: st_sensors: verify interrupt event to status) Signed-off-by: Lorenzo Bianconi Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/st_magn_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 8e1b0861fbe4..c11f0da86e74 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -358,7 +358,7 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { .mask_int1 = 0x01, .addr_ihl = 0x63, .mask_ihl = 0x04, - .addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR, + .addr_stat_drdy = 0x67, }, .multi_read_bit = false, .bootime = 2, From 8b35a5f87a73842601cd376e0f5b9b25831390f4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 16 Aug 2017 19:02:51 +0200 Subject: [PATCH 43/98] iio: magnetometer: st_magn: remove ihl property for LSM303AGR Remove IRQ active low support for LSM303AGR since the sensor does not support that capability for data-ready line Fixes: a9fd053b56c6 (iio: st_sensors: support active-low interrupts) Signed-off-by: Lorenzo Bianconi Reviewed-by: Linus Walleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/st_magn_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index c11f0da86e74..c38563699984 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -356,8 +356,6 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { .drdy_irq = { .addr = 0x62, .mask_int1 = 0x01, - .addr_ihl = 0x63, - .mask_ihl = 0x04, .addr_stat_drdy = 0x67, }, .multi_read_bit = false, From d912366a59c5384df436fd007667d6e574128b44 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 20 Aug 2017 09:29:03 -0700 Subject: [PATCH 44/98] Input: 
soc_button_array - silence -ENOENT error on Dell XPS13 9365 The Dell XPS13 9365 has an INT33D2 ACPI node with no GPIOs, causing the following error in dmesg: [ 7.172275] soc_button_array: probe of INT33D2:00 failed with error -2 This commit silences this, by returning -ENODEV when there are no GPIOs. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=196679 Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov --- drivers/input/misc/soc_button_array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c index f600f3a7a3c6..23520df7650f 100644 --- a/drivers/input/misc/soc_button_array.c +++ b/drivers/input/misc/soc_button_array.c @@ -331,7 +331,7 @@ static int soc_button_probe(struct platform_device *pdev) error = gpiod_count(dev, NULL); if (error < 0) { dev_dbg(dev, "no GPIO attached, ignoring...\n"); - return error; + return -ENODEV; } priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); From a93c11527528c951b8d8db638162128a09e09ec2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 17 Aug 2017 13:55:41 +0300 Subject: [PATCH 45/98] drm/i915/bxt: use NULL for GPIO connection ID The commit 213e08ad60ba ("drm/i915/bxt: add bxt dsi gpio element support") enables GPIO support for Broxton based platforms. While using that API we might get into troubles in the future, because we can't rely on label name in the driver since vendor firmware might provide any GPIO pin there, e.g. "reset", and even mark it in _DSD (in which case the request will fail). To avoid inconsistency and potential issues we have two options: a) generate GPIO ACPI mapping table and supply it via acpi_dev_add_driver_gpios(), or b) just pass NULL as connection ID. The b) approach is much simpler and would work since the driver relies on GPIO indices only. 
Moreover, the _CRS fallback mechanism, when requesting GPIO, has been made stricter, and supplying non-NULL connection ID when neither _DSD, nor GPIO ACPI mapping is present, is making request fail. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101921 Fixes: f10e4bf6632b ("gpio: acpi: Even more tighten up ACPI GPIO lookups") Cc: Mika Kahola Cc: Jani Nikula Tested-by: Mika Kahola Signed-off-by: Andy Shevchenko Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170817105541.63914-1-andriy.shevchenko@linux.intel.com (cherry picked from commit cd55a1fbd21a820b7dd85a208b3170aa0b06adfa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dsi_vbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 7158c7ce9c09..91c07b0c8db9 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -306,7 +306,7 @@ static void bxt_exec_gpio(struct drm_i915_private *dev_priv, if (!gpio_desc) { gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev, - "panel", gpio_index, + NULL, gpio_index, value ? GPIOD_OUT_LOW : GPIOD_OUT_HIGH); From d41a3c2be178783c85e05025265ab58fbb4d4ce1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Aug 2017 13:19:19 +0100 Subject: [PATCH 46/98] drm/i915: Clear lost context-switch interrupts across reset During a global reset, we disable the irq. As we disable the irq, the hardware may be raising a GT interrupt that we then ignore, leaving it pending in the GTIIR. After the reset, we then re-enable the irq, triggering the pending interrupt. However, that interrupt was for the stale state from before the reset, and the contents of the CSB buffer are now invalid. v2: Add a comment to make it clear that the double clear is purely my paranoia. 
Reported-by: "Dong, Chuanxiao" Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Signed-off-by: Chris Wilson Cc: "Dong, Chuanxiao" Cc: Tvrtko Ursulin Cc: Michal Winiarski Cc: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20170807121919.30165-1-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20170818090509.5363-1-chris@chris-wilson.co.uk Reviewed-by: Michel Thierry (cherry picked from commit 64f09f00caf0a7cb40a8c0b85789bacba0f51d9e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7404cf2aac28..2afa4daa88e8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1221,6 +1221,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } +static u8 gtiir[] = { + [RCS] = 0, + [BCS] = 0, + [VCS] = 1, + [VCS2] = 1, + [VECS] = 3, +}; + static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1245,9 +1253,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); - /* After a GPU reset, we may have requests to replay */ + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are double + * buffered, and if we only reset it once there may still be + * an interrupt pending. 
+ */ + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + /* After a GPU reset, we may have requests to replay */ submit = false; for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) { if (!port_isset(&port[n])) From d83c2dbaa90a9bd6346e234d9802080a9c7b2fea Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 18 Aug 2017 09:16:08 +0800 Subject: [PATCH 47/98] mmc: block: prevent propagating R1_OUT_OF_RANGE for open-ending mode We to some extent should tolerate R1_OUT_OF_RANGE for open-ending mode as it is expected behaviour and most of the backup partition tables should be located near some of the last blocks which will always make open-ending read exceed the capacity of cards. Fixes: 9820a5b11101 ("mmc: core: for data errors, take response of stop cmd into account") Fixes: a04e6bae9e6f ("mmc: core: check also R1 response for stop commands") Signed-off-by: Shawn Lin Reviewed-by: Wolfram Sang Tested-by: Shawn Guo Signed-off-by: Ulf Hansson --- drivers/mmc/core/block.c | 49 +++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f1bbfd389367..80d1ec693d2d 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1371,12 +1371,46 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq, R1_CC_ERROR | /* Card controller error */ \ R1_ERROR) /* General/unknown error */ -static bool mmc_blk_has_cmd_err(struct mmc_command *cmd) +static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) { - if (!cmd->error && cmd->resp[0] & CMD_ERRORS) - cmd->error = -EIO; + u32 val; - return cmd->error; + /* + * Per the SD specification(physical layer version 4.10)[1], + * section 4.3.3, it explicitly states that "When the last + * block of user area is read 
using CMD18, the host should + * ignore OUT_OF_RANGE error that may occur even the sequence + * is correct". And JESD84-B51 for eMMC also has a similar + * statement on section 6.8.3. + * + * Multiple block read/write could be done by either predefined + * method, namely CMD23, or open-ending mode. For open-ending mode, + * we should ignore the OUT_OF_RANGE error as it's normal behaviour. + * + * However the spec[1] doesn't tell us whether we should also + * ignore that for predefined method. But per the spec[1], section + * 4.15 Set Block Count Command, it says"If illegal block count + * is set, out of range error will be indicated during read/write + * operation (For example, data transfer is stopped at user area + * boundary)." In another word, we could expect a out of range error + * in the response for the following CMD18/25. And if argument of + * CMD23 + the argument of CMD18/25 exceed the max number of blocks, + * we could also expect to get a -ETIMEDOUT or any error number from + * the host drivers due to missing data response(for write)/data(for + * read), as the cards will stop the data transfer by itself per the + * spec. So we only need to check R1_OUT_OF_RANGE for open-ending mode. + */ + + if (!brq->stop.error) { + bool oor_with_open_end; + /* If there is no error yet, check R1 response */ + + val = brq->stop.resp[0] & CMD_ERRORS; + oor_with_open_end = val & R1_OUT_OF_RANGE && !brq->mrq.sbc; + + if (val && !oor_with_open_end) + brq->stop.error = -EIO; + } } static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, @@ -1400,8 +1434,11 @@ static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card, * stop.error indicates a problem with the stop command. Data * may have been transferred, or may still be transferring. 
*/ - if (brq->sbc.error || brq->cmd.error || mmc_blk_has_cmd_err(&brq->stop) || - brq->data.error) { + + mmc_blk_eval_resp_error(brq); + + if (brq->sbc.error || brq->cmd.error || + brq->stop.error || brq->data.error) { switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) { case ERR_RETRY: return MMC_BLK_RETRY; From 4a4eefcd0e49f9f339933324c1bde431186a0a7d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Aug 2017 13:05:11 +0200 Subject: [PATCH 48/98] KVM: s390: sthyi: fix sthyi inline assembly The sthyi inline assembly misses register r3 within the clobber list. The sthyi instruction will always write a return code to register "R2+1", which in this case would be r3. Due to that we may have register corruption and see host crashes or data corruption depending on how gcc decided to allocate and use registers during compile time. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Cc: # 4.8+ Reviewed-by: Janosch Frank Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sthyi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 926b5244263e..2773a2f6a5c4 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -394,7 +394,7 @@ static int sthyi(u64 vaddr) "srl %[cc],28\n" : [cc] "=d" (cc) : [code] "d" (code), [addr] "a" (addr) - : "memory", "cc"); + : "3", "memory", "cc"); return cc; } From 857b8de96795646c5891cf44ae6fb19b9ff74bf9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 3 Aug 2017 14:27:30 +0200 Subject: [PATCH 49/98] KVM: s390: sthyi: fix specification exception detection sthyi should only generate a specification exception if the function code is zero and the response buffer is not on a 4k boundary. 
The current code would also test for unknown function codes if the response buffer, that is currently only defined for function code 0, is not on a 4k boundary and incorrectly inject a specification exception instead of returning with condition code 3 and return code 4 (unsupported function code). Fix this by moving the boundary check. Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation") Cc: # 4.8+ Reviewed-by: Janosch Frank Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sthyi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 2773a2f6a5c4..a2e5c24f47a7 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); trace_kvm_s390_handle_sthyi(vcpu, code, addr); - if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + if (reg1 == reg2 || reg1 & 1 || reg2 & 1) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); if (code & 0xffff) { @@ -433,6 +433,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu) goto out; } + if (addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* * If the page has not yet been faulted in, we want to do that * now and not after all the expensive calculations. From deecd4d71b12626db48544faa66bb897e2cafd07 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 27 Jul 2017 15:56:55 -0500 Subject: [PATCH 50/98] objtool: Fix '-mtune=atom' decoding support in objtool 2.0 With '-mtune=atom', which is enabled with CONFIG_MATOM=y, GCC uses some unusual instructions for setting up the stack. Instead of: mov %rsp, %rbp it does: lea (%rsp), %rbp And instead of: add imm, %rsp it does: lea disp(%rsp), %rsp Add support for these instructions to the objtool decoder. 
Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0") Link: http://lkml.kernel.org/r/4ea1db896e821226efe1f8e09f270771bde47e65.1501188854.git.jpoimboe@redhat.com [ This is a cherry-picked version of upcoming commit 5b8de48e82ba. ] Signed-off-by: Ingo Molnar --- tools/objtool/arch/x86/decode.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index a36c2eba64e7..4559a21a8de2 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -271,7 +271,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, case 0x8d: if (rex == 0x48 && modrm == 0x65) { - /* lea -disp(%rbp), %rsp */ + /* lea disp(%rbp), %rsp */ *type = INSN_STACK; op->src.type = OP_SRC_ADD; op->src.reg = CFI_BP; @@ -281,6 +281,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, break; } + if (rex == 0x48 && (modrm == 0xa4 || modrm == 0x64) && + sib == 0x24) { + + /* lea disp(%rsp), %rsp */ + *type = INSN_STACK; + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + break; + } + + if (rex == 0x48 && modrm == 0x2c && sib == 0x24) { + + /* lea (%rsp), %rbp */ + *type = INSN_STACK; + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_BP; + break; + } + if (rex == 0x4c && modrm == 0x54 && sib == 0x24 && insn.displacement.value == 8) { From 07b3b5e9ed807a0d2077319b8e43a42e941db818 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Tue, 22 Aug 2017 08:33:53 +0200 Subject: [PATCH 51/98] ALSA: usb-audio: Add delay quirk for H650e/Jabra 550a USB headsets These headsets report a lot of: cannot set freq 44100 to ep 0x81 and need a small delay between sample rate settings, just
like Zoom R16/24. Add both headsets to the Zoom R16/24 quirk for a 1 ms delay between control msgs. Signed-off-by: Joakim Tjernlund Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 6a03f9697039..5d2a63248b1d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1309,10 +1309,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(20); - /* Zoom R16/24 needs a tiny delay here, otherwise requests like - * get/set frequency return as failed despite actually succeeding. + /* Zoom R16/24, Logitech H650e, Jabra 550a needs a tiny delay here, + * otherwise requests like get/set frequency return as failed despite + * actually succeeding. */ - if (chip->usb_id == USB_ID(0x1686, 0x00dd) && + if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || + chip->usb_id == USB_ID(0x046d, 0x0a46) || + chip->usb_id == USB_ID(0x0b0e, 0x0349)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) mdelay(1); } From fe4600a548f2763dec91b3b27a1245c370ceee2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Aug 2017 13:05:58 +0100 Subject: [PATCH 52/98] drm: Release driver tracking before making the object available again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the same bug as we fixed in commit f6cd7daecff5 ("drm: Release driver references to handle before making it available again"), but now the exposure is via the PRIME lookup tables. If we remove the object/handle from the PRIME lut, then a new request for the same object/fd will generate a new handle, thus for a short window that object is known to userspace by two different handles. Fix this by releasing the driver tracking before PRIME. 
Fixes: 0ff926c7d4f0 ("drm/prime: add exported buffers to current fprivs imported buffer list (v2)") Signed-off-by: Chris Wilson Cc: David Airlie Cc: Daniel Vetter Cc: Rob Clark Cc: Ville Syrjälä Cc: Thierry Reding Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20170819120558.6465-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/drm_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 8dc11064253d..cdaac37907b1 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -255,13 +255,13 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) struct drm_gem_object *obj = ptr; struct drm_device *dev = obj->dev; + if (dev->driver->gem_close_object) + dev->driver->gem_close_object(obj, file_priv); + if (drm_core_check_feature(dev, DRIVER_PRIME)) drm_gem_remove_prime_handles(obj, file_priv); drm_vma_node_revoke(&obj->vma_node, file_priv); - if (dev->driver->gem_close_object) - dev->driver->gem_close_object(obj, file_priv); - drm_gem_object_handle_put_unlocked(obj); return 0; From 88c54cdf61f508ebcf8da2d819f5dfc03e954d1d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Aug 2017 08:15:13 +0200 Subject: [PATCH 53/98] ALSA: core: Fix unexpected error at replacing user TLV When user tries to replace the user-defined control TLV, the kernel checks the change of its content via memcmp(). The problem is that the kernel passes the return value from memcmp() as is. memcmp() gives a non-zero negative value depending on the comparison result, and this shall be recognized as an error code. The patch covers that corner-case, return 1 properly for the changed TLV. 
Fixes: 8aa9b586e420 ("[ALSA] Control API - more robust TLV implementation") Cc: Signed-off-by: Takashi Iwai --- sound/core/control.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index 3c6be1452e35..4525e127afd9 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1137,7 +1137,7 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, mutex_lock(&ue->card->user_ctl_lock); change = ue->tlv_data_size != size; if (!change) - change = memcmp(ue->tlv_data, new_data, size); + change = memcmp(ue->tlv_data, new_data, size) != 0; kfree(ue->tlv_data); ue->tlv_data = new_data; ue->tlv_data_size = size; From b2a6d1b999a4c13e5997bb864694e77172d45250 Mon Sep 17 00:00:00 2001 From: Martijn Coenen Date: Fri, 28 Jul 2017 13:56:08 +0200 Subject: [PATCH 54/98] ANDROID: binder: fix proc->tsk check. Commit c4ea41ba195d ("binder: use group leader instead of open thread")' was incomplete and didn't update a check in binder_mmap(), causing all mmap() calls into the binder driver to fail. 
Signed-off-by: Martijn Coenen Tested-by: John Stultz Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f7665c31feca..831cdd7d197d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3362,7 +3362,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma) const char *failure_string; struct binder_buffer *buffer; - if (proc->tsk != current) + if (proc->tsk != current->group_leader) return -EINVAL; if ((vma->vm_end - vma->vm_start) > SZ_4M) From ffeaf9aaf97b4bdaf114d6df52f800d71918768c Mon Sep 17 00:00:00 2001 From: fred gao Date: Wed, 16 Aug 2017 15:48:03 +0800 Subject: [PATCH 55/98] drm/i915/gvt: Fix the kernel null pointer error Once an error happens in the shadow_indirect_ctx function, the variable wa_ctx->indirect_ctx.obj is not initialized but accessed, so the kernel null pointer panic occurs. Fixes: 894cf7d15634 ("drm/i915/gvt: i915_gem_object_create() returns an error pointer") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: fred gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 713848c36349..e556a46cd4c2 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -2714,7 +2714,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) unmap_src: i915_gem_object_unpin_map(obj); put_obj: - i915_gem_object_put(wa_ctx->indirect_ctx.obj); + i915_gem_object_put(obj); return ret; } From bbba6f9d3da357bbabc6fda81e99ff5584500e76 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 23 Aug 2017 09:30:17 +0200 Subject: [PATCH 56/98] ALSA: hda - Add stereo mic quirk for Lenovo G50-70 (17aa:3978) Lenovo G50-70 (17aa:3978) with Conexant codec chip requires the similar workaround
for the inverted stereo dmic like other Lenovo models. Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1020657 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 8c1289963c80..a81aacf684b2 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -947,6 +947,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI), SND_PCI_QUIRK(0x1c06, 0x2011, "Lemote A1004", CXT_PINCFG_LEMOTE_A1004), From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 3 Aug 2017 13:09:03 +0530 Subject: [PATCH 57/98] cifs: Fix df output for users with quota limits The df for a SMB2 share triggers a GetInfo call for FS_FULL_SIZE_INFORMATION. The values returned are used to populate struct statfs. The problem is that none of the information returned by the call contains the total blocks available on the filesystem. Instead we use the blocks available to the user ie. quota limitation when filling out statfs.f_blocks. The information returned does contain Actual free units on the filesystem and is used to populate statfs.f_bfree. 
For users with quota enabled, it can lead to situations where the total free space reported is more than the total blocks on the system ending up with df reports like the following # df -h /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.5G -2.3G 2.5G - /mnt/a To fix this problem, we instead populate statfs.f_bfree with the same value as statfs.f_bavail ie. CallerAvailableAllocationUnits. This is similar to what is done already in the code for cifs and df now reports the quota information for the user used to mount the share. # df --si /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.7G 101M 2.6G 4% /mnt/a Signed-off-by: Sachin Prabhu Signed-off-by: Pierguido Lambri Signed-off-by: Steve French Cc: --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5fb2fc2d0080..97edb4d376cd 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3219,8 +3219,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); - kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); - kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + kst->f_bfree = kst->f_bavail = + le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); return; } From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 23 Aug 2017 14:48:14 +1000 Subject: [PATCH 58/98] cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() Add checking for the path component length and verify it is <= the maximum that the server advertises via FileFsAttributeInformation. With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT when users try to access an overlong path.
To test this, try to cd into a (non-existing) directory on a CIFS share that has a too long name: cd /mnt/aaaaaaaaaaaaaaa... and it now should show a good error message from the shell: bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long rh bz 1153996 Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Cc: --- fs/cifs/dir.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 56366e984076..569d3fb736be 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -194,15 +194,20 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix) } /* + * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix. */ static int -check_name(struct dentry *direntry) +check_name(struct dentry *direntry, struct cifs_tcon *tcon) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); int i; + if (unlikely(direntry->d_name.len > + tcon->fsAttrInfo.MaxPathNameComponentLength)) + return -ENAMETOOLONG; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { @@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, return finish_no_open(file, res); } - rc = check_name(direntry); - if (rc) - return rc; - xid = get_xid(); cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", @@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } tcon = tlink_tcon(tlink); + + rc = check_name(direntry, tcon); + if (rc) + goto out_free_xid; + server = tcon->ses->server; if (server->ops->new_lease_key) @@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - rc = check_name(direntry); + rc = check_name(direntry, pTcon); if (rc) goto lookup_out; From 
ea0ea2bc6dd8923d86a0fa98743dbeed98645486 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 18 Aug 2017 16:08:13 -0700 Subject: [PATCH 59/98] blk-throttle: cap discard request size discard request usually is very big and easily use all bandwidth budget of a cgroup. discard request size doesn't really mean the size of data written, so it doesn't make sense to account it into bandwidth budget. Jens pointed out treating the size 0 doesn't make sense too, because discard request does have cost. But it's not easy to find the actual cost. This patch simply makes the size one sector. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-throttle.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a7285bf2831c..80f5481fe9f6 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -382,6 +382,14 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) } \ } while (0) +static inline unsigned int throtl_bio_data_size(struct bio *bio) +{ + /* assume it's one sector */ + if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) + return 512; + return bio->bi_iter.bi_size; +} + static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg) { INIT_LIST_HEAD(&qn->node); @@ -934,6 +942,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, bool rw = bio_data_dir(bio); u64 bytes_allowed, extra_bytes, tmp; unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd; + unsigned int bio_size = throtl_bio_data_size(bio); jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw]; @@ -947,14 +956,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, do_div(tmp, HZ); bytes_allowed = tmp; - if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { + if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) { if (wait) *wait = 0; return true; } /* Calc approx time to dispatch */ - extra_bytes = tg->bytes_disp[rw] + 
bio->bi_iter.bi_size - bytes_allowed; + extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed; jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw)); if (!jiffy_wait) @@ -1034,11 +1043,12 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); + unsigned int bio_size = throtl_bio_data_size(bio); /* Charge the bio to the group */ - tg->bytes_disp[rw] += bio->bi_iter.bi_size; + tg->bytes_disp[rw] += bio_size; tg->io_disp[rw]++; - tg->last_bytes_disp[rw] += bio->bi_iter.bi_size; + tg->last_bytes_disp[rw] += bio_size; tg->last_io_disp[rw]++; /* From 1e6ec9ea89d30739b9447c1860fcb07fc29f3aef Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 23 Aug 2017 14:54:59 -0700 Subject: [PATCH 60/98] Revert "loop: support 4k physical blocksize" There's some stuff still up in the air, let's not get stuck with a subpar ABI. I'll follow up with something better for 4.14. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/loop.c | 42 ++++++--------------------------------- drivers/block/loop.h | 1 - include/uapi/linux/loop.h | 3 --- 3 files changed, 6 insertions(+), 40 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ef8334949b42..f321b96405f5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -221,8 +221,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) } static int -figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, - loff_t logical_blocksize) +figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; @@ -234,12 +233,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit, lo->lo_offset = offset; if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) { - 
lo->lo_logical_blocksize = logical_blocksize; - blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize); - blk_queue_logical_block_size(lo->lo_queue, - lo->lo_logical_blocksize); - } set_capacity(lo->lo_disk, x); bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); /* let user-space know about the new size */ @@ -820,7 +813,6 @@ static void loop_config_discard(struct loop_device *lo) struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct request_queue *q = lo->lo_queue; - int lo_bits = 9; /* * We use punch hole to reclaim the free space used by the @@ -840,11 +832,9 @@ static void loop_config_discard(struct loop_device *lo) q->limits.discard_granularity = inode->i_sb->s_blocksize; q->limits.discard_alignment = 0; - if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) - lo_bits = blksize_bits(lo->lo_logical_blocksize); - blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits); - blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits); + blk_queue_max_discard_sectors(q, UINT_MAX >> 9); + blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); } @@ -938,7 +928,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->use_dio = false; lo->lo_blocksize = lo_blocksize; - lo->lo_logical_blocksize = 512; lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; @@ -1104,7 +1093,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) int err; struct loop_func_table *xfer; kuid_t uid = current_uid(); - int lo_flags = lo->lo_flags; if (lo->lo_encrypt_key_size && !uid_eq(lo->lo_key_owner, uid) && @@ -1137,26 +1125,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (err) goto exit; - if (info->lo_flags & LO_FLAGS_BLOCKSIZE) { - if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE)) - lo->lo_logical_blocksize = 512; - lo->lo_flags |= LO_FLAGS_BLOCKSIZE; - if (LO_INFO_BLOCKSIZE(info) != 512 && - LO_INFO_BLOCKSIZE(info) != 1024 && - 
LO_INFO_BLOCKSIZE(info) != 2048 && - LO_INFO_BLOCKSIZE(info) != 4096) - return -EINVAL; - if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize) - return -EINVAL; - } - if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit || - lo->lo_flags != lo_flags || - ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) && - lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) { - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit, - LO_INFO_BLOCKSIZE(info))) { + lo->lo_sizelimit != info->lo_sizelimit) { + if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto exit; } @@ -1348,8 +1319,7 @@ static int loop_set_capacity(struct loop_device *lo) if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit, - lo->lo_logical_blocksize); + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int loop_set_dio(struct loop_device *lo, unsigned long arg) diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 2c096b9a17b8..fecd3f97ef8c 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -49,7 +49,6 @@ struct loop_device { struct file * lo_backing_file; struct block_device *lo_device; unsigned lo_blocksize; - unsigned lo_logical_blocksize; void *key_data; gfp_t old_gfp_mask; diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index a3960f98679c..c8125ec1f4f2 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -22,7 +22,6 @@ enum { LO_FLAGS_AUTOCLEAR = 4, LO_FLAGS_PARTSCAN = 8, LO_FLAGS_DIRECT_IO = 16, - LO_FLAGS_BLOCKSIZE = 32, }; #include /* for __kernel_old_dev_t */ @@ -60,8 +59,6 @@ struct loop_info64 { __u64 lo_init[2]; }; -#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0] - /* * Loop filter types */ From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Aug 2017 18:43:48 +1000 Subject: [PATCH 61/98] timers: Fix excessive granularity of new timers after a nohz idle 
When a timer base is idle, it is forwarded when a new timer is added to ensure that granularity does not become excessive. When not idle, the timer tick is expected to increment the base. However there are several problems: - If an existing timer is modified, the base is forwarded only after the index is calculated. - The base is not forwarded by add_timer_on. - There is a window after a timer is restarted from a nohz idle, after it is marked not-idle and before the timer tick on this CPU, where a timer may be added but the ancient base does not get forwarded. These result in excessive granularity (a 1 jiffy timeout can blow out to 100s of jiffies), which cause the rcu lockup detector to trigger, among other things. Fix this by keeping track of whether the timer base has been idle since it was last run or forwarded, and if so then forward it before adding a new timer. There is still a case where mod_timer optimises the case of a pending timer mod with the same expiry time, where the timer can see excessive granularity relative to the new, shorter interval. A comment is added, but it's not changed because it is an important fastpath for networking. This has been tested and found to fix the RCU softlockup messages. Testing was also done with tracing to measure requested versus achieved wakeup latencies for all non-deferrable timers in an idle system (with no lockup watchdogs running). Wakeup latency relative to absolute latency is calculated (note this suffers from round-up skew at low absolute times) and analysed: max avg std upstream 506.0 1.20 4.68 patched 2.0 1.08 0.15 The bug was noticed due to the lockup detector Kconfig changes dropping it out of people's .configs and resulting in larger base clk skew When the lockup detectors are enabled, no CPU can go idle for longer than 4 seconds, which limits the granularity errors. 
Sub-optimal timer behaviour is observable on a smaller scale in that case: max avg std upstream 9.0 1.05 0.19 patched 2.0 1.04 0.11 Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") Signed-off-by: Nicholas Piggin Signed-off-by: Thomas Gleixner Tested-by: Jonathan Cameron Tested-by: David Miller Cc: dzickus@redhat.com Cc: sfr@canb.auug.org.au Cc: mpe@ellerman.id.au Cc: Stephen Boyd Cc: linuxarm@huawei.com Cc: abdhalee@linux.vnet.ibm.com Cc: John Stultz Cc: akpm@linux-foundation.org Cc: paulmck@linux.vnet.ibm.com Cc: torvalds@linux-foundation.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com --- kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 8f5d1bf18854..f2674a056c26 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -203,6 +203,7 @@ struct timer_base { bool migration_enabled; bool nohz_active; bool is_idle; + bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags) static inline void forward_timer_base(struct timer_base *base) { - unsigned long jnow = READ_ONCE(jiffies); + unsigned long jnow; /* - * We only forward the base when it's idle and we have a delta between - * base clock and jiffies. + * We only forward the base when we are idle or have just come out of + * idle (must_forward_clk logic), and have a delta between base clock + * and jiffies. In the common case, run_timers will take care of it.
*/ - if (!base->is_idle || (long) (jnow - base->clk) < 2) + if (likely(!base->must_forward_clk)) + return; + + jnow = READ_ONCE(jiffies); + base->must_forward_clk = base->is_idle; + if ((long)(jnow - base->clk) < 2) return; /* @@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * same array bucket then just return: */ if (timer_pending(timer)) { + /* + * The downside of this optimization is that it can result in + * larger granularity than you would get from adding a new + * timer with this expiry. + */ if (timer->expires == expires) return 1; @@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * dequeue/enqueue dance. */ base = lock_timer_base(timer, &flags); + forward_timer_base(base); clk = base->clk; idx = calc_wheel_index(expires, clk); @@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } else { base = lock_timer_base(timer, &flags); + forward_timer_base(base); } ret = detach_if_pending(timer, base, false); @@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); + forward_timer_base(base); } } - /* Try to forward a stale timer base clock */ - forward_timer_base(base); - timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu) WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } + forward_timer_base(base); debug_activate(timer, timer->expires); internal_add_timer(base, timer); @@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (!is_max_delta) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* - * If we expect to sleep more than a tick, mark the base idle: + * If we expect to sleep more than a tick, mark 
the base idle. + * Also the tick is stopped so any added timer must forward + * the base clk itself to keep granularity small. This idle + * logic is only maintained for the BASE_STD base, deferrable + * timers may still see large granularity skew (by design). */ - if ((expires - basem) > TICK_NSEC) + if ((expires - basem) > TICK_NSEC) { + base->must_forward_clk = true; base->is_idle = true; + } } raw_spin_unlock(&base->lock); @@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + /* + * must_forward_clk must be cleared before running timers so that any + * timer functions that call mod_timer will not try to forward the + * base. idle tracking / clock forwarding logic is only used with + * BASE_STD timers. + * + * The deferrable base does not do idle tracking at all, so we do + * not forward it. This can result in very large variations in + * granularity for deferrable timers, but they can be deferred for + * long periods due to idle. + */ + base->must_forward_clk = false; + __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); From bd0fdb191c8523a9126bb14ac1b22cb47698ebf5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 13 Mar 2017 03:03:49 +1000 Subject: [PATCH 62/98] KVM: PPC: Book3S HV: Use msgsync with hypervisor doorbells on POWER9 When msgsnd is used for IPIs to other cores, msgsync must be executed by the target to order stores performed on the source before its msgsnd (provided the source executes the appropriate sync).
Fixes: 1704a81ccebc ("KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9") Signed-off-by: Nicholas Piggin Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c52184a8efdf..9c9c983b864f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Hypervisor doorbell - exit only if host IPI flag set */ cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL bne 3f +BEGIN_FTR_SECTION + PPC_MSGSYNC +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 beq 4f From 2c4fb78f78b6e420604ee1b05bdfb5c1d637869f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:52 +1000 Subject: [PATCH 63/98] KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss This adds a workaround for a bug in POWER9 DD1 chips where changing the CPPR (Current Processor Priority Register) can cause bits in the IPB (Interrupt Pending Buffer) to get lost. Thankfully it only happens when manually manipulating CPPR which is quite rare. When it does happen it can cause interrupts to be delayed or lost. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 4636ca6e7d38..150be86b1018 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,7 +16,16 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; - /* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! 
*/ + /* + * DD1 bug workaround: If PIPR is less favored than CPPR + * ignore the interrupt or we might incorrectly lose an IPB + * bit. + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { + u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR); + if (pipr >= xc->hw_cppr) + return; + } /* Perform the acknowledge OS to register cycle. */ ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG)); From bb9b52bd51dcb17b965a30167d0812902c1b9927 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 18 Aug 2017 12:10:58 +1000 Subject: [PATCH 64/98] KVM: PPC: Book3S HV: Add missing barriers to XIVE code and document them This adds missing memory barriers to order updates/tests of the virtual CPPR and MFRR, thus fixing a lost IPI problem. While at it also document all barriers in this file. This fixes a bug causing guest IPIs to occasionally get lost. The symptom then is hangs or stalls in the guest. Signed-off-by: Benjamin Herrenschmidt Tested-by: Guilherme G. Piccoli Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_xive_template.c | 57 ++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index 150be86b1018..d1ed2c41b5d2 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -16,6 +16,12 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) u8 cppr; u16 ack; + /* + * Ensure any previous store to CPPR is ordered vs. + * the subsequent loads from PIPR or ACK. + */ + eieio(); + /* * DD1 bug workaround: If PIPR is less favored than CPPR * ignore the interrupt or we might incorrectly lose an IPB @@ -244,6 +250,11 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc, /* * If we found an interrupt, adjust what the guest CPPR should * be as if we had just fetched that interrupt from HW. 
+ * + * Note: This can only make xc->cppr smaller as the previous + * loop will only exit with hirq != 0 if prio is lower than + * the current xc->cppr. Thus we don't need to re-check xc->mfrr + * for pending IPIs. */ if (hirq) xc->cppr = prio; @@ -389,6 +400,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) old_cppr = xc->cppr; xc->cppr = cppr; + /* + * Order the above update of xc->cppr with the subsequent + * read of xc->mfrr inside push_pending_to_hw() + */ + smp_mb(); + /* * We are masking less, we need to look for pending things * to deliver and set VP pending bits accordingly to trigger @@ -429,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr) * used to signal MFRR changes is EOId when fetched from * the queue. */ - if (irq == XICS_IPI || irq == 0) + if (irq == XICS_IPI || irq == 0) { + /* + * This barrier orders the setting of xc->cppr vs. + * subsequent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); goto bail; + } /* Find interrupt source */ sb = kvmppc_xive_find_source(xive, irq, &src); if (!sb) { pr_devel(" source not found !\n"); rc = H_PARAMETER; + /* Same as above */ + smp_mb(); goto bail; } state = &sb->irq_state[src]; kvmppc_xive_select_irq(state, &hw_num, &xd); state->in_eoi = true; - mb(); + + /* + * This barrier orders both setting of in_eoi above vs. + * subsequent test of guest_priority, and the setting + * of xc->cppr vs. subsequent test of xc->mfrr done inside + * scan_interrupts and push_pending_to_hw + */ + smp_mb(); again: if (state->guest_priority == MASKED) { @@ -470,6 +503,14 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr) } + /* + * This barrier orders the above guest_priority check + * and spin_lock/unlock with clearing in_eoi below. + * + * It also has to be a full mb() as it must ensure + * the MMIOs done in source_eoi() are completed before + * state->in_eoi is visible.
+ */ mb(); state->in_eoi = false; bail: @@ -504,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server, /* Locklessly write over MFRR */ xc->mfrr = mfrr; + /* + * The load of xc->cppr below and the subsequent MMIO store + * to the IPI must happen after the above mfrr update is + * globally visible so that: + * + * - Synchronize with another CPU doing an H_EOI or a H_CPPR + * updating xc->cppr then reading xc->mfrr. + * + * - The target of the IPI sees the xc->mfrr update + */ + mb(); + /* Shoot the IPI if most favored than target cppr */ if (mfrr < xc->cppr) __x_writeq(0, __x_trig_page(&xc->vp_ipi_data)); From 98cd249cf9d2c7e2322fbf20c454c019e141a28b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 20 Jul 2017 06:58:25 +0200 Subject: [PATCH 65/98] c6x: defconfig: Cleanup from old Kconfig options Remove old, dead Kconfig options (in order appearing in this commit): - EXPERIMENTAL is gone since v3.9; - MISC_DEVICES: commit 7c5763b8453a ("drivers: misc: Remove MISC_DEVICES config option"); Signed-off-by: Krzysztof Kozlowski Signed-off-by: Mark Salter --- arch/c6x/configs/dsk6455_defconfig | 2 -- arch/c6x/configs/evmc6457_defconfig | 2 -- arch/c6x/configs/evmc6472_defconfig | 2 -- arch/c6x/configs/evmc6474_defconfig | 2 -- arch/c6x/configs/evmc6678_defconfig | 2 -- 5 files changed, 10 deletions(-) diff --git a/arch/c6x/configs/dsk6455_defconfig b/arch/c6x/configs/dsk6455_defconfig index 4663487c67a1..d764ea4cce7f 100644 --- a/arch/c6x/configs/dsk6455_defconfig +++ b/arch/c6x/configs/dsk6455_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6455=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -25,7 +24,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6457_defconfig 
b/arch/c6x/configs/evmc6457_defconfig index bba40e195ec4..05d0b4a25ab1 100644 --- a/arch/c6x/configs/evmc6457_defconfig +++ b/arch/c6x/configs/evmc6457_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6457=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -26,7 +25,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6472_defconfig b/arch/c6x/configs/evmc6472_defconfig index 8c46155f6d31..8d81fcf86b0e 100644 --- a/arch/c6x/configs/evmc6472_defconfig +++ b/arch/c6x/configs/evmc6472_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6472=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6474_defconfig b/arch/c6x/configs/evmc6474_defconfig index 15533f632313..8156a98f3958 100644 --- a/arch/c6x/configs/evmc6474_defconfig +++ b/arch/c6x/configs/evmc6474_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6474=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y @@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set diff --git a/arch/c6x/configs/evmc6678_defconfig b/arch/c6x/configs/evmc6678_defconfig index 5f126d4905b1..c4f433c25b69 100644 --- a/arch/c6x/configs/evmc6678_defconfig +++ b/arch/c6x/configs/evmc6678_defconfig @@ -1,5 +1,4 @@ CONFIG_SOC_TMS320C6678=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_SPARSE_IRQ=y 
@@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=17000 -CONFIG_MISC_DEVICES=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set From 636d42117800db1a994726fcf017e3633db832a5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 18 Jul 2017 16:42:43 -0500 Subject: [PATCH 66/98] c6x: Convert to using %pOF instead of full_name Now that we have a custom printf format specifier, convert users of full_name to use %pOF instead. This is preparation to remove storing of the full path string for each node. Signed-off-by: Rob Herring Cc: Mark Salter Cc: Aurelien Jacquiot Cc: linux-c6x-dev@linux-c6x.org Signed-off-by: Mark Salter --- arch/c6x/platforms/megamod-pic.c | 22 +++++++++++----------- arch/c6x/platforms/plldata.c | 4 ++-- arch/c6x/platforms/timer64.c | 8 ++++---- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c index 43afc03e4125..9519fa5f97d0 100644 --- a/arch/c6x/platforms/megamod-pic.c +++ b/arch/c6x/platforms/megamod-pic.c @@ -208,14 +208,14 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) pic = kzalloc(sizeof(struct megamod_pic), GFP_KERNEL); if (!pic) { - pr_err("%s: Could not alloc PIC structure.\n", np->full_name); + pr_err("%pOF: Could not alloc PIC structure.\n", np); return NULL; } pic->irqhost = irq_domain_add_linear(np, NR_COMBINERS * 32, &megamod_domain_ops, pic); if (!pic->irqhost) { - pr_err("%s: Could not alloc host.\n", np->full_name); + pr_err("%pOF: Could not alloc host.\n", np); goto error_free; } @@ -225,7 +225,7 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) pic->regs = of_iomap(np, 0); if (!pic->regs) { - pr_err("%s: Could not map registers.\n", np->full_name); + pr_err("%pOF: Could not map registers.\n", np); goto error_free; } @@ -253,8 +253,8 @@ static struct megamod_pic * __init init_megamod_pic(struct 
device_node *np) irq_data = irq_get_irq_data(irq); if (!irq_data) { - pr_err("%s: combiner-%d no irq_data for virq %d!\n", - np->full_name, i, irq); + pr_err("%pOF: combiner-%d no irq_data for virq %d!\n", + np, i, irq); continue; } @@ -265,16 +265,16 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) * of the core priority interrupts (4 - 15). */ if (hwirq < 4 || hwirq >= NR_PRIORITY_IRQS) { - pr_err("%s: combiner-%d core irq %ld out of range!\n", - np->full_name, i, hwirq); + pr_err("%pOF: combiner-%d core irq %ld out of range!\n", + np, i, hwirq); continue; } /* record the mapping */ mapping[hwirq - 4] = i; - pr_debug("%s: combiner-%d cascading to hwirq %ld\n", - np->full_name, i, hwirq); + pr_debug("%pOF: combiner-%d cascading to hwirq %ld\n", + np, i, hwirq); cascade_data[i].pic = pic; cascade_data[i].index = i; @@ -290,8 +290,8 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np) /* Finally, set up the MUX registers */ for (i = 0; i < NR_MUX_OUTPUTS; i++) { if (mapping[i] != IRQ_UNMAPPED) { - pr_debug("%s: setting mux %d to priority %d\n", - np->full_name, mapping[i], i + 4); + pr_debug("%pOF: setting mux %d to priority %d\n", + np, mapping[i], i + 4); set_megamod_mux(pic, mapping[i], i); } } diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c index 755359eb6286..e8b6cc6a7b5a 100644 --- a/arch/c6x/platforms/plldata.c +++ b/arch/c6x/platforms/plldata.c @@ -436,8 +436,8 @@ void __init c64x_setup_clocks(void) err = of_property_read_u32(node, "clock-frequency", &val); if (err || val == 0) { - pr_err("%s: no clock-frequency found! Using %dMHz\n", - node->full_name, (int)val / 1000000); + pr_err("%pOF: no clock-frequency found! 
Using %dMHz\n", + node, (int)val / 1000000); val = 25000000; } clkin1.rate = val; diff --git a/arch/c6x/platforms/timer64.c b/arch/c6x/platforms/timer64.c index 0bd0452ded80..241a9a607193 100644 --- a/arch/c6x/platforms/timer64.c +++ b/arch/c6x/platforms/timer64.c @@ -204,14 +204,14 @@ void __init timer64_init(void) timer = of_iomap(np, 0); if (!timer) { - pr_debug("%s: Cannot map timer registers.\n", np->full_name); + pr_debug("%pOF: Cannot map timer registers.\n", np); goto out; } - pr_debug("%s: Timer registers=%p.\n", np->full_name, timer); + pr_debug("%pOF: Timer registers=%p.\n", np, timer); cd->irq = irq_of_parse_and_map(np, 0); if (cd->irq == NO_IRQ) { - pr_debug("%s: Cannot find interrupt.\n", np->full_name); + pr_debug("%pOF: Cannot find interrupt.\n", np); iounmap(timer); goto out; } @@ -229,7 +229,7 @@ void __init timer64_init(void) dscr_set_devstate(timer64_devstate_id, DSCR_DEVSTATE_ENABLED); } - pr_debug("%s: Timer irq=%d.\n", np->full_name, cd->irq); + pr_debug("%pOF: Timer irq=%d.\n", np, cd->irq); clockevents_calc_mult_shift(cd, c6x_core_freq / TIMER_DIVISOR, 5); From 50b4d485528d1dbe0bd249f2073140e3444f4a7b Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 24 Aug 2017 01:57:56 +0200 Subject: [PATCH 67/98] bsg-lib: fix kernel panic resulting from missing allocation of reply-buffer Since we split the scsi_request out of struct request bsg fails to provide a reply-buffer for the drivers. This was done via the pointer for sense-data, that is not preallocated anymore. Failing to allocate/assign it results in illegal dereferences because LLDs use this pointer unquestioned. An example panic on s390x, using the zFCP driver, looks like this (I had debugging on, otherwise NULL-pointer dereferences wouldn't even panic on s390x): Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403 Fault in home space mode while using kernel ASCE. 
AS:0000000001590007 R3:0000000000000024 Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3 Hardware name: IBM 2964 N96 702 (z/VM 6.4.0) task: 0000000065cb0100 task.stack: 0000000065cb4000 Krnl PSW : 0704e00180000000 000003ff801e4156 (zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp]) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 0000000000000001 000000005fa9d0d0 000000005fa9d078 0000000000e16866 000003ff00000290 6b6b6b6b6b6b6b6b 0000000059f78f00 000000000000000f 00000000593a0958 00000000593a0958 0000000060d88800 000000005ddd4c38 0000000058b50100 07000000659cba08 000003ff801e8556 00000000659cb9a8 Krnl Code: 000003ff801e4146: e31020500004 lg %r1,80(%r2) 000003ff801e414c: 58402040 l %r4,64(%r2) #000003ff801e4150: e35020200004 lg %r5,32(%r2) >000003ff801e4156: 50405004 st %r4,4(%r5) 000003ff801e415a: e54c50080000 mvhi 8(%r5),0 000003ff801e4160: e33010280012 lt %r3,40(%r1) 000003ff801e4166: a718fffb lhi %r1,-5 000003ff801e416a: 1803 lr %r0,%r3 Call Trace: ([<000003ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp]) [<000003ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp] [<000003ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp] [<00000000009b91b6>] qdio_kick_handler+0x2ae/0x2c8 [<00000000009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10 [<00000000001684c2>] tasklet_action+0x15a/0x1d8 [<0000000000bd28ec>] __do_softirq+0x3ec/0x848 [<00000000001675a4>] irq_exit+0x74/0xf8 [<000000000010dd6a>] do_IRQ+0xba/0xf0 [<0000000000bd19e8>] io_int_handler+0x104/0x2d4 [<00000000001033b6>] enabled_wait+0xb6/0x188 ([<000000000010339e>] enabled_wait+0x9e/0x188) [<000000000010396a>] arch_cpu_idle+0x32/0x50 [<0000000000bd0112>] default_idle_call+0x52/0x68 [<00000000001cd0fa>] do_idle+0x102/0x188 [<00000000001cd41e>] cpu_startup_entry+0x3e/0x48 [<0000000000118c64>] smp_start_secondary+0x11c/0x130 [<0000000000bd2016>] restart_int_handler+0x62/0x78 [<0000000000000000>] (null) 
INFO: lockdep is turned off. Last Breaking-Event-Address: [<000003ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp] Kernel panic - not syncing: Fatal exception in interrupt This patch moves bsg-lib to allocate and setup struct bsg_job ahead of time, including the allocation of a buffer for the reply-data. This means, struct bsg_job is not allocated separately anymore, but as part of struct request allocation - similar to struct scsi_cmd. Reflect this in the function names that used to handle creation/destruction of struct bsg_job. Reported-by: Steffen Maier Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Benjamin Block Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request") Cc: #4.11+ Signed-off-by: Jens Axboe --- block/bsg-lib.c | 74 ++++++++++++++++++++++++----------------- include/linux/blkdev.h | 1 - include/linux/bsg-lib.h | 2 ++ 3 files changed, 46 insertions(+), 31 deletions(-) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index c4513b23f57a..dd56d7460cb9 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -29,26 +29,25 @@ #include /** - * bsg_destroy_job - routine to teardown/delete a bsg job + * bsg_teardown_job - routine to teardown a bsg job * @job: bsg_job that is to be torn down */ -static void bsg_destroy_job(struct kref *kref) +static void bsg_teardown_job(struct kref *kref) { struct bsg_job *job = container_of(kref, struct bsg_job, kref); struct request *rq = job->req; - blk_end_request_all(rq, BLK_STS_OK); - put_device(job->dev); /* release reference for the request */ kfree(job->request_payload.sg_list); kfree(job->reply_payload.sg_list); - kfree(job); + + blk_end_request_all(rq, BLK_STS_OK); } void bsg_job_put(struct bsg_job *job) { - kref_put(&job->kref, bsg_destroy_job); + kref_put(&job->kref, bsg_teardown_job); } EXPORT_SYMBOL_GPL(bsg_job_put); @@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done); */ static void bsg_softirq_done(struct request *rq) { - struct bsg_job *job = rq->special; + 
struct bsg_job *job = blk_mq_rq_to_pdu(rq); bsg_job_put(job); } @@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) } /** - * bsg_create_job - create the bsg_job structure for the bsg request + * bsg_prepare_job - create the bsg_job structure for the bsg request * @dev: device that is being sent the bsg request * @req: BSG request that needs a job structure */ -static int bsg_create_job(struct device *dev, struct request *req) +static int bsg_prepare_job(struct device *dev, struct request *req) { struct request *rsp = req->next_rq; - struct request_queue *q = req->q; struct scsi_request *rq = scsi_req(req); - struct bsg_job *job; + struct bsg_job *job = blk_mq_rq_to_pdu(req); int ret; - BUG_ON(req->special); - - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); - if (!job) - return -ENOMEM; - - req->special = job; - job->req = req; - if (q->bsg_job_size) - job->dd_data = (void *)&job[1]; job->request = rq->cmd; job->request_len = rq->cmd_len; - job->reply = rq->sense; - job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer - * allocated */ + if (req->bio) { ret = bsg_map_buffer(&job->request_payload, req); if (ret) @@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q) { struct device *dev = q->queuedata; struct request *req; - struct bsg_job *job; int ret; if (!get_device(dev)) @@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q) break; spin_unlock_irq(q->queue_lock); - ret = bsg_create_job(dev, req); + ret = bsg_prepare_job(dev, req); if (ret) { scsi_req(req)->result = ret; blk_end_request_all(req, BLK_STS_OK); @@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q) continue; } - job = req->special; - ret = q->bsg_job_fn(job); + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req)); spin_lock_irq(q->queue_lock); if (ret) break; @@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q) spin_lock_irq(q->queue_lock); } +static int 
bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + memset(job, 0, sizeof(*job)); + + scsi_req_init(sreq); + sreq->sense_len = SCSI_SENSE_BUFFERSIZE; + sreq->sense = kzalloc(sreq->sense_len, gfp); + if (!sreq->sense) + return -ENOMEM; + + job->req = req; + job->reply = sreq->sense; + job->reply_len = sreq->sense_len; + job->dd_data = job + 1; + + return 0; +} + +static void bsg_exit_rq(struct request_queue *q, struct request *req) +{ + struct bsg_job *job = blk_mq_rq_to_pdu(req); + struct scsi_request *sreq = &job->sreq; + + kfree(sreq->sense); +} + /** * bsg_setup_queue - Create and add the bsg hooks so we can receive requests * @dev: device to attach bsg device to @@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, q = blk_alloc_queue(GFP_KERNEL); if (!q) return ERR_PTR(-ENOMEM); - q->cmd_size = sizeof(struct scsi_request); + q->cmd_size = sizeof(struct bsg_job) + dd_job_size; + q->init_rq_fn = bsg_init_rq; + q->exit_rq_fn = bsg_exit_rq; q->request_fn = bsg_request_fn; ret = blk_init_allocated_queue(q); @@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name, goto out_cleanup_queue; q->queuedata = dev; - q->bsg_job_size = dd_job_size; q->bsg_job_fn = job_fn; queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f6a0cb27d3..2a5d52fa90f5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -568,7 +568,6 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; - int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index e34dde2da0ef..637a20cfb237 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -24,6 +24,7 @@ #define _BLK_BSG_ #include 
+#include struct request; struct device; @@ -37,6 +38,7 @@ struct bsg_buffer { }; struct bsg_job { + struct scsi_request sreq; struct device *dev; struct request *req; From 9ce76511b67be8fbcdff36b7e1662e3887bb7377 Mon Sep 17 00:00:00 2001 From: Tom Rini Date: Tue, 22 Aug 2017 21:51:46 -0400 Subject: [PATCH 68/98] ASoC: rt5677: Reintroduce I2C device IDs Not all devices with ACPI and this combination of sound devices will have the required information provided via ACPI. Reintroduce the I2C device ID to restore sound functionality on the Chromebook 'Samus' model. [ More background note: the commit a36afb0ab648 ("ASoC: rt5677: Introduce proper table...") moved the i2c ID probed via ACPI ("RT5677CE:00") to a proper acpi_device_id table. Although the action itself is correct per se, the overlooked issue is the reference id->driver_data at rt5677_i2c_probe() for retrieving the corresponding chip model for the given id. Since id=NULL is passed for ACPI matching case, we get an Oops now. We already have queued more fixes for 4.14 and they already address the issue, but they are bigger changes that aren't preferable for the late 4.13-rc stage. So, this patch just papers over the bug as a once-off quick fix for a particular ACPI matching.
-- tiwai ] Fixes: a36afb0ab648 ("ASoC: rt5677: Introduce proper table for ACPI enumeration") Signed-off-by: Tom Rini Acked-by: Mark Brown Signed-off-by: Takashi Iwai --- sound/soc/codecs/rt5677.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 36e530a36c82..6f629278d982 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -5021,6 +5021,7 @@ static const struct regmap_config rt5677_regmap = { static const struct i2c_device_id rt5677_i2c_id[] = { { "rt5677", RT5677 }, { "rt5676", RT5676 }, + { "RT5677CE:00", RT5677 }, { } }; MODULE_DEVICE_TABLE(i2c, rt5677_i2c_id); From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Aug 2017 11:12:19 -0400 Subject: [PATCH 69/98] nfsd: Limit end of page list when decoding NFSv4 WRITE When processing an NFSv4 WRITE operation, argp->end should never point past the end of the data in the final page of the page list. Otherwise, nfsd4_decode_compound can walk into uninitialized memory. More critical, nfsd4_decode_write is failing to increment argp->pagelen when it increments argp->pagelist. This can cause later xdr decoders to assume more data is available than really is, which can cause server crashes on malformed requests. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20fbcab97753..5f940d2a136b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); + argp->end = argp->p + XDR_QUADLEN(argp->pagelen); argp->pagelen = 0; } else { argp->end = argp->p + (PAGE_SIZE>>2); @@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) argp->pagelen -= pages * PAGE_SIZE; len -= pages * PAGE_SIZE; - argp->p = (__be32 *)page_address(argp->pagelist[0]); - argp->pagelist++; - argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); + next_decode_page(argp); } argp->p += XDR_QUADLEN(len); From eebe53e87f97975ee58a21693e44797608bf679c Mon Sep 17 00:00:00 2001 From: Vadim Lomovtsev Date: Mon, 21 Aug 2017 07:23:07 -0400 Subject: [PATCH 70/98] net: sunrpc: svcsock: fix NULL-pointer exception While running nfs/connectathon tests a kernel NULL-pointer exception has been observed due to races in svcsock.c. The race appears when the kernel accepts a connection via kernel_accept (which creates a new socket) and starts queuing ingress packets to the new socket. This happens in ksoftirq context which could run concurrently on a different core while the new socket setup is not done yet. The fix is to re-order the socket user data init sequence and add write/read barrier calls to be sure that we get proper values for the callback pointers before actually calling them. Test results: nfs/connectathon reports '0' failed tests for about 200+ iterations.
Crash log: ---<-snip->--- [ 6708.638984] Unable to handle kernel NULL pointer dereference at virtual address 00000000 [ 6708.647093] pgd = ffff0000094e0000 [ 6708.650497] [00000000] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff80003, *pte=0000000000000000 [ 6708.660761] Internal error: Oops: 86000005 [#1] SMP [ 6708.665630] Modules linked in: nfsv3 nfnetlink_queue nfnetlink_log nfnetlink rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache overlay xt_CONNSECMARK xt_SECMARK xt_conntrack iptable_security ip_tables ah4 xfrm4_mode_transport sctp tun binfmt_misc ext4 jbd2 mbcache loop tcp_diag udp_diag inet_diag rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack vfat fat ghash_ce sha2_ce sha1_ce cavium_rng_vf i2c_thunderx sg thunderx_edac i2c_smbus edac_core cavium_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c nicvf nicpf ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops [ 6708.736446] ttm drm i2c_core thunder_bgx thunder_xcv mdio_thunder mdio_cavium dm_mirror dm_region_hash dm_log dm_mod [last unloaded: stap_3c300909c5b3f46dcacd49aab3334af_87021] [ 6708.752275] CPU: 84 PID: 0 Comm: swapper/84 Tainted: G W OE 4.11.0-4.el7.aarch64 #1 [ 6708.760787] Hardware name: www.cavium.com CRB-2S/CRB-2S, BIOS 0.3 Mar 13 2017 [ 6708.767910] task: ffff810006842e80 task.stack: ffff81000689c000 [ 6708.773822] PC is at 0x0 [ 6708.776739] LR is at svc_data_ready+0x38/0x88 [sunrpc] [ 6708.781866] pc : [<0000000000000000>] lr : [] pstate: 60000145 [ 6708.789248] sp : ffff810ffbad3900 [ 6708.792551] x29: ffff810ffbad3900 x28: ffff000008c73d58 [ 6708.797853] x27: 0000000000000000 x26: ffff81000bbe1e00 [ 6708.803156] x25: 0000000000000020 x24: ffff800f7410bf28 [ 6708.808458] x23: ffff000008c63000 x22: ffff000008c63000 [ 6708.813760] 
x21: ffff800f7410bf28 x20: ffff81000bbe1e00 [ 6708.819063] x19: ffff810012412400 x18: 00000000d82a9df2 [ 6708.824365] x17: 0000000000000000 x16: 0000000000000000 [ 6708.829667] x15: 0000000000000000 x14: 0000000000000001 [ 6708.834969] x13: 0000000000000000 x12: 722e736f622e676e [ 6708.840271] x11: 00000000f814dd99 x10: 0000000000000000 [ 6708.845573] x9 : 7374687225000000 x8 : 0000000000000000 [ 6708.850875] x7 : 0000000000000000 x6 : 0000000000000000 [ 6708.856177] x5 : 0000000000000028 x4 : 0000000000000000 [ 6708.861479] x3 : 0000000000000000 x2 : 00000000e5000000 [ 6708.866781] x1 : 0000000000000000 x0 : ffff81000bbe1e00 [ 6708.872084] [ 6708.873565] Process swapper/84 (pid: 0, stack limit = 0xffff81000689c000) [ 6708.880341] Stack: (0xffff810ffbad3900 to 0xffff8100068a0000) [ 6708.886075] Call trace: [ 6708.888513] Exception stack(0xffff810ffbad3710 to 0xffff810ffbad3840) [ 6708.894942] 3700: ffff810012412400 0001000000000000 [ 6708.902759] 3720: ffff810ffbad3900 0000000000000000 0000000060000145 ffff800f79300000 [ 6708.910577] 3740: ffff000009274d00 00000000000003ea 0000000000000015 ffff000008c63000 [ 6708.918395] 3760: ffff810ffbad3830 ffff800f79300000 000000000000004d 0000000000000000 [ 6708.926212] 3780: ffff810ffbad3890 ffff0000080f88dc ffff800f79300000 000000000000004d [ 6708.934030] 37a0: ffff800f7930093c ffff000008c63000 0000000000000000 0000000000000140 [ 6708.941848] 37c0: ffff000008c2c000 0000000000040b00 ffff81000bbe1e00 0000000000000000 [ 6708.949665] 37e0: 00000000e5000000 0000000000000000 0000000000000000 0000000000000028 [ 6708.957483] 3800: 0000000000000000 0000000000000000 0000000000000000 7374687225000000 [ 6708.965300] 3820: 0000000000000000 00000000f814dd99 722e736f622e676e 0000000000000000 [ 6708.973117] [< (null)>] (null) [ 6708.977824] [] tcp_data_queue+0x754/0xc5c [ 6708.983386] [] tcp_rcv_established+0x1a0/0x67c [ 6708.989384] [] tcp_v4_do_rcv+0x15c/0x22c [ 6708.994858] [] tcp_v4_rcv+0xaf0/0xb58 [ 6709.000077] [] 
ip_local_deliver_finish+0x10c/0x254 [ 6709.006419] [] ip_local_deliver+0xf0/0xfc [ 6709.011980] [] ip_rcv_finish+0x208/0x3a4 [ 6709.017454] [] ip_rcv+0x2dc/0x3c8 [ 6709.022328] [] __netif_receive_skb_core+0x2f8/0xa0c [ 6709.028758] [] __netif_receive_skb+0x38/0x84 [ 6709.034580] [] netif_receive_skb_internal+0x68/0xdc [ 6709.041010] [] napi_gro_receive+0xcc/0x1a8 [ 6709.046690] [] nicvf_cq_intr_handler+0x59c/0x730 [nicvf] [ 6709.053559] [] nicvf_poll+0x38/0xb8 [nicvf] [ 6709.059295] [] net_rx_action+0x2f8/0x464 [ 6709.064771] [] __do_softirq+0x11c/0x308 [ 6709.070164] [] irq_exit+0x12c/0x174 [ 6709.075206] [] __handle_domain_irq+0x78/0xc4 [ 6709.081027] [] gic_handle_irq+0x94/0x190 [ 6709.086501] Exception stack(0xffff81000689fdf0 to 0xffff81000689ff20) [ 6709.092929] fde0: 0000810ff2ec0000 ffff000008c10000 [ 6709.100747] fe00: ffff000008c70ef4 0000000000000001 0000000000000000 ffff810ffbad9b18 [ 6709.108565] fe20: ffff810ffbad9c70 ffff8100169d3800 ffff810006843ab0 ffff81000689fe80 [ 6709.116382] fe40: 0000000000000bd0 0000ffffdf979cd0 183f5913da192500 0000ffff8a254ce4 [ 6709.124200] fe60: 0000ffff8a254b78 0000aaab10339808 0000000000000000 0000ffff8a0c2a50 [ 6709.132018] fe80: 0000ffffdf979b10 ffff000008d6d450 ffff000008c10000 ffff000008d6d000 [ 6709.139836] fea0: 0000000000000054 ffff000008cd3dbc 0000000000000000 0000000000000000 [ 6709.147653] fec0: 0000000000000000 0000000000000000 0000000000000000 ffff81000689ff20 [ 6709.155471] fee0: ffff000008085240 ffff81000689ff20 ffff000008085244 0000000060000145 [ 6709.163289] ff00: ffff81000689ff10 ffff00000813f1e4 ffffffffffffffff ffff00000813f238 [ 6709.171107] [] el1_irq+0xb4/0x140 [ 6709.175976] [] arch_cpu_idle+0x44/0x11c [ 6709.181368] [] default_idle_call+0x20/0x30 [ 6709.187020] [] do_idle+0x158/0x1e4 [ 6709.191973] [] cpu_startup_entry+0x2c/0x30 [ 6709.197624] [] secondary_start_kernel+0x13c/0x160 [ 6709.203878] [<0000000001bc71c4>] 0x1bc71c4 [ 6709.207967] Code: bad PC value [ 6709.211061] SMP: stopping 
secondary CPUs [ 6709.218830] Starting crashdump kernel... [ 6709.222749] Bye! ---<-snip>--- Signed-off-by: Vadim Lomovtsev Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2b720fa35c4f..e18500151236 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk) dprintk("svc: socket %p(inet %p), busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); @@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk) if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk) dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); - if (svsk) + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); + } + /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk) if (!svsk) printk("svc: socket %p: no user data\n", sk); else { + /* Refer to svc_setup_socket() for details. 
*/ + rmb(); svsk->sk_ostate(sk); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return ERR_PTR(err); } - inet->sk_user_data = svsk; svsk->sk_sock = sock; svsk->sk_sk = inet; svsk->sk_ostate = inet->sk_state_change; svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; + /* + * This barrier is necessary in order to prevent race condition + * with svc_data_ready(), svc_listen_data_ready() and others + * when calling callbacks above. + */ + wmb(); + inet->sk_user_data = svsk; /* Initialize the socket */ if (sock->type == SOCK_DGRAM) From 4a646580f793d19717f7e034c8d473b509c27d49 Mon Sep 17 00:00:00 2001 From: Masaki Ota Date: Thu, 24 Aug 2017 15:44:36 -0700 Subject: [PATCH 71/98] Input: ALPS - fix two-finger scroll breakage in right side on ALPS touchpad Fixed the issue that two finger scroll does not work correctly on V8 protocol. The cause is that V8 protocol X-coordinate decode is wrong at SS4 PLUS device. I added SS4 PLUS X decode definition. More notes: the problem manifests itself by the commit e7348396c6d5 ("Input: ALPS - fix V8+ protocol handling (73 03 28)"), where a fix for the V8+ protocol was applied. Although the culprit must have been present beforehand, the two-finger scroll worked casually even with the wrongly reported values for some reason. It got broken by the commit above just because it changed x_max value, and this made libinput correctly figure out the MT events. Since the X coord is reported as falsely doubled, the events on the right-half side go outside the boundary, thus they are no longer handled. This resulted in a broken two-finger scroll. One finger event is decoded differently, and it didn't suffer from this problem. The problem was only about MT events. 
--tiwai Fixes: e7348396c6d5 ("Input: ALPS - fix V8+ protocol handling (73 03 28)") Signed-off-by: Masaki Ota Tested-by: Takashi Iwai Tested-by: Paul Donohue Cc: Signed-off-by: Takashi Iwai Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/alps.c | 41 ++++++++++++++++++++++++++++---------- drivers/input/mouse/alps.h | 8 ++++++++ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 262d1057c1da..850b00e3ad8e 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -1215,14 +1215,24 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_TWO: if (priv->flags & ALPS_BUTTONPAD) { - f->mt[0].x = SS4_BTL_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_BTL_MF_X_V2(p, 1); + } f->mt[0].y = SS4_BTL_MF_Y_V2(p, 0); - f->mt[1].x = SS4_BTL_MF_X_V2(p, 1); f->mt[1].y = SS4_BTL_MF_Y_V2(p, 1); } else { - f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + } f->mt[0].y = SS4_STD_MF_Y_V2(p, 0); - f->mt[1].x = SS4_STD_MF_X_V2(p, 1); f->mt[1].y = SS4_STD_MF_Y_V2(p, 1); } f->pressure = SS4_MF_Z_V2(p, 0) ? 
0x30 : 0; @@ -1239,16 +1249,27 @@ static int alps_decode_ss4_v2(struct alps_fields *f, case SS4_PACKET_ID_MULTI: if (priv->flags & ALPS_BUTTONPAD) { - f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1); + } else { + f->mt[2].x = SS4_BTL_MF_X_V2(p, 0); + f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); + } + f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0); - f->mt[3].x = SS4_BTL_MF_X_V2(p, 1); f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1); no_data_x = SS4_MFPACKET_NO_AX_BL; no_data_y = SS4_MFPACKET_NO_AY_BL; } else { - f->mt[2].x = SS4_STD_MF_X_V2(p, 0); + if (IS_SS4PLUS_DEV(priv->dev_id)) { + f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1); + } else { + f->mt[0].x = SS4_STD_MF_X_V2(p, 0); + f->mt[1].x = SS4_STD_MF_X_V2(p, 1); + } f->mt[2].y = SS4_STD_MF_Y_V2(p, 0); - f->mt[3].x = SS4_STD_MF_X_V2(p, 1); f->mt[3].y = SS4_STD_MF_Y_V2(p, 1); no_data_x = SS4_MFPACKET_NO_AX; no_data_y = SS4_MFPACKET_NO_AY; @@ -2541,8 +2562,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse, memset(otp, 0, sizeof(otp)); - if (alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0]) || - alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0])) + if (alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0]) || + alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0])) return -1; alps_update_device_area_ss4_v2(otp, priv); diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h index ed2d6879fa52..c80a7c76cb76 100644 --- a/drivers/input/mouse/alps.h +++ b/drivers/input/mouse/alps.h @@ -100,6 +100,10 @@ enum SS4_PACKET_ID { ((_b[1 + _i * 3] << 5) & 0x1F00) \ ) +#define SS4_PLUS_STD_MF_X_V2(_b, _i) (((_b[0 + (_i) * 3] << 4) & 0x0070) | \ + ((_b[1 + (_i) * 3] << 4) & 0x0F80) \ + ) + #define SS4_STD_MF_Y_V2(_b, _i) (((_b[1 + (_i) * 3] << 3) & 0x0010) | \ ((_b[2 + (_i) * 3] << 5) & 0x01E0) | \ ((_b[2 + (_i) * 3] << 4) & 0x0E00) \ @@ -109,6 +113,10 @@ enum SS4_PACKET_ID { ((_b[0 + (_i) * 3] 
>> 3) & 0x0010) \ ) +#define SS4_PLUS_BTL_MF_X_V2(_b, _i) (SS4_PLUS_STD_MF_X_V2(_b, _i) | \ + ((_b[0 + (_i) * 3] >> 4) & 0x0008) \ + ) + #define SS4_BTL_MF_Y_V2(_b, _i) (SS4_STD_MF_Y_V2(_b, _i) | \ ((_b[0 + (_i) * 3] >> 3) & 0x0008) \ ) From b974696da1cfc5aa0c29ed97dc8f6c239899e64b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 23 Aug 2017 09:03:04 +0200 Subject: [PATCH 72/98] mtd: nandsim: remove debugfs entries in error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The debugfs entries must be removed before an error is returned in the probe function. Otherwise another try to load the module fails and when the debugfs files are accessed without the module loaded, the kernel still tries to call a function in that module. Fixes: 5346c27c5fed ("mtd: nandsim: Introduce debugfs infrastructure") Signed-off-by: Uwe Kleine-König Reviewed-by: Richard Weinberger Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- drivers/mtd/nand/nandsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 03a0d057bf2f..e4211c3cc49b 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -2373,6 +2373,7 @@ static int __init ns_init_module(void) return 0; err_exit: + nandsim_debugfs_remove(nand); free_nandsim(nand); nand_release(nsmtd); for (i = 0;i < ARRAY_SIZE(nand->partitions); ++i) From be3e83e3471cd0faff2c2d88fe9cfc73d9a9745a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 23 Aug 2017 20:45:01 +0200 Subject: [PATCH 73/98] mtd: nand: atmel: Relax tADL_min constraint Version 4 of the ONFI spec mandates that tADL be at least 400 nanoseconds, but, depending on the master clock rate, 400 ns may not fit in the tADL field of the SMC reg. We need to relax the check and accept the -ERANGE return code. Note that previous versions of the ONFI spec had a lower tADL_min (100 or 200 ns). 
It's not clear why this timing constraint got increased but it seems most NANDs are fine with values lower than 400ns, so we should be safe. Fixes: f9ce2eddf176 ("mtd: nand: atmel: Add ->setup_data_interface() hooks") Signed-off-by: Boris Brezillon Tested-by: Quentin Schulz Signed-off-by: Brian Norris --- drivers/mtd/nand/atmel/nand-controller.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 2c8baa0c2c4e..ceec21bd30c4 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -1364,7 +1364,18 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand, ret = atmel_smc_cs_conf_set_timing(smcconf, ATMEL_HSMC_TIMINGS_TADL_SHIFT, ncycles); - if (ret) + /* + * Version 4 of the ONFI spec mandates that tADL be at least 400 + * nanoseconds, but, depending on the master clock rate, 400 ns may not + * fit in the tADL field of the SMC reg. We need to relax the check and + * accept the -ERANGE return code. + * + * Note that previous versions of the ONFI spec had a lower tADL_min + * (100 or 200 ns). It's not clear why this timing constraint got + * increased but it seems most NANDs are fine with values lower than + * 400ns, so we should be safe. + */ + if (ret && ret != -ERANGE) return ret; ncycles = DIV_ROUND_UP(conf->timings.sdr.tAR_min, mckperiodps); From c469268cd523245cc58255f6696e0c295485cb0b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 24 Aug 2017 11:59:31 +0200 Subject: [PATCH 74/98] KVM: x86: block guest protection keys unless the host has them enabled If the host has protection keys disabled, we cannot read and write the guest PKRU---RDPKRU and WRPKRU fail with #GP(0) if CR4.PKE=0. Block the PKU cpuid bit in that case. This ensures that guest_CR4.PKE=1 implies host_CR4.PKE=1. 
Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59ca2eea522c..19adbb418443 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_7_ECX); /* PKU is not yet implemented for shadow paging. */ - if (!tdp_enabled) + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX); From b9dd21e104bcd45e124acfe978a79df71259e59b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Aug 2017 23:14:38 +0200 Subject: [PATCH 75/98] KVM: x86: simplify handling of PKRU Move it to struct kvm_arch_vcpu, replacing guest_pkru_valid with a simple comparison against the host value of the register. The write of PKRU in addition can be skipped if the guest has not enabled the feature. Once we do this, we need not test OSPKE in the host anymore, because guest_CR4.PKE=1 implies host_CR4.PKE=1. The static PKU test is kept to elide the code on older CPUs. 
Suggested-by: Yang Zhang Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/kvm_cache_regs.h | 5 ----- arch/x86/kvm/mmu.h | 2 +- arch/x86/kvm/svm.c | 7 ------- arch/x86/kvm/vmx.c | 25 ++++++++----------------- 5 files changed, 10 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 87ac4fba6d8e..f4d120a3e22e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -492,6 +492,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 pkru; u32 hflags; u64 efer; u64 apic_base; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 762cdf2595f9..e1e89ee4af75 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32); } -static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu) -{ - return kvm_x86_ops->get_pkru(vcpu); -} - static inline void enter_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags |= HF_GUEST_MASK; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index d7d248a000dd..4b9a3ae6b725 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, * index of the protection domain, so pte_pkey * 2 is * is the index of the first bit for the domain. */ - pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3; + pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. 
*/ offset = (pfec & ~1) + diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 56ba05312759..af256b786a70 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } -static u32 svm_get_pkru(struct kvm_vcpu *vcpu) -{ - return 0; -} - static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .get_pkru = svm_get_pkru, - .tlb_flush = svm_flush_tlb, .run = svm_vcpu_run, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b21b1223035..c6ef2940119b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -636,8 +636,6 @@ struct vcpu_vmx { u64 current_tsc_ratio; - bool guest_pkru_valid; - u32 guest_pkru; u32 host_pkru; /* @@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_vmx(vcpu)->emulation_required = emulation_required(vcpu); } -static u32 vmx_get_pkru(struct kvm_vcpu *vcpu) -{ - return to_vmx(vcpu)->guest_pkru; -} - static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) { u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); - if (vmx->guest_pkru_valid) - __write_pkru(vmx->guest_pkru); + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && + vcpu->arch.pkru != vmx->host_pkru) + __write_pkru(vcpu->arch.pkru); atomic_switch_perf_msrs(vmx); debugctlmsr = get_debugctlmsr(); @@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * back on host, so it is safe to read guest PKRU from current * XSAVE. 
*/ - if (boot_cpu_has(X86_FEATURE_OSPKE)) { - vmx->guest_pkru = __read_pkru(); - if (vmx->guest_pkru != vmx->host_pkru) { - vmx->guest_pkru_valid = true; + if (static_cpu_has(X86_FEATURE_PKU) && + kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { + vcpu->arch.pkru = __read_pkru(); + if (vcpu->arch.pkru != vmx->host_pkru) __write_pkru(vmx->host_pkru); - } else - vmx->guest_pkru_valid = false; } /* @@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .get_pkru = vmx_get_pkru, - .tlb_flush = vmx_flush_tlb, .run = vmx_vcpu_run, From 38cfd5e3df9c4f88e76b547eee2087ee5c042ae2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 23 Aug 2017 23:16:29 +0200 Subject: [PATCH 76/98] KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state The host pkru is restored right after vcpu exit (commit 1be0e61), so KVM_GET_XSAVE will return the host PKRU value instead. Fix this by using the guest PKRU explicitly in fill_xsave and load_xsave. This part is based on a patch by Junkang Fu. The host PKRU data may also not match the value in vcpu->arch.guest_fpu.state, because it could have been changed by userspace since the last time it was saved, so skip loading it in kvm_load_guest_fpu. 
Reported-by: Junkang Fu Cc: Yang Zhang Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870 Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/fpu/internal.h | 6 +++--- arch/x86/kvm/x86.c | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 255645f60ca2..554cdb205d17 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask) { if (use_xsave()) { - copy_kernel_to_xregs(&fpstate->xsave, -1); + copy_kernel_to_xregs(&fpstate->xsave, mask); } else { if (use_fxsr()) copy_kernel_to_fxregs(&fpstate->fxsave); @@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) : : [addr] "m" (fpstate)); } - __copy_kernel_to_fpregs(fpstate); + __copy_kernel_to_fpregs(fpstate, -1); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d734aa8c5b4f..05a5e57c6f39 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3245,7 +3245,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest + offset, src, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(dest + offset, &vcpu->arch.pkru, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest + offset, src, size); + } valid -= feature; @@ -3283,7 +3288,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) u32 size, offset, ecx, edx; cpuid_count(XSTATE_CPUID, index, &size, &offset, &ecx, &edx); - memcpy(dest, src + offset, size); + if (feature == XFEATURE_MASK_PKRU) + memcpy(&vcpu->arch.pkru, src + 
offset, + sizeof(vcpu->arch.pkru)); + else + memcpy(dest, src + offset, size); } valid -= feature; @@ -7633,7 +7642,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); + /* PKRU is separately restored in kvm_x86_ops->run. */ + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state, + ~XFEATURE_MASK_PKRU); trace_kvm_fpu(1); } From ccd5b3235180eef3cfec337df1c8554ab151b5cc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 24 Aug 2017 10:50:29 -0700 Subject: [PATCH 77/98] x86/mm: Fix use-after-free of ldt_struct The following commit: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") renamed init_new_context() to init_new_context_ldt() and added a new init_new_context() which calls init_new_context_ldt(). However, the error code of init_new_context_ldt() was ignored. Consequently, if a memory allocation in alloc_ldt_struct() failed during a fork(), the ->context.ldt of the new task remained the same as that of the old task (due to the memcpy() in dup_mm()). ldt_struct's are not intended to be shared, so a use-after-free occurred after one task exited. Fix the bug by making init_new_context() pass through the error code of init_new_context_ldt(). 
This bug was found by syzkaller, which encountered the following splat: BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710 CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 print_address_description+0x73/0x250 mm/kasan/report.c:252 kasan_report_error mm/kasan/report.c:351 [inline] kasan_report+0x24e/0x340 mm/kasan/report.c:409 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429 free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] exec_mmap fs/exec.c:1061 [inline] flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291 load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855 search_binary_handler+0x142/0x6b0 fs/exec.c:1652 exec_binprm fs/exec.c:1694 [inline] do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816 do_execve+0x31/0x40 fs/exec.c:1860 call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100 ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431 Allocated by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627 kmalloc include/linux/slab.h:493 [inline] alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67 write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277 sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307 entry_SYSCALL_64_fastpath+0x1f/0xbe Freed by task 3700: save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 
save_stack+0x43/0xd0 mm/kasan/kasan.c:447 set_track mm/kasan/kasan.c:459 [inline] kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 __cache_free mm/slab.c:3503 [inline] kfree+0xca/0x250 mm/slab.c:3820 free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121 free_ldt_struct arch/x86/kernel/ldt.c:173 [inline] destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171 destroy_context arch/x86/include/asm/mmu_context.h:157 [inline] __mmdrop+0xe9/0x530 kernel/fork.c:889 mmdrop include/linux/sched/mm.h:42 [inline] __mmput kernel/fork.c:916 [inline] mmput+0x541/0x6e0 kernel/fork.c:927 copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931 copy_process kernel/fork.c:1546 [inline] _do_fork+0x1ef/0xfb0 kernel/fork.c:2025 SYSC_clone kernel/fork.c:2135 [inline] SyS_clone+0x37/0x50 kernel/fork.c:2129 do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287 return_from_SYSCALL_64+0x0/0x7a Here is a C reproducer: #include #include #include #include #include #include #include static void *fork_thread(void *_arg) { fork(); } int main(void) { struct user_desc desc = { .entry_number = 8191 }; syscall(__NR_modify_ldt, 1, &desc, sizeof(desc)); for (;;) { if (fork() == 0) { pthread_t t; srand(getpid()); pthread_create(&t, NULL, fork_thread, NULL); usleep(rand() % 10000); syscall(__NR_exit_group, 0); } wait(NULL); } } Note: the reproducer takes advantage of the fact that alloc_ldt_struct() may use vmalloc() to allocate a large ->entries array, and after commit: 5d17a73a2ebe ("vmalloc: back off when the current task is killed") it is possible for userspace to fail a task's vmalloc() by sending a fatal signal, e.g. via exit_group(). It would be more difficult to reproduce this bug on kernels without that commit. This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y. 
Signed-off-by: Eric Biggers Acked-by: Dave Hansen Cc: [v4.6+] Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Hellwig Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Michal Hocko Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: linux-mm@kvack.org Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init") Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 265c907d7d4c..7a234be7e298 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk, mm->context.execute_only_pkey = -1; } #endif - init_new_context_ldt(tsk, mm); - - return 0; + return init_new_context_ldt(tsk, mm); } static inline void destroy_context(struct mm_struct *mm) { From 47c5310a8dbe7c2cb9f0083daa43ceed76c257fa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 24 Aug 2017 19:14:47 +1000 Subject: [PATCH 78/98] KVM: PPC: Book3S: Fix race and leak in kvm_vm_ioctl_create_spapr_tce() Nixiaoming pointed out that there is a memory leak in kvm_vm_ioctl_create_spapr_tce() if the call to anon_inode_getfd() fails; the memory allocated for the kvmppc_spapr_tce_table struct is not freed, and nor are the pages allocated for the iommu tables. In addition, we have already incremented the process's count of locked memory pages, and this doesn't get restored on error. David Hildenbrand pointed out that there is a race in that the function checks early on that there is not already an entry in the stt->iommu_tables list with the same LIOBN, but an entry with the same LIOBN could get added between then and when the new entry is added to the list. This fixes all three problems. 
To simplify things, we now call anon_inode_getfd() before placing the new entry in the list. The check for an existing entry is done while holding the kvm->lock mutex, immediately before adding the new entry to the list. Finally, on failure we now call kvmppc_account_memlimit to decrement the process's count of locked memory pages. Reported-by: Nixiaoming Reported-by: David Hildenbrand Signed-off-by: Paul Mackerras Signed-off-by: Paolo Bonzini --- arch/powerpc/kvm/book3s_64_vio.c | 56 +++++++++++++++++++------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index a160c14304eb..53766e2bc029 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args) { struct kvmppc_spapr_tce_table *stt = NULL; + struct kvmppc_spapr_tce_table *siter; unsigned long npages, size; int ret = -ENOMEM; int i; + int fd = -1; if (!args->size) return -EINVAL; - /* Check this LIOBN hasn't been previously allocated */ - list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) { - if (stt->liobn == args->liobn) - return -EBUSY; - } - size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); - if (ret) { - stt = NULL; - goto fail; - } + if (ret) + return ret; ret = -ENOMEM; stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *), GFP_KERNEL); if (!stt) - goto fail; + goto fail_acct; stt->liobn = args->liobn; stt->page_shift = args->page_shift; @@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, goto fail; } - kvm_get_kvm(kvm); + ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, + stt, O_RDWR | O_CLOEXEC); + if (ret < 0) + goto fail; mutex_lock(&kvm->lock); - list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + + /* Check this LIOBN hasn't been 
previously allocated */ + ret = 0; + list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) { + if (siter->liobn == args->liobn) { + ret = -EBUSY; + break; + } + } + + if (!ret) { + list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); + kvm_get_kvm(kvm); + } mutex_unlock(&kvm->lock); - return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, - stt, O_RDWR | O_CLOEXEC); + if (!ret) + return fd; -fail: - if (stt) { - for (i = 0; i < npages; i++) - if (stt->pages[i]) - __free_page(stt->pages[i]); + put_unused_fd(fd); - kfree(stt); - } + fail: + for (i = 0; i < npages; i++) + if (stt->pages[i]) + __free_page(stt->pages[i]); + + kfree(stt); + fail_acct: + kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); return ret; } From 22d538213ec4fa65b08b1edbf610066d8aab7bbb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 18 Aug 2017 15:52:54 -0700 Subject: [PATCH 79/98] blk-mq-debugfs: Add names for recently added flags The symbolic constants QUEUE_FLAG_SCSI_PASSTHROUGH, QUEUE_FLAG_QUIESCED and REQ_NOWAIT are missing from blk-mq-debugfs.c. Add these to blk-mq-debugfs.c such that these appear as names in debugfs instead of as numbers. 
Reviewed-by: Omar Sandoval Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9ebc2945f991..4f927a58dff8 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -75,6 +75,8 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(POLL_STATS), QUEUE_FLAG_NAME(REGISTERED), + QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), + QUEUE_FLAG_NAME(QUIESCED), }; #undef QUEUE_FLAG_NAME @@ -265,6 +267,7 @@ static const char *const cmd_flag_name[] = { CMD_FLAG_NAME(RAHEAD), CMD_FLAG_NAME(BACKGROUND), CMD_FLAG_NAME(NOUNMAP), + CMD_FLAG_NAME(NOWAIT), }; #undef CMD_FLAG_NAME From 1046d304900cf9d4b2c730c6860b8e03cc704377 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 26 Jul 2017 15:32:23 +0100 Subject: [PATCH 80/98] virtio_blk: fix incorrect message when disk is resized The message printed on disk resize is incorrect. The following is printed when resizing to 2 GiB: $ truncate -s 1G test.img $ qemu -device virtio-blk-pci,logical_block_size=4096,... (qemu) block_resize drive1 2G virtio_blk virtio0: new size: 4194304 4096-byte logical blocks (17.2 GB/16.0 GiB) The virtio_blk capacity config field is in 512-byte sector units regardless of logical_block_size as per the VIRTIO specification. Therefore the message should read: virtio_blk virtio0: new size: 524288 4096-byte logical blocks (2.15 GB/2.0 GiB) Note that this only affects the printed message. Thankfully the actual block device has the correct size because the block layer expects capacity in sectors. Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. 
Tsirkin --- drivers/block/virtio_blk.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 1498b899a593..d3d5523862c2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work) struct request_queue *q = vblk->disk->queue; char cap_str_2[10], cap_str_10[10]; char *envp[] = { "RESIZE=1", NULL }; + unsigned long long nblocks; u64 capacity; /* Host must always specify the capacity. */ @@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work) capacity = (sector_t)-1; } - string_get_size(capacity, queue_logical_block_size(q), + nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); + + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); - string_get_size(capacity, queue_logical_block_size(q), + string_get_size(nblocks, queue_logical_block_size(q), STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); dev_notice(&vdev->dev, - "new size: %llu %d-byte logical blocks (%s/%s)\n", - (unsigned long long)capacity, - queue_logical_block_size(q), - cap_str_10, cap_str_2); + "new size: %llu %d-byte logical blocks (%s/%s)\n", + nblocks, + queue_logical_block_size(q), + cap_str_10, + cap_str_2); set_capacity(vblk->disk, capacity); revalidate_disk(vblk->disk); From ba74b6f7fcc07355d087af6939712eed4a454821 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 24 Aug 2017 18:07:02 +0200 Subject: [PATCH 81/98] virtio_pci: fix cpu affinity support Commit 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for virtqueues"") removed the adjustment of the pre_vectors for the virtio MSI-X vector allocation which was added in commit fb5e31d9 ("virtio: allow drivers to request IRQ affinity when creating VQs"). 
This will lead to an incorrect assignment of MSI-X vectors, and potential deadlocks when offlining cpus. Signed-off-by: Christoph Hellwig Fixes: 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for virtqueues") Reported-by: YASUAKI ISHIMATSU Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 007a4f366086..1c4797e53f68 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -107,6 +107,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); + unsigned flags = PCI_IRQ_MSIX; unsigned i, v; int err = -ENOMEM; @@ -126,10 +127,13 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL)) goto error; + if (desc) { + flags |= PCI_IRQ_AFFINITY; + desc->pre_vectors++; /* virtio config vector */ + } + err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, - nvectors, PCI_IRQ_MSIX | - (desc ? PCI_IRQ_AFFINITY : 0), - desc); + nvectors, flags, desc); if (err < 0) goto error; vp_dev->msix_enabled = 1; From 556b969a1cfe2686aae149137fa1dfcac0eefe54 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 25 Aug 2017 15:55:30 -0700 Subject: [PATCH 82/98] PM/hibernate: touch NMI watchdog when creating snapshot Counting the pages for creating the hibernation snapshot can take a significant amount of time, especially on systems with large memory. Since the counting job is performed with irq disabled, this might lead to NMI lockup. The following warning was found on a system with 1.5TB DRAM: Freezing user space processes ... (elapsed 0.002 seconds) done. OOM killer disabled. PM: Preallocating image memory...
NMI watchdog: Watchdog detected hard LOCKUP on cpu 27 CPU: 27 PID: 3128 Comm: systemd-sleep Not tainted 4.13.0-0.rc2.git0.1.fc27.x86_64 #1 task: ffff9f01971ac000 task.stack: ffffb1a3f325c000 RIP: 0010:memory_bm_find_bit+0xf4/0x100 Call Trace: swsusp_set_page_free+0x2b/0x30 mark_free_pages+0x147/0x1c0 count_data_pages+0x41/0xa0 hibernate_preallocate_memory+0x80/0x450 hibernation_snapshot+0x58/0x410 hibernate+0x17c/0x310 state_store+0xdf/0xf0 kobj_attr_store+0xf/0x20 sysfs_kf_write+0x37/0x40 kernfs_fop_write+0x11c/0x1a0 __vfs_write+0x37/0x170 vfs_write+0xb1/0x1a0 SyS_write+0x55/0xc0 entry_SYSCALL_64_fastpath+0x1a/0xa5 ... done (allocated 6590003 pages) PM: Allocated 26360012 kbytes in 19.89 seconds (1325.28 MB/s) It has taken nearly 20 seconds(2.10GHz CPU) thus the NMI lockup was triggered. In case the timeout of the NMI watch dog has been set to 1 second, a safe interval should be 6590003/20 = 320k pages in theory. However there might also be some platforms running at a lower frequency, so feed the watchdog every 100k pages. [yu.c.chen@intel.com: simplification] Link: http://lkml.kernel.org/r/1503460079-29721-1-git-send-email-yu.c.chen@intel.com [yu.c.chen@intel.com: use interval of 128k instead of 100k to avoid modulus] Link: http://lkml.kernel.org/r/1503328098-5120-1-git-send-email-yu.c.chen@intel.com Signed-off-by: Chen Yu Reported-by: Jan Filipcewicz Suggested-by: Michal Hocko Reviewed-by: Michal Hocko Acked-by: Rafael J. 
Wysocki Cc: Mel Gorman Cc: Vlastimil Babka Cc: Len Brown Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1bad301820c7..7a58eb5757e3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -2535,9 +2536,14 @@ void drain_all_pages(struct zone *zone) #ifdef CONFIG_HIBERNATION +/* + * Touch the watchdog for every WD_PAGE_COUNT pages. + */ +#define WD_PAGE_COUNT (128*1024) + void mark_free_pages(struct zone *zone) { - unsigned long pfn, max_zone_pfn; + unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT; unsigned long flags; unsigned int order, t; struct page *page; @@ -2552,6 +2558,11 @@ void mark_free_pages(struct zone *zone) if (pfn_valid(pfn)) { page = pfn_to_page(pfn); + if (!--page_count) { + touch_nmi_watchdog(); + page_count = WD_PAGE_COUNT; + } + if (page_zone(page) != zone) continue; @@ -2565,8 +2576,13 @@ void mark_free_pages(struct zone *zone) unsigned long i; pfn = page_to_pfn(page); - for (i = 0; i < (1UL << order); i++) + for (i = 0; i < (1UL << order); i++) { + if (!--page_count) { + touch_nmi_watchdog(); + page_count = WD_PAGE_COUNT; + } swsusp_set_page_free(pfn_to_page(pfn + i)); + } } } spin_unlock_irqrestore(&zone->lock, flags); From 435c0b87d661da83771c30ed775f7c37eed193fb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 25 Aug 2017 15:55:33 -0700 Subject: [PATCH 83/98] mm, shmem: fix handling /sys/kernel/mm/transparent_hugepage/shmem_enabled /sys/kernel/mm/transparent_hugepage/shmem_enabled controls if we want to allocate huge pages when allocate pages for private in-kernel shmem mount. Unfortunately, as Dan noticed, I've screwed it up and the only way to make kernel allocate huge page for the mount is to use "force" there. All other values will be effectively ignored. 
Link: http://lkml.kernel.org/r/20170822144254.66431-1-kirill.shutemov@linux.intel.com Fixes: 5a6e75f8110c ("shmem: prepare huge= mount option and sysfs knob") Signed-off-by: Kirill A. Shutemov Reported-by: Dan Carpenter Cc: stable [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 6540e5982444..fbcb3c96a186 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3967,7 +3967,7 @@ int __init shmem_init(void) } #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE - if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY) + if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; else shmem_huge = 0; /* just in case it was patched */ @@ -4028,7 +4028,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj, return -EINVAL; shmem_huge = huge; - if (shmem_huge < SHMEM_HUGE_DENY) + if (shmem_huge > SHMEM_HUGE_DENY) SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge; return count; } From fffa281b48a91ad6dac1a18c5907ece58fa3879b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 25 Aug 2017 15:55:36 -0700 Subject: [PATCH 84/98] dax: fix deadlock due to misaligned PMD faults In DAX there are two separate places where the 2MiB range of a PMD is defined. The first is in the page tables, where a PMD mapping inserted for a given address spans from (vmf->address & PMD_MASK) to ((vmf->address & PMD_MASK) + PMD_SIZE - 1). That is, from the 2MiB boundary below the address to the 2MiB boundary above the address. So, for example, a fault at address 3MiB (0x30 0000) falls within the PMD that ranges from 2MiB (0x20 0000) to 4MiB (0x40 0000). The second PMD range is in the mapping->page_tree, where a given file offset is covered by a radix tree entry that spans from one 2MiB aligned file offset to another 2MiB aligned file offset. 
So, for example, the file offset for 3MiB (pgoff 768) falls within the PMD range for the order 9 radix tree entry that ranges from 2MiB (pgoff 512) to 4MiB (pgoff 1024). This system works so long as the addresses and file offsets for a given mapping both have the same offsets relative to the start of each PMD. Consider the case where the starting address for a given file isn't 2MiB aligned - say our faulting address is 3 MiB (0x30 0000), but that corresponds to the beginning of our file (pgoff 0). Now all the PMDs in the mapping are misaligned so that the 2MiB range defined in the page tables never matches up with the 2MiB range defined in the radix tree. The current code notices this case for DAX faults to storage with the following test in dax_pmd_insert_mapping(): if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) goto unlock_fallback; This test makes sure that the pfn we get from the driver is 2MiB aligned, and relies on the assumption that the 2MiB alignment of the pfn we get back from the driver matches the 2MiB alignment of the faulting address. However, faults to holes were not checked and we could hit the problem described above. This was reported in response to the NVML nvml/src/test/pmempool_sync TEST5: $ cd nvml/src/test/pmempool_sync $ make TEST5 You can grab NVML here: https://github.com/pmem/nvml/ The dmesg warning you see when you hit this error is: WARNING: CPU: 13 PID: 2900 at fs/dax.c:641 dax_insert_mapping_entry+0x2df/0x310 Where we notice in dax_insert_mapping_entry() that the radix tree entry we are about to replace doesn't match the locked entry that we had previously inserted into the tree. This happens because the initial insertion was done in grab_mapping_entry() using a pgoff calculated from the faulting address (vmf->address), and the replacement in dax_pmd_load_hole() => dax_insert_mapping_entry() is done using vmf->pgoff. 
In our failure case those two page offsets (one calculated from vmf->address, one using vmf->pgoff) point to different order 9 radix tree entries. This failure case can result in a deadlock because the radix tree unlock also happens on the pgoff calculated from vmf->address. This means that the locked radix tree entry that we swapped in to the tree in dax_insert_mapping_entry() using vmf->pgoff is never unlocked, so all future faults to that 2MiB range will block forever. Fix this by validating that the faulting address's PMD offset matches the PMD offset from the start of the file. This check is done at the very beginning of the fault and covers faults that would have mapped to storage as well as faults to holes. I left the COLOUR check in dax_pmd_insert_mapping() in place in case we ever hit the insanity condition where the alignment of the pfn we get from the driver doesn't match the alignment of the userspace address. Link: http://lkml.kernel.org/r/20170822222436.18926-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: "Slusarz, Marcin" Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/dax.c b/fs/dax.c index 306c2b603fb8..865d42c63e23 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1383,6 +1383,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the radix tree. 
+ */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + goto fallback; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) goto fallback; From 263630e8d176d87308481ebdcd78ef9426739c6b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Aug 2017 15:55:39 -0700 Subject: [PATCH 85/98] mm/madvise.c: fix freeing of locked page with MADV_FREE If madvise(..., MADV_FREE) split a transparent hugepage, it called put_page() before unlock_page(). This was wrong because put_page() can free the page, e.g. if a concurrent madvise(..., MADV_DONTNEED) has removed it from the memory mapping. put_page() then rightfully complained about freeing a locked page. Fix this by moving the unlock_page() before put_page(). This bug was found by syzkaller, which encountered the following splat: BUG: Bad page state in process syzkaller412798 pfn:1bd800 page:ffffea0006f60000 count:0 mapcount:0 mapping: (null) index:0x20a00 flags: 0x200000000040019(locked|uptodate|dirty|swapbacked) raw: 0200000000040019 0000000000000000 0000000000020a00 00000000ffffffff raw: ffffea0006f60020 ffffea0006f60020 0000000000000000 0000000000000000 page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: 0x1(locked) Modules linked in: CPU: 1 PID: 3037 Comm: syzkaller412798 Not tainted 4.13.0-rc5+ #35 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 bad_page+0x230/0x2b0 mm/page_alloc.c:565 free_pages_check_bad+0x1f0/0x2e0 mm/page_alloc.c:943 free_pages_check mm/page_alloc.c:952 [inline] free_pages_prepare mm/page_alloc.c:1043 [inline] free_pcp_prepare mm/page_alloc.c:1068 [inline] free_hot_cold_page+0x8cf/0x12b0 mm/page_alloc.c:2584 __put_single_page mm/swap.c:79 [inline] __put_page+0xfb/0x160 mm/swap.c:113 put_page include/linux/mm.h:814 [inline] 
madvise_free_pte_range+0x137a/0x1ec0 mm/madvise.c:371 walk_pmd_range mm/pagewalk.c:50 [inline] walk_pud_range mm/pagewalk.c:108 [inline] walk_p4d_range mm/pagewalk.c:134 [inline] walk_pgd_range mm/pagewalk.c:160 [inline] __walk_page_range+0xc3a/0x1450 mm/pagewalk.c:249 walk_page_range+0x200/0x470 mm/pagewalk.c:326 madvise_free_page_range.isra.9+0x17d/0x230 mm/madvise.c:444 madvise_free_single_vma+0x353/0x580 mm/madvise.c:471 madvise_dontneed_free mm/madvise.c:555 [inline] madvise_vma mm/madvise.c:664 [inline] SYSC_madvise mm/madvise.c:832 [inline] SyS_madvise+0x7d3/0x13c0 mm/madvise.c:760 entry_SYSCALL_64_fastpath+0x1f/0xbe Here is a C reproducer: #define _GNU_SOURCE #include #include #include #define MADV_FREE 8 #define PAGE_SIZE 4096 static void *mapping; static const size_t mapping_size = 0x1000000; static void *madvise_thrproc(void *arg) { madvise(mapping, mapping_size, (long)arg); } int main(void) { pthread_t t[2]; for (;;) { mapping = mmap(NULL, mapping_size, PROT_WRITE, MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); munmap(mapping + mapping_size / 2, PAGE_SIZE); pthread_create(&t[0], 0, madvise_thrproc, (void*)MADV_DONTNEED); pthread_create(&t[1], 0, madvise_thrproc, (void*)MADV_FREE); pthread_join(t[0], NULL); pthread_join(t[1], NULL); munmap(mapping, mapping_size); } } Note: to see the splat, CONFIG_TRANSPARENT_HUGEPAGE=y and CONFIG_DEBUG_VM=y are needed. 
Google Bug Id: 64696096 Link: http://lkml.kernel.org/r/20170823205235.132061-1-ebiggers3@gmail.com Fixes: 854e9ed09ded ("mm: support madvise(MADV_FREE)") Signed-off-by: Eric Biggers Acked-by: David Rientjes Acked-by: Minchan Kim Acked-by: Michal Hocko Cc: Dmitry Vyukov Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: [v4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 47d8d8a25eae..23ed525bc2bc 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -368,8 +368,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, pte_offset_map_lock(mm, pmd, addr, &ptl); goto out; } - put_page(page); unlock_page(page); + put_page(page); pte = pte_offset_map_lock(mm, pmd, addr, &ptl); pte--; addr -= PAGE_SIZE; From 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 25 Aug 2017 15:55:43 -0700 Subject: [PATCH 86/98] fork: fix incorrect fput of ->exe_file causing use-after-free Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") made it possible to kill a forking task while it is waiting to acquire its ->mmap_sem for write, in dup_mmap(). However, it was overlooked that this introduced a new error path before a reference is taken on the mm_struct's ->exe_file. Since the ->exe_file of the new mm_struct was already set to the old ->exe_file by the memcpy() in dup_mm(), it was possible for the mmput() in the error path of dup_mm() to drop a reference to ->exe_file which was never taken. This caused the struct file to later be freed prematurely. Fix it by updating mm_init() to NULL out the ->exe_file, in the same place it clears other things like the list of mmaps. This bug was found by syzkaller.
It can be reproduced using the following C program: #define _GNU_SOURCE #include #include #include #include #include #include static void *mmap_thread(void *_arg) { for (;;) { mmap(NULL, 0x1000000, PROT_READ, MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); } } static void *fork_thread(void *_arg) { usleep(rand() % 10000); fork(); } int main(void) { fork(); fork(); fork(); for (;;) { if (fork() == 0) { pthread_t t; pthread_create(&t, NULL, mmap_thread, NULL); pthread_create(&t, NULL, fork_thread, NULL); usleep(rand() % 10000); syscall(__NR_exit_group, 0); } wait(NULL); } } No special kernel config options are needed. It usually causes a NULL pointer dereference in __remove_shared_vm_struct() during exit, or in dup_mmap() (which is usually inlined into copy_process()) during fork. Both are due to a vm_area_struct's ->vm_file being used after it's already been freed. Google Bug Id: 64772007 Link: http://lkml.kernel.org/r/20170823211408.31198-1-ebiggers3@gmail.com Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable") Signed-off-by: Eric Biggers Tested-by: Mark Rutland Acked-by: Michal Hocko Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Konstantin Khlebnikov Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Vlastimil Babka Cc: [v4.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/fork.c b/kernel/fork.c index e075b7780421..cbbea277b3fb 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -806,6 +806,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_mm_init(mm); init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS From 91b540f98872a206ea1c49e4aa6ea8eed0886644 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 25 Aug 2017 15:55:46 -0700 Subject: [PATCH 87/98] mm/memblock.c: reversed 
logic in memblock_discard() In recently introduced memblock_discard() there is a reversed logic bug. Memory is freed of static array instead of dynamically allocated one. Link: http://lkml.kernel.org/r/1503511441-95478-2-git-send-email-pasha.tatashin@oracle.com Fixes: 3010f876500f ("mm: discard memblock data later") Signed-off-by: Pavel Tatashin Reported-by: Woody Suwalski Tested-by: Woody Suwalski Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index bf14aea6ab70..91205780e6b1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -299,7 +299,7 @@ void __init memblock_discard(void) __memblock_free_late(addr, size); } - if (memblock.memory.regions == memblock_memory_init_regions) { + if (memblock.memory.regions != memblock_memory_init_regions) { addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); From 8e1101d251647802d0a4ae19eb3d0e1453eaeff4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 25 Aug 2017 18:58:42 -0500 Subject: [PATCH 88/98] PCI/MSI: Don't warn when irq_create_affinity_masks() returns NULL irq_create_affinity_masks() can return NULL on non-SMP systems, when there are not enough "free" vectors available to spread, or if memory allocation for the CPU masks fails. Only the allocation failure is of interest, and even then the system will work just fine except for non-optimally spread vectors. Thus remove the warnings. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Acked-by: David S. 
Miller --- drivers/pci/msi.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 253d92409bb3..2225afc1cbbb 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -538,12 +538,9 @@ msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd) struct msi_desc *entry; u16 control; - if (affd) { + if (affd) masks = irq_create_affinity_masks(nvec, affd); - if (!masks) - dev_err(&dev->dev, "can't allocate MSI affinity masks for %d vectors\n", - nvec); - } + /* MSI Entry Initialization */ entry = alloc_msi_entry(&dev->dev, nvec, masks); @@ -679,12 +676,8 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, struct msi_desc *entry; int ret, i; - if (affd) { + if (affd) masks = irq_create_affinity_masks(nvec, affd); - if (!masks) - dev_err(&dev->dev, "can't allocate MSI-X affinity masks for %d vectors\n", - nvec); - } for (i = 0, curmsk = masks; i < nvec; i++) { entry = alloc_msi_entry(&dev->dev, 1, curmsk); From 0cc3b0ec23ce4c69e1e890ed2b8d2fa932b14aad Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 12:12:25 -0700 Subject: [PATCH 89/98] Clarify (and fix) MAX_LFS_FILESIZE macros We have a MAX_LFS_FILESIZE macro that is meant to be filled in by filesystems (and other IO targets) that know they are 64-bit clean and don't have any 32-bit limits in their IO path. It turns out that our 32-bit value for that limit was bogus. On 32-bit, the VM layer is limited by the page cache to only 32-bit index values, but our logic for that was confusing and actually wrong. We used to define that value to (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) which is actually odd in several ways: it limits the index to 31 bits, and then it limits files so that they can't have data in that last byte of a page that has the highest 31-bit index (ie page index 0x7fffffff). Neither of those limitations make sense. 
The index is actually the full 32 bit unsigned value, and we can use that whole full page. So the maximum size of the file would logically be "PAGE_SIZE << BITS_PER_LONG". However, we do want to avoid the maximum index, because we have code that iterates over the page indexes, and we don't want that code to overflow. So the maximum size of a file on a 32-bit host should actually be one page less than the full 32-bit index. So the actual limit is ULONG_MAX << PAGE_SHIFT. That means that we will not actually be using the page of that last index (ULONG_MAX), but we can grow a file up to that limit. The wrong value of MAX_LFS_FILESIZE actually caused problems for Doug Nazar, who was still using a 32-bit host, but with a 9.7TB 2 x RAID5 volume. It turns out that our old MAX_LFS_FILESIZE was 8TiB (well, one byte less), but the actual true VM limit is one page less than 16TiB. This was invisible until commit c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()"), which started applying that MAX_LFS_FILESIZE limit to block devices too. NOTE! On 64-bit, the page index isn't a limiter at all, and the limit is actually just the offset type itself (loff_t), which is signed. But for clarity, on 64-bit, just use the maximum signed value, and don't make people have to count the number of 'f' characters in the hex constant. So just use LLONG_MAX for the 64-bit case. That was what the value had been before too, just written out as a hex constant.
Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()") Reported-and-tested-by: Doug Nazar Cc: Andreas Dilger Cc: Mark Fasheh Cc: Joel Becker Cc: Dave Kleikamp Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 6e1fd5d21248..cbfe127bccf8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -907,9 +907,9 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE ((loff_t)ULONG_MAX << PAGE_SHIFT) #elif BITS_PER_LONG==64 -#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) +#define MAX_LFS_FILESIZE ((loff_t)LLONG_MAX) #endif #define FL_POSIX 1 From 3510ca20ece0150af6b10c77a74ff1b5c198e3e2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 13:55:12 -0700 Subject: [PATCH 90/98] Minor page waitqueue cleanups Tim Chen and Kan Liang have been battling a customer load that shows extremely long page wakeup lists. The cause seems to be constant NUMA migration of a hot page that is shared across a lot of threads, but the actual root cause for the exact behavior has not been found. Tim has a patch that batches the wait list traversal at wakeup time, so that we at least don't get long uninterruptible cases where we traverse and wake up thousands of processes and get nasty latency spikes. That is likely 4.14 material, but we're still discussing the page waitqueue specific parts of it. In the meantime, I've tried to look at making the page wait queues less expensive, and failing miserably. If you have thousands of threads waiting for the same page, it will be painful. 
We'll need to try to figure out the NUMA balancing issue some day, in addition to avoiding the excessive spinlock hold times. That said, having tried to rewrite the page wait queues, I can at least fix up some of the braindamage in the current situation. In particular: (a) we don't want to continue walking the page wait list if the bit we're waiting for already got set again (which seems to be one of the patterns of the bad load). That makes no progress and just causes pointless cache pollution chasing the pointers. (b) we don't want to put the non-locking waiters always on the front of the queue, and the locking waiters always on the back. Not only is that unfair, it means that we wake up thousands of reading threads that will just end up being blocked by the writer later anyway. Also add a comment about the layout of 'struct wait_page_key' - there is an external user of it in the cachefiles code that means that it has to match the layout of 'struct wait_bit_key' in the two first members. It so happens to match, because 'struct page *' and 'unsigned long *' end up having the same values simply because the page flags are the first member in struct page. 
Cc: Tim Chen Cc: Kan Liang Cc: Mel Gorman Cc: Christopher Lameter Cc: Andi Kleen Cc: Davidlohr Bueso Cc: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/sched/wait.c | 7 ++++--- mm/filemap.c | 11 ++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 17f11c6b0a9f..d6afed6d0752 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -70,9 +70,10 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, list_for_each_entry_safe(curr, next, &wq_head->head, entry) { unsigned flags = curr->flags; - - if (curr->func(curr, mode, wake_flags, key) && - (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) + int ret = curr->func(curr, mode, wake_flags, key); + if (ret < 0) + break; + if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) break; } } diff --git a/mm/filemap.c b/mm/filemap.c index a49702445ce0..baba290c276b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -885,6 +885,7 @@ void __init pagecache_init(void) page_writeback_init(); } +/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */ struct wait_page_key { struct page *page; int bit_nr; @@ -909,8 +910,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, if (wait_page->bit_nr != key->bit_nr) return 0; + + /* Stop walking if it's locked */ if (test_bit(key->bit_nr, &key->page->flags)) - return 0; + return -1; return autoremove_wake_function(wait, mode, sync, key); } @@ -964,6 +967,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, int ret = 0; init_wait(wait); + wait->flags = lock ? 
WQ_FLAG_EXCLUSIVE : 0; wait->func = wake_page_function; wait_page.page = page; wait_page.bit_nr = bit_nr; @@ -972,10 +976,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, spin_lock_irq(&q->lock); if (likely(list_empty(&wait->entry))) { - if (lock) - __add_wait_queue_entry_tail_exclusive(q, wait); - else - __add_wait_queue(q, wait); + __add_wait_queue_entry_tail(q, wait); SetPageWaiters(page); } From a8b169afbf06a678437632709caac98e16f99263 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 16:25:09 -0700 Subject: [PATCH 91/98] Avoid page waitqueue race leaving possible page locker waiting The "lock_page_killable()" function waits for exclusive access to the page lock bit using the WQ_FLAG_EXCLUSIVE bit in the waitqueue entry set. That means that if it gets woken up, other waiters may have been skipped. That, in turn, means that if it sees the page being unlocked, it *must* take that lock and return success, even if a lethal signal is also pending. So instead of checking for lethal signals first, we need to check for them after we've checked the actual bit that we were waiting for. Even if that might then delay the killing of the process. This matches the order of the old "wait_on_bit_lock()" infrastructure that the page locking used to use (and is still used in a few other areas). Note that if we still return an error after having unsuccessfully tried to acquire the page lock, that is ok: that means that some other thread was able to get ahead of us and lock the page, and when that other thread then unlocks the page, the wakeup event will be repeated. So any other pending waiters will now get properly woken up. 
Fixes: 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit") Cc: Nick Piggin Cc: Peter Zijlstra Cc: Mel Gorman Cc: Jan Kara Cc: Davidlohr Bueso Cc: Andi Kleen Signed-off-by: Linus Torvalds --- mm/filemap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index baba290c276b..0b41c8cbeabc 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -986,10 +986,6 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, if (likely(test_bit(bit_nr, &page->flags))) { io_schedule(); - if (unlikely(signal_pending_state(state, current))) { - ret = -EINTR; - break; - } } if (lock) { @@ -999,6 +995,11 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, if (!test_bit(bit_nr, &page->flags)) break; } + + if (unlikely(signal_pending_state(state, current))) { + ret = -EINTR; + break; + } } finish_wait(q, wait); From cc4a41fe5541a73019a864883297bd5043aa6d98 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Aug 2017 17:20:40 -0700 Subject: [PATCH 92/98] Linux 4.13-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index dda88e744d5f..8db6be7dca73 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Fearless Coyote # *DOCUMENTATION* From 79de3cbe9a974e03a02b71da80da9ee0eb15a2d0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 23 Aug 2017 22:37:00 +0200 Subject: [PATCH 93/98] fs/select: Fix memory corruption in compat_get_fd_set() Commit 464d62421cb8 ("select: switch compat_{get,put}_fd_set() to compat_{get,put}_bitmap()") changed the calculation on how many bytes need to be zeroed when userspace handed over a NULL pointer for a fdset array in the select syscall. 
The calculation was changed in compat_get_fd_set() wrongly from memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); to memset(fdset, 0, ALIGN(nr, BITS_PER_LONG)); The ALIGN(nr, BITS_PER_LONG) calculates the number of _bits_ which need to be zeroed in the target fdset array (rounded up to the next full bits for an unsigned long). But the memset() call expects the number of _bytes_ to be zeroed. This leads to clearing more memory than wanted (on the stack area or even at kmalloc()ed memory areas) and to random kernel crashes as we have seen them on the parisc platform. The correct change should have been memset(fdset, 0, (ALIGN(nr, BITS_PER_LONG) / BITS_PER_LONG) * BYTES_PER_LONG); which is the same as can be achieved with a call to zero_fd_set(nr, fdset). Fixes: 464d62421cb8 ("select: switch compat_{get,put}_fd_set() to compat_{get,put}_bitmap()") Acked-by: Al Viro Signed-off-by: Helge Deller Signed-off-by: Linus Torvalds --- fs/select.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/select.c b/fs/select.c index 9d5f15ed87fe..c6362e38ae92 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1164,11 +1164,7 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, if (ufdset) { return compat_get_bitmap(fdset, ufdset, nr); } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, ALIGN makes sure that - * actually happens. - */ - memset(fdset, 0, ALIGN(nr, BITS_PER_LONG)); + zero_fd_set(nr, fdset); return 0; } } From e8206d2baa41a4c7cf4590929f8819f8309b564d Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Mon, 28 Aug 2017 15:03:58 -0700 Subject: [PATCH 94/98] ARCv2: SMP: Mask only private-per-core IRQ lines on boot at core intc Recent commit a8ec3ee861b6 "arc: Mask individual IRQ lines during core INTC init" breaks interrupt handling on ARCv2 SMP systems.
That commit masked all interrupts at onset, as some controllers on some boards (customer as well as internal), would assert interrupts early before any handlers were installed. For SMP systems, the masking was done at each cpu's core-intc. Later, when the IRQ was actually requested, it was unmasked, but only on the requesting cpu. For "common" interrupts, which were wired up from the 2nd level IDU intc, this was an issue as they needed to be enabled on ALL the cpus (given that IDU IRQs are by default served Round Robin across cpus) So fix that by NOT masking "common" interrupts at core-intc, but instead at the 2nd level IDU intc (latter already being done in idu_of_init()) Fixes: a8ec3ee861b6 ("arc: Mask individual IRQ lines during core INTC init") Signed-off-by: Alexey Brodkin [vgupta: reworked changelog, removed the extraneous idu_irq_mask_raw()] Signed-off-by: Vineet Gupta Signed-off-by: Linus Torvalds --- arch/arc/kernel/intc-arcv2.c | 11 +++++++++-- arch/arc/kernel/intc-compact.c | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index cf90714a676d..067ea362fb3e 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -75,13 +75,20 @@ void arc_init_IRQ(void) * Set a default priority for all available interrupts to prevent * switching of register banks if Fast IRQ and multiple register banks * are supported by CPU. - * Also disable all IRQ lines so faulty external hardware won't + * Also disable private-per-core IRQ lines so faulty external HW won't * trigger interrupt that kernel is not ready to handle. */ for (i = NR_EXCEPTIONS; i < irq_bcr.irqs + NR_EXCEPTIONS; i++) { write_aux_reg(AUX_IRQ_SELECT, i); write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO); - write_aux_reg(AUX_IRQ_ENABLE, 0); + + /* + * Only mask cpu private IRQs here.
+ * "common" interrupts are masked at IDU, otherwise it would + * need to be unmasked at each cpu, with IPIs + */ + if (i < FIRST_EXT_IRQ) + write_aux_reg(AUX_IRQ_ENABLE, 0); } /* setup status32, don't enable intr yet as kernel doesn't want */ diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c index cef388025adf..47b421fa0147 100644 --- a/arch/arc/kernel/intc-compact.c +++ b/arch/arc/kernel/intc-compact.c @@ -27,7 +27,7 @@ */ void arc_init_IRQ(void) { - int level_mask = 0, i; + unsigned int level_mask = 0, i; /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */ level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ; From b339752d054fb32863418452dff350a1086885b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 28 Aug 2017 14:51:27 -0700 Subject: [PATCH 95/98] cpumask: fix spurious cpumask_of_node() on non-NUMA multi-node configs When !NUMA, cpumask_of_node(@node) equals cpu_online_mask regardless of @node. The assumption seems that if !NUMA, there shouldn't be more than one node and thus reporting cpu_online_mask regardless of @node is correct. However, that assumption was broken years ago to support DISCONTIGMEM and whether a system has multiple nodes or not is separately controlled by NEED_MULTIPLE_NODES. This means that, on a system with !NUMA && NEED_MULTIPLE_NODES, cpumask_of_node() will report cpu_online_mask for all possible nodes, indicating that the CPUs are associated with multiple nodes which is an impossible configuration. This bug has been around forever but doesn't look like it has caused any noticeable symptoms. However, it triggers a WARN recently added to workqueue to verify NUMA affinity configuration. Fix it by reporting empty cpumask on non-zero nodes if !NUMA. 
Signed-off-by: Tejun Heo Reported-and-tested-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/asm-generic/topology.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index fc824e2828f3..5d2add1a6c96 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -48,7 +48,11 @@ #define parent_node(node) ((void)(node),0) #endif #ifndef cpumask_of_node -#define cpumask_of_node(node) ((void)node, cpu_online_mask) + #ifdef CONFIG_NEED_MULTIPLE_NODES + #define cpumask_of_node(node) ((node) == 0 ? cpu_online_mask : cpu_none_mask) + #else + #define cpumask_of_node(node) ((void)node, cpu_online_mask) + #endif #endif #ifndef pcibus_to_node #define pcibus_to_node(bus) ((void)(bus), -1) From 9c3a815f471a84811cf8021cf64aae3b8081dfde Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 28 Aug 2017 16:45:40 -0700 Subject: [PATCH 96/98] page waitqueue: always add new entries at the end Commit 3510ca20ece0 ("Minor page waitqueue cleanups") made the page queue code always add new waiters to the back of the queue, which helps upcoming patches to batch the wakeups for some horrid loads where the wait queues grow to thousands of entries. However, I forgot about the nasty add_page_wait_queue() special case code that is only used by the cachefiles code. That one still continued to add the new wait queue entries at the beginning of the list. Fix it, because any sane batched wakeup will require that we don't suddenly start getting new entries at the beginning of the list that we already handled in a previous batch.
[ The current code always does the whole list while holding the lock, so wait queue ordering doesn't matter for correctness, but even then it's better to add later entries at the end from a fairness standpoint ] Signed-off-by: Linus Torvalds --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 0b41c8cbeabc..65b4b6e7f7bd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1041,7 +1041,7 @@ void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) unsigned long flags; spin_lock_irqsave(&q->lock, flags); - __add_wait_queue(q, waiter); + __add_wait_queue_entry_tail(q, waiter); SetPageWaiters(page); spin_unlock_irqrestore(&q->lock, flags); } From f12f42acdbb577a12eecfcebbbec41c81505c4dc Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Wed, 23 Aug 2017 17:07:50 -0400 Subject: [PATCH 97/98] perf/core: Fix potential double-fetch bug While examining the kernel source code, I found a dangerous operation that could turn into a double-fetch situation (a race condition bug) where the same userspace memory region is fetched twice into kernel with sanity checks after the first fetch while missing checks after the second fetch. 1. The first fetch happens in line 9573 get_user(size, &uattr->size). 2. Subsequently the 'size' variable undergoes a few sanity checks and transformations (line 9577 to 9584). 3. The second fetch happens in line 9610 copy_from_user(attr, uattr, size) 4. Given that 'uattr' can be fully controlled in userspace, an attacker can exploit a race condition to override 'uattr->size' to arbitrary value (say, 0xFFFFFFFF) after the first fetch but before the second fetch. The changed value will be copied to 'attr->size'. 5. There are no further checks on 'attr->size' until the end of this function, and once the function returns, we lose the context to verify that 'attr->size' conforms to the sanity checks performed in step 2 (line 9577 to 9584). 6.
My manual analysis shows that 'attr->size' is not used elsewhere later, so, there is no working exploit against it right now. However, this could easily turn into an exploitable one if careless developers start to use 'attr->size' later. To fix this, override 'attr->size' from the second fetch to the one from the first fetch, regardless of what is actually copied in. In this way, it is assured that 'attr->size' is consistent with the checks performed after the first fetch. Signed-off-by: Meng Xu Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: meng.xu@gatech.edu Cc: sanidhya@gatech.edu Cc: taesoo@gatech.edu Link: http://lkml.kernel.org/r/1503522470-35531-1-git-send-email-meng.xu@gatech.edu Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 3504125871d2..ce131d25622a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9611,6 +9611,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (ret) return -EFAULT; + attr->size = size; + if (attr->__reserved_1) return -EINVAL; From 75e8387685f6c65feb195a4556110b58f852b848 Mon Sep 17 00:00:00 2001 From: Zhou Chengming Date: Fri, 25 Aug 2017 21:49:37 +0800 Subject: [PATCH 98/98] perf/ftrace: Fix double traces of perf on ftrace:function When running perf on the ftrace:function tracepoint, there is a bug which can be reproduced by: perf record -e ftrace:function -a sleep 20 & perf record -e ftrace:function ls perf script ls 10304 [005] 171.853235: ftrace:function: perf_output_begin ls 10304 [005] 171.853237: ftrace:function: perf_output_begin ls 10304 [005] 171.853239: ftrace:function: task_tgid_nr_ns ls 10304 [005] 171.853240: ftrace:function: task_tgid_nr_ns ls 10304 [005] 171.853242: ftrace:function: __task_pid_nr_ns ls 10304 [005] 171.853244: ftrace:function: __task_pid_nr_ns We can see that all the function traces are
doubled. The problem is caused by the inconsistency of the register function perf_ftrace_event_register() with the probe function perf_ftrace_function_call(). The former registers one probe for every perf_event. And the latter handles all perf_events on the current cpu. So when two perf_events on the current cpu, the traces of them will be doubled. So this patch adds an extra parameter "event" for perf_tp_event, only send sample data to this event when it's not NULL. Signed-off-by: Zhou Chengming Reviewed-by: Jiri Olsa Acked-by: Steven Rostedt (VMware) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: huawei.libin@huawei.com Link: http://lkml.kernel.org/r/1503668977-12526-1-git-send-email-zhouchengming1@huawei.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 4 ++-- kernel/events/core.c | 13 +++++++++---- kernel/trace/trace_event_perf.c | 4 +++- kernel/trace/trace_kprobe.c | 4 ++-- kernel/trace/trace_syscalls.c | 4 ++-- kernel/trace/trace_uprobe.c | 2 +- 7 files changed, 20 insertions(+), 13 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b14095bcf4bb..c00cd4b02f32 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1201,7 +1201,7 @@ extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task); + struct task_struct *task, struct perf_event *event); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 536c80ff7ad9..5012b524283d 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -508,9 +508,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, static inline void 
perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, - struct task_struct *task) + struct task_struct *task, struct perf_event *event) { - perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } #endif diff --git a/kernel/events/core.c b/kernel/events/core.c index ce131d25622a..03ac9c8b02fb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7906,16 +7906,15 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, } } perf_tp_event(call->event.type, count, raw_data, size, regs, head, - rctx, task); + rctx, task, NULL); } EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task) + struct task_struct *task, struct perf_event *event) { struct perf_sample_data data; - struct perf_event *event; struct perf_raw_record raw = { .frag = { @@ -7929,9 +7928,15 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, perf_trace_buf_update(record, event_type); - hlist_for_each_entry_rcu(event, head, hlist_entry) { + /* Use the given event instead of the hlist */ + if (event) { if (perf_tp_event_match(event, &data, regs)) perf_swevent_event(event, count, &data, regs); + } else { + hlist_for_each_entry_rcu(event, head, hlist_entry) { + if (perf_tp_event_match(event, &data, regs)) + perf_swevent_event(event, count, &data, regs); + } } /* diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 562fa69df5d3..13ba2d3f6a91 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -306,6 +306,7 @@ static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *pt_regs) { + struct perf_event *event; struct ftrace_entry *entry; struct 
hlist_head *head; struct pt_regs regs; @@ -329,8 +330,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, entry->ip = ip; entry->parent_ip = parent_ip; + event = container_of(ops, struct perf_event, ftrace_ops); perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, - 1, &regs, head, NULL); + 1, &regs, head, NULL, event); #undef ENTRY_SIZE } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c9b5aa10fbf9..8a907e12b6b9 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1200,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL); + head, NULL, NULL); } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1236,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL); + head, NULL, NULL); } NOKPROBE_SYMBOL(kretprobe_perf_func); #endif /* CONFIG_PERF_EVENTS */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5e10395da88e..74d9a86eccc0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -596,7 +596,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_trace_buf_submit(rec, size, rctx, sys_data->enter_event->event.type, 1, regs, - head, NULL); + head, NULL, NULL); } static int perf_sysenter_enable(struct trace_event_call *call) @@ -667,7 +667,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); perf_trace_buf_submit(rec, size, rctx,
sys_data->exit_event->event.type, - 1, regs, head, NULL); + 1, regs, head, NULL, NULL); } static int perf_sysexit_enable(struct trace_event_call *call) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index a7581fec9681..4525e0271a53 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1156,7 +1156,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, } perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, - head, NULL); + head, NULL, NULL); out: preempt_enable(); }