From 2a596fc9d974bb040eda9ab70bf8756fcaaa6afe Mon Sep 17 00:00:00 2001
From: Jonathan Liu <net147@gmail.com>
Date: Mon, 10 Jul 2017 16:55:04 +1000
Subject: [PATCH 01/98] drm/sun4i: Implement drm_driver lastclose to restore
 fbdev console

The drm_driver lastclose callback is called when the last userspace
DRM client has closed. Call drm_fbdev_cma_restore_mode to restore
the fbdev console otherwise the fbdev console will stop working.

Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support")
Cc: stable@vger.kernel.org
Tested-by: Olliver Schinagl <oliver@schinagl.nl>
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Jonathan Liu <net147@gmail.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 drivers/gpu/drm/sun4i/sun4i_drv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index abc7d8fe06b4..a45a627283a1 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -25,12 +25,20 @@
 #include "sun4i_framebuffer.h"
 #include "sun4i_tcon.h"
 
+static void sun4i_drv_lastclose(struct drm_device *dev)
+{
+	struct sun4i_drv *drv = dev->dev_private;
+
+	drm_fbdev_cma_restore_mode(drv->fbdev);
+}
+
 DEFINE_DRM_GEM_CMA_FOPS(sun4i_drv_fops);
 
 static struct drm_driver sun4i_drv_driver = {
 	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_ATOMIC,
 
 	/* Generic Operations */
+	.lastclose		= sun4i_drv_lastclose,
 	.fops			= &sun4i_drv_fops,
 	.name			= "sun4i-drm",
 	.desc			= "Allwinner sun4i Display Engine",

From bc240eec4b074f5dc2753f295e980e66b72c90fb Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 26 Jun 2017 13:50:41 -0600
Subject: [PATCH 02/98] ntb: use correct mw_count function in ntb_tool and
 ntb_transport

After converting to the new API, both ntb_tool and ntb_transport are
using ntb_mw_count to iterate through ntb_peer_get_addr when they
should be using ntb_peer_mw_count.

This probably isn't an issue with the Intel and AMD drivers but
this will matter for any future driver with asymmetric memory window
counts.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Allen Hubbe <Allen.Hubbe@emc.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Fixes: 443b9a14ecbe ("NTB: Alter MW API to support multi-ports devices")
---
 drivers/ntb/ntb_transport.c | 2 +-
 drivers/ntb/test/ntb_tool.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 9a03c5871efe..b29558ddfe95 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1059,7 +1059,7 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
 	int node;
 	int rc, i;
 
-	mw_count = ntb_mw_count(ndev, PIDX);
+	mw_count = ntb_peer_mw_count(ndev);
 
 	if (!ndev->ops->mw_set_trans) {
 		dev_err(&ndev->dev, "Inbound MW based NTB API is required\n");
diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c
index f002bf48a08d..a69815c45ce6 100644
--- a/drivers/ntb/test/ntb_tool.c
+++ b/drivers/ntb/test/ntb_tool.c
@@ -959,7 +959,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb)
 	tc->ntb = ntb;
 	init_waitqueue_head(&tc->link_wq);
 
-	tc->mw_count = min(ntb_mw_count(tc->ntb, PIDX), MAX_MWS);
+	tc->mw_count = min(ntb_peer_mw_count(tc->ntb), MAX_MWS);
 	for (i = 0; i < tc->mw_count; i++) {
 		rc = tool_init_mw(tc, i);
 		if (rc)

From eb92b4183d93a6f101a6bd3aaae651de404c119a Mon Sep 17 00:00:00 2001
From: Andreas Klinger <ak@it-klinger.de>
Date: Tue, 25 Jul 2017 11:11:14 +0200
Subject: [PATCH 03/98] iio: bmp280: properly initialize device for humidity
 reading

If the device is not initialized at least once it happens that the humidity
reading is skipped, which means the special value 0x8000 is delivered.

To avoid this case the oversampling of the humidity must be set before
the oversampling of the temperature and pressure is set, as written in the
datasheet of the BME280.

Furthermore proper error detection is added in case a skipped value is read
from the device. This is done also for pressure and temperature reading.
In particular, it doesn't make sense to compensate this value and treat it
as a regular value.

Signed-off-by: Andreas Klinger <ak@it-klinger.de>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/bmp280-core.c | 27 ++++++++++++++++++++++++---
 drivers/iio/pressure/bmp280.h      |  5 +++++
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index d82b788374b6..0d2ea3ee371b 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -282,6 +282,11 @@ static int bmp280_read_temp(struct bmp280_data *data,
 	}
 
 	adc_temp = be32_to_cpu(tmp) >> 12;
+	if (adc_temp == BMP280_TEMP_SKIPPED) {
+		/* reading was skipped */
+		dev_err(data->dev, "reading temperature skipped\n");
+		return -EIO;
+	}
 	comp_temp = bmp280_compensate_temp(data, adc_temp);
 
 	/*
@@ -317,6 +322,11 @@ static int bmp280_read_press(struct bmp280_data *data,
 	}
 
 	adc_press = be32_to_cpu(tmp) >> 12;
+	if (adc_press == BMP280_PRESS_SKIPPED) {
+		/* reading was skipped */
+		dev_err(data->dev, "reading pressure skipped\n");
+		return -EIO;
+	}
 	comp_press = bmp280_compensate_press(data, adc_press);
 
 	*val = comp_press;
@@ -345,6 +355,11 @@ static int bmp280_read_humid(struct bmp280_data *data, int *val, int *val2)
 	}
 
 	adc_humidity = be16_to_cpu(tmp);
+	if (adc_humidity == BMP280_HUMIDITY_SKIPPED) {
+		/* reading was skipped */
+		dev_err(data->dev, "reading humidity skipped\n");
+		return -EIO;
+	}
 	comp_humidity = bmp280_compensate_humidity(data, adc_humidity);
 
 	*val = comp_humidity;
@@ -597,14 +612,20 @@ static const struct bmp280_chip_info bmp280_chip_info = {
 
 static int bme280_chip_config(struct bmp280_data *data)
 {
-	int ret = bmp280_chip_config(data);
+	int ret;
 	u8 osrs = BMP280_OSRS_HUMIDITIY_X(data->oversampling_humid + 1);
 
+	/*
+	 * Oversampling of humidity must be set before oversampling of
+	 * temperature/pressure is set to become effective.
+	 */
+	ret = regmap_update_bits(data->regmap, BMP280_REG_CTRL_HUMIDITY,
+				  BMP280_OSRS_HUMIDITY_MASK, osrs);
+
 	if (ret < 0)
 		return ret;
 
-	return regmap_update_bits(data->regmap, BMP280_REG_CTRL_HUMIDITY,
-				  BMP280_OSRS_HUMIDITY_MASK, osrs);
+	return bmp280_chip_config(data);
 }
 
 static const struct bmp280_chip_info bme280_chip_info = {
diff --git a/drivers/iio/pressure/bmp280.h b/drivers/iio/pressure/bmp280.h
index 2c770e13be0e..61347438b779 100644
--- a/drivers/iio/pressure/bmp280.h
+++ b/drivers/iio/pressure/bmp280.h
@@ -96,6 +96,11 @@
 #define BME280_CHIP_ID			0x60
 #define BMP280_SOFT_RESET_VAL		0xB6
 
+/* BMP280 register skipped special values */
+#define BMP280_TEMP_SKIPPED		0x80000
+#define BMP280_PRESS_SKIPPED		0x80000
+#define BMP280_HUMIDITY_SKIPPED		0x8000
+
 /* Regmap configurations */
 extern const struct regmap_config bmp180_regmap_config;
 extern const struct regmap_config bmp280_regmap_config;

From 50b39608efb1e60f334f9b59128fd6d970bfd5a6 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Thu, 27 Jul 2017 18:18:57 +0200
Subject: [PATCH 04/98] iio: trigger: stm32-timer: fix quadrature mode get
 routine

Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device")

SMS bitfield is mode + 1. After reset, upon boot, SMS = 0. When reading
from sysfs, stm32_get_quadrature_mode() returns -1 (i.e. -EPERM) which is
the wrong error code here. So, check that the SMS bitfield matches a valid
encoder mode, or return -EINVAL.

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Acked-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/trigger/stm32-timer-trigger.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index d22bc56dd9fc..6aa73d6b2882 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -571,11 +571,14 @@ static int stm32_get_quadrature_mode(struct iio_dev *indio_dev,
 {
 	struct stm32_timer_trigger *priv = iio_priv(indio_dev);
 	u32 smcr;
+	int mode;
 
 	regmap_read(priv->regmap, TIM_SMCR, &smcr);
-	smcr &= TIM_SMCR_SMS;
+	mode = (smcr & TIM_SMCR_SMS) - 1;
+	if ((mode < 0) || (mode >= ARRAY_SIZE(stm32_quadrature_modes)))
+		return -EINVAL;
 
-	return smcr - 1;
+	return mode;
 }
 
 static const struct iio_enum stm32_quadrature_mode_enum = {

From 1987a08cd989fd9e5690e90a04e70046e93315f4 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Thu, 27 Jul 2017 18:18:58 +0200
Subject: [PATCH 05/98] iio: trigger: stm32-timer: fix write_raw return value

Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device")

IIO core expects zero as return value for write_raw() callback
in case of success.

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Acked-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/trigger/stm32-timer-trigger.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index 6aa73d6b2882..107918b3a90b 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -406,9 +406,8 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		regmap_write(priv->regmap, TIM_CNT, val);
+		return regmap_write(priv->regmap, TIM_CNT, val);
 
-		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_SCALE:
 		/* fixed scale */
 		return -EINVAL;

From 06e3fe89988b1c99a3d9953b1d3b1faf3f047017 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Thu, 27 Jul 2017 18:18:59 +0200
Subject: [PATCH 06/98] iio: trigger: stm32-timer: fix get/set down count
 direction

Fixes: 4adec7da0536 ("iio: stm32 trigger: Add quadrature encoder device")

This fixes two issues:
- stm32_set_count_direction: to set down direction
- stm32_get_count_direction: to get down direction

IIO core provides/expects value to be an index of iio_enum items array.
This needs to be turned by these routines into TIM_CR1_DIR (e.g. BIT(4))
value.
Also, report error when attempting to write direction, when in encoder
mode: in this case, direction is read only (given by encoder inputs).

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Acked-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/trigger/stm32-timer-trigger.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index 107918b3a90b..d28aa02b85e8 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -594,13 +594,20 @@ static const char *const stm32_count_direction_states[] = {
 
 static int stm32_set_count_direction(struct iio_dev *indio_dev,
 				     const struct iio_chan_spec *chan,
-				     unsigned int mode)
+				     unsigned int dir)
 {
 	struct stm32_timer_trigger *priv = iio_priv(indio_dev);
+	u32 val;
+	int mode;
 
-	regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_DIR, mode);
+	/* In encoder mode, direction is RO (given by TI1/TI2 signals) */
+	regmap_read(priv->regmap, TIM_SMCR, &val);
+	mode = (val & TIM_SMCR_SMS) - 1;
+	if ((mode >= 0) && (mode < ARRAY_SIZE(stm32_quadrature_modes)))
+		return -EBUSY;
 
-	return 0;
+	return regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_DIR,
+				  dir ? TIM_CR1_DIR : 0);
 }
 
 static int stm32_get_count_direction(struct iio_dev *indio_dev,
@@ -611,7 +618,7 @@ static int stm32_get_count_direction(struct iio_dev *indio_dev,
 
 	regmap_read(priv->regmap, TIM_CR1, &cr1);
 
-	return (cr1 & TIM_CR1_DIR);
+	return ((cr1 & TIM_CR1_DIR) ? 1 : 0);
 }
 
 static const struct iio_enum stm32_count_direction_enum = {

From 90938ca432e6b8f6bb1c22a24984738fc3d906ed Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Thu, 27 Jul 2017 18:19:00 +0200
Subject: [PATCH 07/98] iio: trigger: stm32-timer: add enable attribute

In order to use encoder mode, timers need to be enabled (e.g. CEN bit)
along with peripheral clock.
Add IIO_CHAN_INFO_ENABLE attribute to handle this.
Also, in triggered mode, CEN bit is set automatically in hardware.
Then clock must be enabled before starting triggered mode.

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Acked-by: Benjamin Gaignard <benjamin.gaignard@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/trigger/stm32-timer-trigger.c | 57 +++++++++++++++++------
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index d28aa02b85e8..14e6eb04bbb0 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -366,34 +366,32 @@ static int stm32_counter_read_raw(struct iio_dev *indio_dev,
 				  int *val, int *val2, long mask)
 {
 	struct stm32_timer_trigger *priv = iio_priv(indio_dev);
+	u32 dat;
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-	{
-		u32 cnt;
-
-		regmap_read(priv->regmap, TIM_CNT, &cnt);
-		*val = cnt;
-
+		regmap_read(priv->regmap, TIM_CNT, &dat);
+		*val = dat;
 		return IIO_VAL_INT;
-	}
-	case IIO_CHAN_INFO_SCALE:
-	{
-		u32 smcr;
 
-		regmap_read(priv->regmap, TIM_SMCR, &smcr);
-		smcr &= TIM_SMCR_SMS;
+	case IIO_CHAN_INFO_ENABLE:
+		regmap_read(priv->regmap, TIM_CR1, &dat);
+		*val = (dat & TIM_CR1_CEN) ? 1 : 0;
+		return IIO_VAL_INT;
+
+	case IIO_CHAN_INFO_SCALE:
+		regmap_read(priv->regmap, TIM_SMCR, &dat);
+		dat &= TIM_SMCR_SMS;
 
 		*val = 1;
 		*val2 = 0;
 
 		/* in quadrature case scale = 0.25 */
-		if (smcr == 3)
+		if (dat == 3)
 			*val2 = 2;
 
 		return IIO_VAL_FRACTIONAL_LOG2;
 	}
-	}
 
 	return -EINVAL;
 }
@@ -403,6 +401,7 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev,
 				   int val, int val2, long mask)
 {
 	struct stm32_timer_trigger *priv = iio_priv(indio_dev);
+	u32 dat;
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
@@ -411,6 +410,22 @@ static int stm32_counter_write_raw(struct iio_dev *indio_dev,
 	case IIO_CHAN_INFO_SCALE:
 		/* fixed scale */
 		return -EINVAL;
+
+	case IIO_CHAN_INFO_ENABLE:
+		if (val) {
+			regmap_read(priv->regmap, TIM_CR1, &dat);
+			if (!(dat & TIM_CR1_CEN))
+				clk_enable(priv->clk);
+			regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN,
+					   TIM_CR1_CEN);
+		} else {
+			regmap_read(priv->regmap, TIM_CR1, &dat);
+			regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN,
+					   0);
+			if (dat & TIM_CR1_CEN)
+				clk_disable(priv->clk);
+		}
+		return 0;
 	}
 
 	return -EINVAL;
@@ -506,9 +521,19 @@ static int stm32_set_enable_mode(struct iio_dev *indio_dev,
 {
 	struct stm32_timer_trigger *priv = iio_priv(indio_dev);
 	int sms = stm32_enable_mode2sms(mode);
+	u32 val;
 
 	if (sms < 0)
 		return sms;
+	/*
+	 * Triggered mode sets CEN bit automatically by hardware. So, first
+	 * enable counter clock, so it can use it. Keeps it in sync with CEN.
+	 */
+	if (sms == 6) {
+		regmap_read(priv->regmap, TIM_CR1, &val);
+		if (!(val & TIM_CR1_CEN))
+			clk_enable(priv->clk);
+	}
 
 	regmap_update_bits(priv->regmap, TIM_SMCR, TIM_SMCR_SMS, sms);
 
@@ -681,7 +706,9 @@ static const struct iio_chan_spec_ext_info stm32_trigger_count_info[] = {
 static const struct iio_chan_spec stm32_trigger_channel = {
 	.type = IIO_COUNT,
 	.channel = 0,
-	.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE),
+	.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
+			      BIT(IIO_CHAN_INFO_ENABLE) |
+			      BIT(IIO_CHAN_INFO_SCALE),
 	.ext_info = stm32_trigger_count_info,
 	.indexed = 1
 };

From ff3aa88a4d61468baece3fc2bb54e2a3bea6360f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Br=C3=BCns?= <stefan.bruens@rwth-aachen.de>
Date: Wed, 26 Jul 2017 23:32:06 +0200
Subject: [PATCH 08/98] iio: adc: ina219: Avoid underflow for sleeping time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Proper support for the INA219 lowered the minimum sampling period from
2*140us to 2*84us. Subtracting 200us later leads to an underflow and
an almost infinite udelay later.

Using a signed int for the sampling period provides sufficient range
(at most 2*8640*1024us), but catches the underflow when comparing with
buffer_us.

Fixes: 18edac2e22f4 ("iio: adc: Fix integration time/averaging for INA219/220")
Signed-off-by: Stefan Brüns <stefan.bruens@rwth-aachen.de>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ina2xx-adc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c
index 232c0b80d658..c3f86138cb55 100644
--- a/drivers/iio/adc/ina2xx-adc.c
+++ b/drivers/iio/adc/ina2xx-adc.c
@@ -644,7 +644,7 @@ static int ina2xx_capture_thread(void *data)
 {
 	struct iio_dev *indio_dev = data;
 	struct ina2xx_chip_info *chip = iio_priv(indio_dev);
-	unsigned int sampling_us = SAMPLING_PERIOD(chip);
+	int sampling_us = SAMPLING_PERIOD(chip);
 	int buffer_us;
 
 	/*

From 50dbe1f4b453b2860ef0e3d48054b9fd24d5ae97 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Mon, 24 Jul 2017 18:10:38 +0200
Subject: [PATCH 09/98] iio: adc: stm32: fix common clock rate

ADC clock input is provided to internal prescaler (that decreases its
frequency). It's then used as reference clock for conversions.

- Fix common clock rate used then by stm32-adc sub-devices. Take common
  prescaler into account. Currently, rate is used to set "boost" mode.
  It may unnecessarily be set. This impacts power consumption.
- Fix ADC max clock rate on STM32H7 (fADC from datasheet). Currently,
  prescaler may be set too low. This can result in ADC reference
  clock used for conversion to exceed max allowed clock frequency.

Fixes: 95e339b6e85d ("iio: adc: stm32: add support for STM32H7")
Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/stm32-adc-core.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c
index e09233b03c05..609676384f5e 100644
--- a/drivers/iio/adc/stm32-adc-core.c
+++ b/drivers/iio/adc/stm32-adc-core.c
@@ -64,7 +64,7 @@
 #define STM32H7_CKMODE_MASK		GENMASK(17, 16)
 
 /* STM32 H7 maximum analog clock rate (from datasheet) */
-#define STM32H7_ADC_MAX_CLK_RATE	72000000
+#define STM32H7_ADC_MAX_CLK_RATE	36000000
 
 /**
  * stm32_adc_common_regs - stm32 common registers, compatible dependent data
@@ -148,14 +148,14 @@ static int stm32f4_adc_clk_sel(struct platform_device *pdev,
 		return -EINVAL;
 	}
 
-	priv->common.rate = rate;
+	priv->common.rate = rate / stm32f4_pclk_div[i];
 	val = readl_relaxed(priv->common.base + STM32F4_ADC_CCR);
 	val &= ~STM32F4_ADC_ADCPRE_MASK;
 	val |= i << STM32F4_ADC_ADCPRE_SHIFT;
 	writel_relaxed(val, priv->common.base + STM32F4_ADC_CCR);
 
 	dev_dbg(&pdev->dev, "Using analog clock source at %ld kHz\n",
-		rate / (stm32f4_pclk_div[i] * 1000));
+		priv->common.rate / 1000);
 
 	return 0;
 }
@@ -250,7 +250,7 @@ static int stm32h7_adc_clk_sel(struct platform_device *pdev,
 
 out:
 	/* rate used later by each ADC instance to control BOOST mode */
-	priv->common.rate = rate;
+	priv->common.rate = rate / div;
 
 	/* Set common clock mode and prescaler */
 	val = readl_relaxed(priv->common.base + STM32H7_ADC_CCR);
@@ -260,7 +260,7 @@ static int stm32h7_adc_clk_sel(struct platform_device *pdev,
 	writel_relaxed(val, priv->common.base + STM32H7_ADC_CCR);
 
 	dev_dbg(&pdev->dev, "Using %s clock/%d source at %ld kHz\n",
-		ckmode ? "bus" : "adc", div, rate / (div * 1000));
+		ckmode ? "bus" : "adc", div, priv->common.rate / 1000);
 
 	return 0;
 }

From f3fd2afed8eee91620d05b69ab94c14793c849d7 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Fri, 28 Jul 2017 15:10:48 -0700
Subject: [PATCH 10/98] ntb: transport shouldn't disable link due to bogus
 values in SPADs

It seems that under certain scenarios the SPAD can have bogus values caused
by an agent (i.e. BIOS or other software) that is not the kernel driver, and
that causes memory window setup failure. This should not cause the link to
be disabled because if we do that, the driver will never recover again. We
have verified in testing that this issue happens and prevents proper link
recovery.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Fixes: 84f766855f61 ("ntb: stop link work when we do not have memory")
---
 drivers/ntb/ntb_transport.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index b29558ddfe95..f58d8e305323 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -924,10 +924,8 @@ static void ntb_transport_link_work(struct work_struct *work)
 		ntb_free_mw(nt, i);
 
 	/* if there's an actual failure, we should just bail */
-	if (rc < 0) {
-		ntb_link_disable(ndev);
+	if (rc < 0)
 		return;
-	}
 
 out:
 	if (ntb_link_is_up(ndev, NULL, NULL) == 1)

From 0eb46345364d7318b11068c46e8a68d5dc10f65e Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Tue, 25 Jul 2017 14:57:42 -0600
Subject: [PATCH 11/98] ntb: ntb_test: ensure the link is up before trying to
 configure the mws

After the link tests, there is a race on one side of the test for
the link coming up. It's possible, in some cases, for the test script
to write to the 'peer_trans' files before the link has come up.

To fix this, we simply use the link event file to ensure both sides
see the link as up before continuing.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Allen Hubbe <Allen.Hubbe@dell.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Fixes: a9c59ef77458 ("ntb_test: Add a selftest script for the NTB subsystem")
---
 tools/testing/selftests/ntb/ntb_test.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 1c12b5855e4f..5fc7ad359e21 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -333,6 +333,10 @@ function ntb_tool_tests()
 	link_test $LOCAL_TOOL $REMOTE_TOOL
 	link_test $REMOTE_TOOL $LOCAL_TOOL
 
+	#Ensure the link is up on both sides before continuing
+	write_file Y $LOCAL_TOOL/link_event
+	write_file Y $REMOTE_TOOL/link_event
+
 	for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do
 		PT=$(basename $PEER_TRANS)
 		write_file $MW_SIZE $LOCAL_TOOL/$PT

From 7f5770678b2d0cc8f3ffbf7eb73410f2acba7925 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sun, 30 Jul 2017 21:10:44 +0300
Subject: [PATCH 12/98] dmaengine: tegra210-adma: fix of_irq_get() error check

of_irq_get() may return 0 as well as negative error number on failure,
while the driver only checks for the negative values. The driver would then
call request_irq(0, ...) in tegra_adma_alloc_chan_resources() and never get
valid channel interrupt.

Check for 'tdc->irq <= 0' instead and return -ENXIO from the driver's probe
iff of_irq_get() returned 0.

Fixes: f46b195799b5 ("dmaengine: tegra-adma: Add support for Tegra210 ADMA")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/tegra210-adma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index b10cbaa82ff5..b26256f23d67 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -717,8 +717,8 @@ static int tegra_adma_probe(struct platform_device *pdev)
 		tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i);
 
 		tdc->irq = of_irq_get(pdev->dev.of_node, i);
-		if (tdc->irq < 0) {
-			ret = tdc->irq;
+		if (tdc->irq <= 0) {
+			ret = tdc->irq ?: -ENXIO;
 			goto irq_dispose;
 		}
 

From 0fa375e6bc9023211eead30a6a79963c45a563da Mon Sep 17 00:00:00 2001
From: Jeffy Chen <jeffy.chen@rock-chips.com>
Date: Wed, 9 Aug 2017 18:41:03 +0800
Subject: [PATCH 13/98] drm/rockchip: Fix suspend crash when drm is not bound

Currently we are allocating drm_device in rockchip_drm_bind, so if the
suspend/resume code accesses it when drm is not bound, we would hit this
crash:

[  253.402836] Unable to handle kernel NULL pointer dereference at virtual address 00000028
[  253.402837] pgd = ffffffc06c9b0000
[  253.402841] [00000028] *pgd=0000000000000000, *pud=0000000000000000
[  253.402844] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[  253.402859] Modules linked in: btusb btrtl btbcm btintel bluetooth ath10k_pci ath10k_core ar10k_ath ar10k_mac80211 cfg80211 ip6table_filter asix usbnet mii
[  253.402864] CPU: 4 PID: 1331 Comm: cat Not tainted 4.4.70 #15
[  253.402865] Hardware name: Google Scarlet (DT)
[  253.402867] task: ffffffc076c0ce00 ti: ffffffc06c2c8000 task.ti: ffffffc06c2c8000
[  253.402871] PC is at rockchip_drm_sys_suspend+0x20/0x5c

Add sanity checks to prevent that.

Reported-by: Brian Norris <briannorris@chromium.com>
Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.kernel.org/patch/9890297/
---
 drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index c6b1b7f3a2a3..c16bc0a7115b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -275,11 +275,15 @@ static void rockchip_drm_fb_resume(struct drm_device *drm)
 static int rockchip_drm_sys_suspend(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct rockchip_drm_private *priv = drm->dev_private;
+	struct rockchip_drm_private *priv;
+
+	if (!drm)
+		return 0;
 
 	drm_kms_helper_poll_disable(drm);
 	rockchip_drm_fb_suspend(drm);
 
+	priv = drm->dev_private;
 	priv->state = drm_atomic_helper_suspend(drm);
 	if (IS_ERR(priv->state)) {
 		rockchip_drm_fb_resume(drm);
@@ -293,8 +297,12 @@ static int rockchip_drm_sys_suspend(struct device *dev)
 static int rockchip_drm_sys_resume(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct rockchip_drm_private *priv = drm->dev_private;
+	struct rockchip_drm_private *priv;
 
+	if (!drm)
+		return 0;
+
+	priv = drm->dev_private;
 	drm_atomic_helper_resume(drm, priv->state);
 	rockchip_drm_fb_resume(drm);
 	drm_kms_helper_poll_enable(drm);

From 2406b296a3a80ba1c78707f205556f2388d474ff Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 21 Jul 2017 22:56:19 +0200
Subject: [PATCH 14/98] gpu: ipu-v3: add DRM dependency

The new PRE/PRG driver code causes a link failure when IPUv3 is built-in,
but DRM is built as a module:

drivers/gpu/ipu-v3/ipu-pre.o: In function `ipu_pre_configure':
ipu-pre.c:(.text.ipu_pre_configure+0x18): undefined reference to `drm_format_info'
drivers/gpu/ipu-v3/ipu-prg.o: In function `ipu_prg_format_supported':
ipu-prg.c:(.text.ipu_prg_format_supported+0x8): undefined reference to `drm_format_info'

Adding a Kconfig dependency on DRM means we don't run into this problem
any more. If DRM is disabled altogether, the IPUv3 driver is built
without PRE/PRG support.

Fixes: ea9c260514c1 ("gpu: ipu-v3: add driver for Prefetch Resolve Gasket")
Link: https://patchwork.kernel.org/patch/9636665/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[p.zabel@pengutronix.de: changed the dependency from DRM to DRM || !DRM,
 since the link failure only happens when DRM=m and IPUV3_CORE=y.
 Modified the commit message to reflect this.]
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/ipu-v3/Kconfig b/drivers/gpu/ipu-v3/Kconfig
index 08766c6e7856..87a20b3dcf7a 100644
--- a/drivers/gpu/ipu-v3/Kconfig
+++ b/drivers/gpu/ipu-v3/Kconfig
@@ -1,6 +1,7 @@
 config IMX_IPUV3_CORE
 	tristate "IPUv3 core support"
 	depends on SOC_IMX5 || SOC_IMX6Q || ARCH_MULTIPLATFORM
+	depends on DRM || !DRM # if DRM=m, this can't be 'y'
 	select GENERIC_IRQ_CHIP
 	help
 	  Choose this if you have a i.MX5/6 system and want to use the Image

From 5be5dd38d4628fdbff7359f235f7cdf0cf9655f1 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 5 Aug 2016 11:55:18 +0200
Subject: [PATCH 15/98] drm/imx: ipuv3-plane: fix YUV framebuffer scanout on
 the base plane

Historically, only RGB framebuffers could be assigned to the primary
plane. This changed with universal plane support. Since no colorspace
conversion was set up for the IPUv3 full plane, assigning YUV frame
buffers to the primary plane caused incorrect output.
Fix this by enabling color space conversion also for the primary plane.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/drm/imx/ipuv3-plane.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 6276bb834b4f..d3845989a29d 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -545,15 +545,13 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
 		return;
 	}
 
+	ics = ipu_drm_fourcc_to_colorspace(fb->format->format);
 	switch (ipu_plane->dp_flow) {
 	case IPU_DP_FLOW_SYNC_BG:
-		ipu_dp_setup_channel(ipu_plane->dp,
-					IPUV3_COLORSPACE_RGB,
-					IPUV3_COLORSPACE_RGB);
+		ipu_dp_setup_channel(ipu_plane->dp, ics, IPUV3_COLORSPACE_RGB);
 		ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true);
 		break;
 	case IPU_DP_FLOW_SYNC_FG:
-		ics = ipu_drm_fourcc_to_colorspace(state->fb->format->format);
 		ipu_dp_setup_channel(ipu_plane->dp, ics,
 					IPUV3_COLORSPACE_UNKNOWN);
 		/* Enable local alpha on partial plane */

From 491ab4700d1b64f5cf2f9055e01613a923df5fab Mon Sep 17 00:00:00 2001
From: Nikhil Mahale <nmahale@nvidia.com>
Date: Wed, 9 Aug 2017 09:23:01 +0530
Subject: [PATCH 16/98] drm: Fix framebuffer leak

Do not leak the framebuffer if the client-provided crtc id is invalid.

Signed-off-by: Nikhil Mahale <nmahale@nvidia.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1502250781-5779-1-git-send-email-nmahale@nvidia.com
---
 drivers/gpu/drm/drm_plane.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index 5dc8c4350602..e40c12fabbde 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -601,6 +601,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 
 		crtc = drm_crtc_find(dev, plane_req->crtc_id);
 		if (!crtc) {
+			drm_framebuffer_put(fb);
 			DRM_DEBUG_KMS("Unknown crtc ID %d\n",
 				      plane_req->crtc_id);
 			return -ENOENT;

From 7f5d6dac548b983702dd7aac1d463bd88dff50a8 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Mon, 14 Aug 2017 12:07:21 +0200
Subject: [PATCH 17/98] drm/atomic: Handle -EDEADLK with out-fences correctly

complete_crtc_signaling is freeing fence_state, but when retrying
num_fences and fence_state are not zero'd. This caused duplicate
fd's in the fence_state array, followed by a BUG_ON in fs/file.c
because we reallocate freed memory, and installing over an existing
fd, or potential other fun.

Zero fence_state and num_fences correctly in the retry loop, which
allows kms_atomic_transition to pass.

Fixes: beaf5af48034 ("drm/fence: add out-fences support")
Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Cc: Brian Starkey <brian.starkey@arm.com> (v10)
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
Testcase: kms_atomic_transitions.plane-all-modeset-transition-fencing
(with CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y)
Link: https://patchwork.freedesktop.org/patch/msgid/20170814100721.13340-1-maarten.lankhorst@linux.intel.com
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> #intel-gfx on irc
---
 drivers/gpu/drm/drm_atomic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index c0f336d23f9c..b43939f24812 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -2167,10 +2167,10 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
 	struct drm_atomic_state *state;
 	struct drm_modeset_acquire_ctx ctx;
 	struct drm_plane *plane;
-	struct drm_out_fence_state *fence_state = NULL;
+	struct drm_out_fence_state *fence_state;
 	unsigned plane_mask;
 	int ret = 0;
-	unsigned int i, j, num_fences = 0;
+	unsigned int i, j, num_fences;
 
 	/* disallow for drivers not supporting atomic: */
 	if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
@@ -2211,6 +2211,8 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
 	plane_mask = 0;
 	copied_objs = 0;
 	copied_props = 0;
+	fence_state = NULL;
+	num_fences = 0;
 
 	for (i = 0; i < arg->count_objs; i++) {
 		uint32_t obj_id, count_props;

From a23318feeff662c8d25d21623daebdd2e55ec221 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 9 Aug 2017 15:28:22 +0200
Subject: [PATCH 18/98] i2c: designware: Fix system suspend

The commit 8503ff166504 ("i2c: designware: Avoid unnecessary resuming
during system suspend"), may suggest to the PM core to try out the so
called direct_complete path for system sleep. In this path, the PM core
treats a runtime suspended device as it's already in a proper low power
state for system sleep, which makes it skip calling the system sleep
callbacks for the device, except for the ->prepare() and the ->complete()
callbacks.

However, the PM core may unset the direct_complete flag for a parent
device, in case its child devices are being system suspended before. In this
scenario, the PM core invokes the system sleep callbacks, no matter if the
device is runtime suspended or not.

Particularly in cases of an existing i2c slave device, the above path is
triggered, which breaks the assumption that the i2c device is always
runtime resumed whenever the dw_i2c_plat_suspend() is being called.

More precisely, dw_i2c_plat_suspend() calls clk_core_disable() and
clk_core_unprepare(), for an already disabled/unprepared clock, leading to
a splat in the log about clock calls being wrongly balanced and breaking
system sleep.

To still allow the direct_complete path in cases when it's possible, but
also to keep the fix simple, let's runtime resume the i2c device in the
->suspend() callback, before continuing to put the device into low power
state.

Note, in cases when the i2c device is attached to the ACPI PM domain, this
problem doesn't occur, because ACPI's ->suspend() callback, assigned to
acpi_subsys_suspend(), already calls pm_runtime_resume() for the device.

It should also be noted that this change does not fix commit 8503ff166504
("i2c: designware: Avoid unnecessary resuming during system suspend").
Because for the non-ACPI case, the system sleep support was already broken
prior that point.

Cc: <stable@vger.kernel.org> # v4.4+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: John Stultz <john.stultz@linaro.org>
Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 143a8fd582b4..441afc715a90 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -430,7 +430,7 @@ static void dw_i2c_plat_complete(struct device *dev)
 #endif
 
 #ifdef CONFIG_PM
-static int dw_i2c_plat_suspend(struct device *dev)
+static int dw_i2c_plat_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_i2c_dev *i_dev = platform_get_drvdata(pdev);
@@ -452,11 +452,21 @@ static int dw_i2c_plat_resume(struct device *dev)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int dw_i2c_plat_suspend(struct device *dev)
+{
+	pm_runtime_resume(dev);
+	return dw_i2c_plat_runtime_suspend(dev);
+}
+#endif
+
 static const struct dev_pm_ops dw_i2c_dev_pm_ops = {
 	.prepare = dw_i2c_plat_prepare,
 	.complete = dw_i2c_plat_complete,
 	SET_SYSTEM_SLEEP_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume)
-	SET_RUNTIME_PM_OPS(dw_i2c_plat_suspend, dw_i2c_plat_resume, NULL)
+	SET_RUNTIME_PM_OPS(dw_i2c_plat_runtime_suspend,
+			   dw_i2c_plat_resume,
+			   NULL)
 };
 
 #define DW_I2C_DEV_PMOPS (&dw_i2c_dev_pm_ops)

From 984277a041d5ee4a65aaadf0307d67a7c401e11c Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Fri, 11 Aug 2017 14:44:55 +0300
Subject: [PATCH 19/98] i2c: designware: Fix oops from i2c_dw_irq_handler_slave

When i2c-designware is initialized in slave mode the
i2c-designware-slave.c: i2c_dw_irq_handler_slave() can hit a NULL
pointer dereference when I2C slave backend is not registered but code is
accessing the struct dw_i2c_dev.slave without testing whether it is NULL.

We might get spurious interrupts from other devices or from IRQ core
during unloading the driver when CONFIG_DEBUG_SHIRQ is set. Existing
check for enable and IRQ status is not enough since device can be power
gated and those bits may read 1.

Fix this by handling the interrupt only when also struct dw_i2c_dev.slave
is set.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-slave.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 0548c7ea578c..4b62a3872763 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -272,7 +272,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
 	slave_activity = ((dw_readl(dev, DW_IC_STATUS) &
 		DW_IC_STATUS_SLAVE_ACTIVITY) >> 6);
 
-	if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY))
+	if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave)
 		return 0;
 
 	dev_dbg(dev->dev,

From 4e2d93de070ceaca5097f7ee5c311731b83208a0 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Wed, 9 Aug 2017 15:24:44 +0300
Subject: [PATCH 20/98] i2c: designware: Fix standard mode speed when
 configuring the slave mode

Code sets bit DW_IC_CON_SPEED_FAST (0x4) always when configuring the slave
mode. This results in incorrect register value DW_IC_CON_SPEED_HIGH (0x6)
when OR'ed together with DW_IC_CON_SPEED_STD (0x2).

Remove this and let the code set the speed mode bits according to clock
frequency or default to fast mode.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-platdrv.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index 441afc715a90..57248bccadbc 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -198,8 +198,7 @@ static void i2c_dw_configure_slave(struct dw_i2c_dev *dev)
 	dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY;
 
 	dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL |
-			 DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED |
-			 DW_IC_CON_SPEED_FAST;
+			 DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED;
 
 	dev->mode = DW_IC_SLAVE;
 

From f4b17a14faeec4160f97ad75ea7534f571f12404 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Wed, 9 Aug 2017 11:21:28 +0200
Subject: [PATCH 21/98] i2c: core: Make comment about I2C table requirement to
 reflect the code

I2C drivers were required to have an I2C device ID table even if they were for
devices that would only be registered using a specific firmware interface
(e.g: OF or ACPI).

But commit da10c06a044b ("i2c: Make I2C ID tables non-mandatory for DT'ed
devices") changed the I2C core to relax the requirement and allow drivers
to avoid defining this table.

Unfortunately it only took into account drivers for OF-only devices and
forgot about ACPI-only ones, and this was fixed by commit c64ffff7a9d1
("i2c: core: Allow empty id_table in ACPI case as well").

But the latter didn't update the original comment, so it doesn't reflect
what the code does now.

Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 12822a4b8f8f..56e46581b84b 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -353,8 +353,8 @@ static int i2c_device_probe(struct device *dev)
 	}
 
 	/*
-	 * An I2C ID table is not mandatory, if and only if, a suitable Device
-	 * Tree match table entry is supplied for the probing device.
+	 * An I2C ID table is not mandatory, if and only if, a suitable OF
+	 * or ACPI ID table is supplied for the probing device.
 	 */
 	if (!driver->id_table &&
 	    !i2c_acpi_match_device(dev->driver->acpi_match_table, client) &&

From 42543aeb48e3701b49e0a83654248afc38feb88f Mon Sep 17 00:00:00 2001
From: Anton Vasilyev <vasilyev@ispras.ru>
Date: Thu, 10 Aug 2017 18:15:45 +0300
Subject: [PATCH 22/98] i2c: simtec: use release_mem_region instead of
 release_resource

Use api pair of request_mem_region and release_mem_region
instead of release_resource.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Anton Vasilyev <vasilyev@ispras.ru>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-simtec.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-simtec.c b/drivers/i2c/busses/i2c-simtec.c
index b4685bb9b5d7..adca51a99487 100644
--- a/drivers/i2c/busses/i2c-simtec.c
+++ b/drivers/i2c/busses/i2c-simtec.c
@@ -127,8 +127,7 @@ static int simtec_i2c_probe(struct platform_device *dev)
 	iounmap(pd->reg);
 
  err_res:
-	release_resource(pd->ioarea);
-	kfree(pd->ioarea);
+	release_mem_region(pd->ioarea->start, size);
 
  err:
 	kfree(pd);
@@ -142,8 +141,7 @@ static int simtec_i2c_remove(struct platform_device *dev)
 	i2c_del_adapter(&pd->adap);
 
 	iounmap(pd->reg);
-	release_resource(pd->ioarea);
-	kfree(pd->ioarea);
+	release_mem_region(pd->ioarea->start, resource_size(pd->ioarea));
 	kfree(pd);
 
 	return 0;

From f1c0b7e448b9e66dd9a7343bab58a3d3a477e104 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Fri, 28 Jul 2017 18:00:12 -0700
Subject: [PATCH 23/98] i2c: aspeed: fixed potential null pointer dereference

Before I skipped null checks when the master is in the STOP state; this
fixes that.

Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Acked-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: f327c686d3ba ("i2c: aspeed: added driver for Aspeed I2C")
---
 drivers/i2c/busses/i2c-aspeed.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index f19348328a71..6fdf9231c23c 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -410,10 +410,11 @@ static bool aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus)
 	}
 
 	/* We are in an invalid state; reset bus to a known state. */
-	if (!bus->msgs && bus->master_state != ASPEED_I2C_MASTER_STOP) {
+	if (!bus->msgs) {
 		dev_err(bus->dev, "bus in unknown state");
 		bus->cmd_err = -EIO;
-		aspeed_i2c_do_stop(bus);
+		if (bus->master_state != ASPEED_I2C_MASTER_STOP)
+			aspeed_i2c_do_stop(bus);
 		goto out_no_complete;
 	}
 	msg = &bus->msgs[bus->msgs_index];

From a0ffc51e20e90e0c1c2491de2b4b03f48b6caaba Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Tue, 15 Aug 2017 11:57:06 +0200
Subject: [PATCH 24/98] drm/atomic: If the atomic check fails, return its value
 first

The last part of drm_atomic_check_only is testing whether we need to
fail with -EINVAL when modeset is not allowed, but forgets to return
the value when atomic_check() fails first.

This results in -EDEADLK being replaced by -EINVAL, and the sanity
check in drm_modeset_drop_locks kicks in:

[  308.531734] ------------[ cut here ]------------
[  308.531791] WARNING: CPU: 0 PID: 1886 at drivers/gpu/drm/drm_modeset_lock.c:217 drm_modeset_drop_locks+0x33/0xc0 [drm]
[  308.531828] Modules linked in:
[  308.532050] CPU: 0 PID: 1886 Comm: kms_atomic Tainted: G     U  W 4.13.0-rc5-patser+ #5225
[  308.532082] Hardware name: NUC5i7RYB, BIOS RYBDWi35.86A.0246.2015.0309.1355 03/09/2015
[  308.532124] task: ffff8800cd9dae00 task.stack: ffff8800ca3b8000
[  308.532168] RIP: 0010:drm_modeset_drop_locks+0x33/0xc0 [drm]
[  308.532189] RSP: 0018:ffff8800ca3bf980 EFLAGS: 00010282
[  308.532211] RAX: dffffc0000000000 RBX: ffff8800ca3bfaf8 RCX: 0000000013a171e6
[  308.532235] RDX: 1ffff10019477f69 RSI: ffffffffa8ba4fa0 RDI: ffff8800ca3bfb48
[  308.532258] RBP: ffff8800ca3bf998 R08: 0000000000000000 R09: 0000000000000003
[  308.532281] R10: 0000000079dbe066 R11: 00000000f760b34b R12: 0000000000000001
[  308.532304] R13: dffffc0000000000 R14: 00000000ffffffea R15: ffff880096889680
[  308.532328] FS:  00007ff00959cec0(0000) GS:ffff8800d4e00000(0000) knlGS:0000000000000000
[  308.532359] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  308.532380] CR2: 0000000000000008 CR3: 00000000ca2e3000 CR4: 00000000003406f0
[  308.532402] Call Trace:
[  308.532440]  drm_mode_atomic_ioctl+0x19fa/0x1c00 [drm]
[  308.532488]  ? drm_atomic_set_property+0x1220/0x1220 [drm]
[  308.532565]  ? avc_has_extended_perms+0xc39/0xff0
[  308.532593]  ? lock_downgrade+0x610/0x610
[  308.532640]  ? drm_atomic_set_property+0x1220/0x1220 [drm]
[  308.532680]  drm_ioctl_kernel+0x154/0x1a0 [drm]
[  308.532755]  drm_ioctl+0x624/0x8f0 [drm]
[  308.532858]  ? drm_atomic_set_property+0x1220/0x1220 [drm]
[  308.532976]  ? drm_getunique+0x210/0x210 [drm]
[  308.533061]  do_vfs_ioctl+0xd92/0xe40
[  308.533121]  ? ioctl_preallocate+0x1b0/0x1b0
[  308.533160]  ? selinux_capable+0x20/0x20
[  308.533191]  ? do_fcntl+0x1b1/0xbf0
[  308.533219]  ? kasan_slab_free+0xa2/0xb0
[  308.533249]  ? f_getown+0x4b/0xa0
[  308.533278]  ? putname+0xcf/0xe0
[  308.533309]  ? security_file_ioctl+0x57/0x90
[  308.533342]  SyS_ioctl+0x4e/0x80
[  308.533374]  entry_SYSCALL_64_fastpath+0x18/0xad
[  308.533405] RIP: 0033:0x7ff00779e4d7
[  308.533431] RSP: 002b:00007fff66a043d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  308.533481] RAX: ffffffffffffffda RBX: 000000e7c7ca5910 RCX: 00007ff00779e4d7
[  308.533560] RDX: 00007fff66a04430 RSI: 00000000c03864bc RDI: 0000000000000003
[  308.533608] RBP: 00007ff007a5fb00 R08: 000000e7c7ca4620 R09: 000000e7c7ca5e60
[  308.533647] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000070
[  308.533685] R13: 0000000000000000 R14: 0000000000000000 R15: 000000e7c7ca5930
[  308.533770] Code: ff df 55 48 89 e5 41 55 41 54 53 48 89 fb 48 83 c7
50 48 89 fa 48 c1 ea 03 80 3c 02 00 74 05 e8 94 d4 16 e7 48 83 7b 50 00
74 02 <0f> ff 4c 8d 6b 58 48 b8 00 00 00 00 00 fc ff df 4c 89 ea 48 c1
[  308.534086] ---[ end trace 77f11e53b1df44ad ]---

Solve this by adding the missing return.

This is also a bugfix because we could end up rejecting updates with
-EINVAL because of an early -EDEADLK, while if atomic_check ran to
completion it might have downgraded the modeset to a fastset.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Testcase: kms_atomic
Link: https://patchwork.freedesktop.org/patch/msgid/20170815095706.23624-1-maarten.lankhorst@linux.intel.com
Fixes: d34f20d6e2f2 ("drm: Atomic modeset ioctl")
Cc: <stable@vger.kernel.org> # v4.0+
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_atomic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index b43939f24812..aed25c4183bb 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1655,6 +1655,9 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
 	if (config->funcs->atomic_check)
 		ret = config->funcs->atomic_check(state->dev, state);
 
+	if (ret)
+		return ret;
+
 	if (!state->allow_modeset) {
 		for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
 			if (drm_atomic_crtc_needs_modeset(crtc_state)) {
@@ -1665,7 +1668,7 @@ int drm_atomic_check_only(struct drm_atomic_state *state)
 		}
 	}
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL(drm_atomic_check_only);
 

From 2926a2aa5c14fb2add75e6584845b1c03022235f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 14 Aug 2017 17:19:26 +0200
Subject: [PATCH 25/98] iommu: Fix wrong freeing of iommu_device->dev

The struct iommu_device has a 'struct device' embedded into
it, not as a pointer, but the whole struct. In the
conversion of the iommu drivers to use struct iommu_device
it was forgotten that the release function for that struct
device simply calls kfree() on the pointer.

This frees memory that was never allocated and causes memory
corruption.

To fix this issue, use a pointer to struct device instead of
embedding the whole struct. This needs some updates in the
iommu sysfs code as well as the Intel VT-d and AMD IOMMU
driver.

Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device')
Cc: stable@vger.kernel.org # >= v4.11
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_types.h |  4 +++-
 drivers/iommu/intel-iommu.c     |  4 +++-
 drivers/iommu/iommu-sysfs.c     | 32 ++++++++++++++++++++------------
 include/linux/iommu.h           | 12 +++++++++++-
 4 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 294a409e283b..d6b873b57054 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -574,7 +574,9 @@ struct amd_iommu {
 
 static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
 {
-	return container_of(dev, struct amd_iommu, iommu.dev);
+	struct iommu_device *iommu = dev_to_iommu_device(dev);
+
+	return container_of(iommu, struct amd_iommu, iommu);
 }
 
 #define ACPIHID_UID_LEN 256
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 687f18f65cea..3e8636f1220e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4736,7 +4736,9 @@ static void intel_disable_iommus(void)
 
 static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
 {
-	return container_of(dev, struct intel_iommu, iommu.dev);
+	struct iommu_device *iommu_dev = dev_to_iommu_device(dev);
+
+	return container_of(iommu_dev, struct intel_iommu, iommu);
 }
 
 static ssize_t intel_iommu_show_version(struct device *dev,
diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c
index c58351ed61c1..36d1a7ce7fc4 100644
--- a/drivers/iommu/iommu-sysfs.c
+++ b/drivers/iommu/iommu-sysfs.c
@@ -62,32 +62,40 @@ int iommu_device_sysfs_add(struct iommu_device *iommu,
 	va_list vargs;
 	int ret;
 
-	device_initialize(&iommu->dev);
+	iommu->dev = kzalloc(sizeof(*iommu->dev), GFP_KERNEL);
+	if (!iommu->dev)
+		return -ENOMEM;
 
-	iommu->dev.class = &iommu_class;
-	iommu->dev.parent = parent;
-	iommu->dev.groups = groups;
+	device_initialize(iommu->dev);
+
+	iommu->dev->class = &iommu_class;
+	iommu->dev->parent = parent;
+	iommu->dev->groups = groups;
 
 	va_start(vargs, fmt);
-	ret = kobject_set_name_vargs(&iommu->dev.kobj, fmt, vargs);
+	ret = kobject_set_name_vargs(&iommu->dev->kobj, fmt, vargs);
 	va_end(vargs);
 	if (ret)
 		goto error;
 
-	ret = device_add(&iommu->dev);
+	ret = device_add(iommu->dev);
 	if (ret)
 		goto error;
 
+	dev_set_drvdata(iommu->dev, iommu);
+
 	return 0;
 
 error:
-	put_device(&iommu->dev);
+	put_device(iommu->dev);
 	return ret;
 }
 
 void iommu_device_sysfs_remove(struct iommu_device *iommu)
 {
-	device_unregister(&iommu->dev);
+	dev_set_drvdata(iommu->dev, NULL);
+	device_unregister(iommu->dev);
+	iommu->dev = NULL;
 }
 /*
  * IOMMU drivers can indicate a device is managed by a given IOMMU using
@@ -102,14 +110,14 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link)
 	if (!iommu || IS_ERR(iommu))
 		return -ENODEV;
 
-	ret = sysfs_add_link_to_group(&iommu->dev.kobj, "devices",
+	ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices",
 				      &link->kobj, dev_name(link));
 	if (ret)
 		return ret;
 
-	ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev.kobj, "iommu");
+	ret = sysfs_create_link_nowarn(&link->kobj, &iommu->dev->kobj, "iommu");
 	if (ret)
-		sysfs_remove_link_from_group(&iommu->dev.kobj, "devices",
+		sysfs_remove_link_from_group(&iommu->dev->kobj, "devices",
 					     dev_name(link));
 
 	return ret;
@@ -121,5 +129,5 @@ void iommu_device_unlink(struct iommu_device *iommu, struct device *link)
 		return;
 
 	sysfs_remove_link(&link->kobj, "iommu");
-	sysfs_remove_link_from_group(&iommu->dev.kobj, "devices", dev_name(link));
+	sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link));
 }
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2cb54adc4a33..176f7569d874 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -240,7 +240,7 @@ struct iommu_device {
 	struct list_head list;
 	const struct iommu_ops *ops;
 	struct fwnode_handle *fwnode;
-	struct device dev;
+	struct device *dev;
 };
 
 int  iommu_device_register(struct iommu_device *iommu);
@@ -265,6 +265,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
 	iommu->fwnode = fwnode;
 }
 
+static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
+{
+	return (struct iommu_device *)dev_get_drvdata(dev);
+}
+
 #define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
 #define IOMMU_GROUP_NOTIFY_DEL_DEVICE		2 /* Pre Device removed */
 #define IOMMU_GROUP_NOTIFY_BIND_DRIVER		3 /* Pre Driver bind */
@@ -589,6 +594,11 @@ static inline void iommu_device_set_fwnode(struct iommu_device *iommu,
 {
 }
 
+static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
+{
+	return NULL;
+}
+
 static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
 }

From 211b7aac5443d347ff7b6dabf54702b40228cfaf Mon Sep 17 00:00:00 2001
From: "Balasubramaniam, Hari Chand" <hari.chand.balasubramaniam@intel.com>
Date: Tue, 15 Aug 2017 10:05:46 +0800
Subject: [PATCH 26/98] drm/i915: Initialize 'data' in
 intel_dsi_dcs_backlight.c

The variable 'data' may be used uninitialized in this function, so
dcs_get_backlight() may return an unwanted value or fail.

Thus, initializing the 'data' variable to zero will solve the return
failure happening.

v2: Change commit message to reflect upstream with proper message

Fixes: 90198355b83c ("drm/i915/dsi: Add DCS control for Panel PWM")
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Yetunde Adebisi <yetundex.adebisi@intel.com>
Cc: Deepak M <m.deepak@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Signed-off-by: Balasubramaniam, Hari Chand <hari.chand.balasubramaniam@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1502762746-191826-1-git-send-email-hari.chand.balasubramaniam@intel.com
(cherry picked from commit d59814a5b4852442e1d03c569a4542f8b08356a7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c
index 6e09ceb71500..150a156f3b1e 100644
--- a/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dsi_dcs_backlight.c
@@ -46,7 +46,7 @@ static u32 dcs_get_backlight(struct intel_connector *connector)
 	struct intel_encoder *encoder = connector->encoder;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	struct mipi_dsi_device *dsi_device;
-	u8 data;
+	u8 data = 0;
 	enum port port;
 
 	/* FIXME: Need to take care of 16 bit brightness level */

From 7c648bde211baeda7a029bd6be4957e8be48d8c9 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Fri, 11 Aug 2017 14:39:07 +0300
Subject: [PATCH 27/98] drm/i915/vbt: ignore extraneous child devices for a
 port
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ever since we've parsed VBT child devices, starting from 6acab15a7b0d
("drm/i915: use the HDMI DDI buffer translations from VBT"), we've
ignored the child device information if more than one child device
references the same port. The rationale for this seems lost in time.

Since commit 311a20949f04 ("drm/i915: don't init DP or HDMI when not
supported by DDI port") we started using this information more to skip
HDMI/DP init if the port wasn't there per VBT child devices. However, at
the same time it added port defaults without further explanation.

Thus, if the child device info was skipped due to multiple child devices
referencing the same port, the device info would be retrieved from the
somewhat arbitrary defaults.

Finally, when commit bb1d132935c2 ("drm/i915/vbt: split out defaults
that are set when there is no VBT") stopped initializing the defaults
whenever VBT is present, thus trusting the VBT more, we stopped
initializing ports which were referenced by more than one child device.

Apparently at least Asus UX305UA, UX305U, and UX306U laptops have VBT
child device blocks which cause this behaviour. Arguably they were
shipped with a broken VBT.

Relax the rules for multiple references to the same port, and use the
first child device info to reference a port. Retain the logic to debug
log about this, though.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101745
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=196233
Fixes: bb1d132935c2 ("drm/i915/vbt: split out defaults that are set when there is no VBT")
Tested-by: Oliver Weißbarth <mail@oweissbarth.de>
Reported-by: Oliver Weißbarth <mail@oweissbarth.de>
Reported-by: Didier G <didierg-divers@orange.fr>
Reported-by: Giles Anderson <agander@gmail.com>
Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: <stable@vger.kernel.org> # v4.12+
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170811113907.6716-1-jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit b5273d72750555a673040070bfb23c454a7cd3ef)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_bios.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 639d45c1dd2e..7ea7fd1e8856 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -1120,8 +1120,8 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
 	bool is_dvi, is_hdmi, is_dp, is_edp, is_crt;
 	uint8_t aux_channel, ddc_pin;
 	/* Each DDI port can have more than one value on the "DVO Port" field,
-	 * so look for all the possible values for each port and abort if more
-	 * than one is found. */
+	 * so look for all the possible values for each port.
+	 */
 	int dvo_ports[][3] = {
 		{DVO_PORT_HDMIA, DVO_PORT_DPA, -1},
 		{DVO_PORT_HDMIB, DVO_PORT_DPB, -1},
@@ -1130,7 +1130,10 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
 		{DVO_PORT_CRT, DVO_PORT_HDMIE, DVO_PORT_DPE},
 	};
 
-	/* Find the child device to use, abort if more than one found. */
+	/*
+	 * Find the first child device to reference the port, report if more
+	 * than one found.
+	 */
 	for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
 		it = dev_priv->vbt.child_dev + i;
 
@@ -1140,11 +1143,11 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
 
 			if (it->common.dvo_port == dvo_ports[port][j]) {
 				if (child) {
-					DRM_DEBUG_KMS("More than one child device for port %c in VBT.\n",
+					DRM_DEBUG_KMS("More than one child device for port %c in VBT, using the first.\n",
 						      port_name(port));
-					return;
+				} else {
+					child = it;
 				}
-				child = it;
 			}
 		}
 	}

From 31c1a7b8f966470ce136710a95afcf5822fecef8 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Tue, 15 Aug 2017 20:04:03 -0700
Subject: [PATCH 28/98] drm/i915/cnl: Fix LSPCON support.

When LSPCON support was extended to CNL
one part was missed on lspcon_init.

So, instead of adding check per platform on lspcon_init
let's use HAS_LSPCON that is already there for that
purpose.

Fixes: ff15947e0f02 ("drm/i915/cnl: LSPCON support is gen9+")
Cc: Shashank Sharma <shashank.sharma@intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Shashank Sharma <shashank.sharma@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170816030403.11368-1-rodrigo.vivi@intel.com
(cherry picked from commit acf58d4e965d40fc014252292b0911b4c9fe6697)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_lspcon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index 5abef482eacf..beb9baaf2f2e 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -210,8 +210,8 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
 	struct drm_device *dev = intel_dig_port->base.base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
-	if (!IS_GEN9(dev_priv)) {
-		DRM_ERROR("LSPCON is supported on GEN9 only\n");
+	if (!HAS_LSPCON(dev_priv)) {
+		DRM_ERROR("LSPCON is not supported on this platform\n");
 		return false;
 	}
 

From 47f078339be902e97d0ad828ca1d614a5a95334b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 16 Aug 2017 18:21:40 +0100
Subject: [PATCH 29/98] Revert "staging: fsl-mc: be consistent when checking
 strcmp() return"

The previous fix removed the equal-to-zero comparisons on the strcmp()
results, so the function now always returns true. Revert this change to
restore the original, correctly functioning code.

Detected by CoverityScan, CID#1452267 ("Constant expression result")

This reverts commit b93ad9a067e1515af42da7d56bc61f1a25075f94.

Fixes: b93ad9a067e1 ("staging: fsl-mc: be consistent when checking strcmp() return")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/fsl-mc/bus/fsl-mc-allocator.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c b/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c
index b37a6f48225f..8ea3920400a0 100644
--- a/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c
+++ b/drivers/staging/fsl-mc/bus/fsl-mc-allocator.c
@@ -16,9 +16,9 @@
 
 static bool __must_check fsl_mc_is_allocatable(const char *obj_type)
 {
-	return strcmp(obj_type, "dpbp") ||
-	       strcmp(obj_type, "dpmcp") ||
-	       strcmp(obj_type, "dpcon");
+	return strcmp(obj_type, "dpbp") == 0 ||
+	       strcmp(obj_type, "dpmcp") == 0 ||
+	       strcmp(obj_type, "dpcon") == 0;
 }
 
 /**

From 733f6563979d96dec180c350abb8ac67cc0367ba Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Tue, 15 Aug 2017 17:34:44 +0300
Subject: [PATCH 30/98] i2c: designware: Remove needless
 pm_runtime_put_noidle() call

I guess pm_runtime_put_noidle() call in i2c_dw_probe_slave() was copied
by accident from similar master mode adapter registration code. It is
unbalanced due to a missing pm_runtime_get_noresume() but harmless since it
doesn't decrease dev->power.usage_count below zero.

In theory we can hit similar needless runtime suspend/resume cycle
during slave mode adapter registration that was happening when
registering the master mode adapter. See commit cd998ded5c12 ("i2c:
designware: Prevent runtime suspend during adapter registration").

However, since we are slave, we can consider it as a wrong configuration
if we have other slaves attached under this adapter and can omit the
pm_runtime_get_noresume()/pm_runtime_put_noidle() calls for simplicity.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-slave.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 4b62a3872763..25fa33927124 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -382,7 +382,6 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev)
 	ret = i2c_add_numbered_adapter(adap);
 	if (ret)
 		dev_err(dev->dev, "failure adding adapter: %d\n", ret);
-	pm_runtime_put_noidle(dev->dev);
 
 	return ret;
 }

From 2a86cdd2e7d3c75580f41f89f9b9211e225573cc Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Tue, 15 Aug 2017 17:34:45 +0300
Subject: [PATCH 31/98] i2c: designware: Fix runtime PM for I2C slave mode

I2C slave controller must be powered and active all the time when I2C
slave backend is registered in order to let master address and
communicate with us.

Now if the controller is runtime PM capable it will be suspended after
probe and cannot ever respond to the master or generate interrupts.

Fix this by resuming the controller when I2C slave backend is registered
and let it suspend after unregistering.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-designware-slave.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 25fa33927124..78d8fb73927d 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -177,6 +177,8 @@ static int i2c_dw_reg_slave(struct i2c_client *slave)
 		return -EBUSY;
 	if (slave->flags & I2C_CLIENT_TEN)
 		return -EAFNOSUPPORT;
+	pm_runtime_get_sync(dev->dev);
+
 	/*
 	 * Set slave address in the IC_SAR register,
 	 * the address to which the DW_apb_i2c responds.
@@ -205,6 +207,7 @@ static int i2c_dw_unreg_slave(struct i2c_client *slave)
 	dev->disable_int(dev);
 	dev->disable(dev);
 	dev->slave = NULL;
+	pm_runtime_put(dev->dev);
 
 	return 0;
 }

From 1a92a80ad386a1a6e3b36d576d52a1a456394b70 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 24 Jul 2017 14:28:00 +1000
Subject: [PATCH 32/98] powerpc/mm: Ensure cpumask update is ordered

There is no guarantee that the various isync's involved with
the context switch will order the update of the CPU mask with
the first TLB entry for the new context being loaded by the HW.

Be safe here and add a memory barrier to order any subsequent
load/store which may bring entries into the TLB.

The corresponding barrier on the other side already exists as
pte updates use pte_xchg() which uses __cmpxchg_u64 which has
a sync after the atomic operation.

Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Add comments in the code]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mmu_context.h      | 18 ++++++++++++++++++
 arch/powerpc/include/asm/pgtable-be-types.h |  1 +
 arch/powerpc/include/asm/pgtable-types.h    |  1 +
 3 files changed, 20 insertions(+)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 0c76675394c5..35bec1c5bd5a 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -90,6 +90,24 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
 	/* Mark this context has been used on the new CPU */
 	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
 		cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+
+		/*
+		 * This full barrier orders the store to the cpumask above vs
+		 * a subsequent operation which allows this CPU to begin loading
+		 * translations for next.
+		 *
+		 * When using the radix MMU that operation is the load of the
+		 * MMU context id, which is then moved to SPRN_PID.
+		 *
+		 * For the hash MMU it is either the first load from slb_cache
+		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
+		 * copy_mm_to_paca().
+		 *
+		 * On the read side the barrier is in pte_xchg(), which orders
+		 * the store to the PTE vs the load of mm_cpumask.
+		 */
+		smp_mb();
+
 		new_on_cpu = true;
 	}
 
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index 9c0f5db5cf46..67e7e3d990f4 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
 	unsigned long *p = (unsigned long *)ptep;
 	__be64 prev;
 
+	/* See comment in switch_mm_irqs_off() */
 	prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
 					     (__force unsigned long)pte_raw(new));
 
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 8bd3b13fe2fb..369a164b545c 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
 {
 	unsigned long *p = (unsigned long *)ptep;
 
+	/* See comment in switch_mm_irqs_off() */
 	return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
 }
 #endif

From f299aec6ebd747298e35934cff7709c6b119ca52 Mon Sep 17 00:00:00 2001
From: Charles Milette <charlesmilette@gmail.com>
Date: Fri, 18 Aug 2017 16:30:34 -0400
Subject: [PATCH 33/98] staging: rtl8188eu: add RNX-N150NUB support

Add support for USB Device Rosewill RNX-N150NUB.
VendorID: 0x0bda, ProductID: 0xffef

Signed-off-by: Charles Milette <charles.milette@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
index d283341cfe43..56cd4e5e51b2 100644
--- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c
+++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
@@ -45,6 +45,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = {
 	{USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */
 	{USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */
 	{USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */
+	{USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */
 	{}	/* Terminating entry */
 };
 

From 1d2226e45040ed4aee95b633cbd64702bf7fc2a1 Mon Sep 17 00:00:00 2001
From: KT Liao <kt.liao@emc.com.tw>
Date: Fri, 18 Aug 2017 16:58:15 -0700
Subject: [PATCH 34/98] Input: elan_i2c - add ELAN0602 ACPI ID to support
 Lenovo Yoga310

Add ELAN0602 to the list of known ACPI IDs to enable support for ELAN
touchpads found in Lenovo Yoga310.

Signed-off-by: KT Liao <kt.liao@emc.com.tw>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 714cf7f9b138..cfbc8ba4c96c 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1247,6 +1247,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0000", 0 },
 	{ "ELAN0100", 0 },
 	{ "ELAN0600", 0 },
+	{ "ELAN0602", 0 },
 	{ "ELAN0605", 0 },
 	{ "ELAN0608", 0 },
 	{ "ELAN0605", 0 },

From ec667683c532c93fb41e100e5d61a518971060e2 Mon Sep 17 00:00:00 2001
From: Aaron Ma <aaron.ma@canonical.com>
Date: Fri, 18 Aug 2017 12:17:21 -0700
Subject: [PATCH 35/98] Input: trackpoint - add new trackpoint firmware ID

Synaptics added new TP firmware IDs: 0x2 and 0x3. For now, either of the
lower 2 bits indicates a TP. Change the constant to a bitmask value.

This makes the trackpoint get recognized on Lenovo Carbon X1 Gen5 instead
of being identified as "PS/2 Generic Mouse".

Signed-off-by: Aaron Ma <aaron.ma@canonical.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/trackpoint.c | 3 ++-
 drivers/input/mouse/trackpoint.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c
index 20b5b21c1bba..0871010f18d5 100644
--- a/drivers/input/mouse/trackpoint.c
+++ b/drivers/input/mouse/trackpoint.c
@@ -265,7 +265,8 @@ static int trackpoint_start_protocol(struct psmouse *psmouse, unsigned char *fir
 	if (ps2_command(&psmouse->ps2dev, param, MAKE_PS2_CMD(0, 2, TP_READ_ID)))
 		return -1;
 
-	if (param[0] != TP_MAGIC_IDENT)
+	/* add new TP ID. */
+	if (!(param[0] & TP_MAGIC_IDENT))
 		return -1;
 
 	if (firmware_id)
diff --git a/drivers/input/mouse/trackpoint.h b/drivers/input/mouse/trackpoint.h
index 5617ed3a7d7a..88055755f82e 100644
--- a/drivers/input/mouse/trackpoint.h
+++ b/drivers/input/mouse/trackpoint.h
@@ -21,8 +21,9 @@
 #define TP_COMMAND		0xE2	/* Commands start with this */
 
 #define TP_READ_ID		0xE1	/* Sent for device identification */
-#define TP_MAGIC_IDENT		0x01	/* Sent after a TP_READ_ID followed */
+#define TP_MAGIC_IDENT		0x03	/* Sent after a TP_READ_ID followed */
 					/* by the firmware ID */
+					/* Firmware ID includes 0x1, 0x2, 0x3 */
 
 
 /*

From 0c264af7be2013266c5b4c644f3f366399ee490a Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sun, 20 Aug 2017 15:54:26 +0900
Subject: [PATCH 36/98] ALSA: firewire: fix NULL pointer dereference when
 releasing uninitialized data of iso-resource

When calling 'iso_resource_free()' for uninitialized data, this function
causes NULL pointer dereference due to its 'unit' member. This occurs when
unplugging audio and music units on IEEE 1394 bus at failure of card
registration.

This commit fixes the bug. The bug exists since kernel v4.5.

Fixes: 324540c4e05c ('ALSA: fireface: postpone sound card registration') at v4.12
Fixes: 8865a31e0fd8 ('ALSA: firewire-motu: postpone sound card registration') at v4.12
Fixes: b610386c8afb ('ALSA: firewire-tascam: deleyed registration of sound card') at v4.7
Fixes: 86c8dd7f4da3 ('ALSA: firewire-digi00x: delayed registration of sound card') at v4.7
Fixes: 6c29230e2a5f ('ALSA: oxfw: delayed registration of sound card') at v4.7
Fixes: 7d3c1d5901aa ('ALSA: fireworks: delayed registration of sound card') at v4.7
Fixes: 04a2c73c97eb ('ALSA: bebob: delayed registration of sound card') at v4.7
Fixes: b59fb1900b4f ('ALSA: dice: postpone card registration') at v4.5
Cc: <stable@vger.kernel.org> # v4.5+
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/iso-resources.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sound/firewire/iso-resources.c b/sound/firewire/iso-resources.c
index f0e4d502d604..066b5df666f4 100644
--- a/sound/firewire/iso-resources.c
+++ b/sound/firewire/iso-resources.c
@@ -210,9 +210,14 @@ EXPORT_SYMBOL(fw_iso_resources_update);
  */
 void fw_iso_resources_free(struct fw_iso_resources *r)
 {
-	struct fw_card *card = fw_parent_device(r->unit)->card;
+	struct fw_card *card;
 	int bandwidth, channel;
 
+	/* Not initialized. */
+	if (r->unit == NULL)
+		return;
+	card = fw_parent_device(r->unit)->card;
+
 	mutex_lock(&r->mutex);
 
 	if (r->allocated) {

From dbd7396b4f24e0c3284fcc05f5def24f52c09884 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sun, 20 Aug 2017 15:55:02 +0900
Subject: [PATCH 37/98] ALSA: firewire-motu: destroy stream data surely at
 failure of card initialization

When failing sound card registration after initializing stream data, this
module leaves allocated data in stream data. This commit fixes the bug.

Fixes: 9b2bb4f2f4a2 ('ALSA: firewire-motu: add stream management functionality')
Cc: <stable@vger.kernel.org> # v4.12+
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/motu/motu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c
index bf779cfeef0d..59a270406353 100644
--- a/sound/firewire/motu/motu.c
+++ b/sound/firewire/motu/motu.c
@@ -128,6 +128,7 @@ static void do_registration(struct work_struct *work)
 	return;
 error:
 	snd_motu_transaction_unregister(motu);
+	snd_motu_stream_destroy_duplex(motu);
 	snd_card_free(motu->card);
 	dev_info(&motu->unit->device,
 		 "Sound card registration failed: %d\n", err);

From f00fd7ae4f409abb7b2e5d099248832548199f0c Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Sun, 30 Jul 2017 16:14:42 -0600
Subject: [PATCH 38/98] iio: Fix some documentation warnings

The kerneldoc description for the trig_readonly field of struct iio_dev
lacked a colon, leading to this doc build warning:

  ./include/linux/iio/iio.h:603: warning: No description found for parameter 'trig_readonly'

A similar issue for iio_trigger_set_immutable() in trigger.h yielded:

  ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'indio_dev'
  ./include/linux/iio/trigger.h:151: warning: No description found for parameter 'trig'

Fix the formatting and silence the warnings.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/iio.h     | 2 +-
 include/linux/iio/trigger.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index d68bec297a45..c380daa40c0e 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -535,7 +535,7 @@ struct iio_buffer_setup_ops {
  * @scan_timestamp:	[INTERN] set if any buffers have requested timestamp
  * @scan_index_timestamp:[INTERN] cache of the index to the timestamp
  * @trig:		[INTERN] current device trigger (buffer modes)
- * @trig_readonly	[INTERN] mark the current trigger immutable
+ * @trig_readonly:	[INTERN] mark the current trigger immutable
  * @pollfunc:		[DRIVER] function run on trigger being received
  * @pollfunc_event:	[DRIVER] function run on events trigger being received
  * @channels:		[DRIVER] channel specification structure table
diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index ea08302f2d7b..7142d8d6e470 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -144,8 +144,8 @@ void devm_iio_trigger_unregister(struct device *dev,
 /**
  * iio_trigger_set_immutable() - set an immutable trigger on destination
  *
- * @indio_dev - IIO device structure containing the device
- * @trig - trigger to assign to device
+ * @indio_dev: IIO device structure containing the device
+ * @trig: trigger to assign to device
  *
  **/
 int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *trig);

From fdd0d32eb95f135041236a6885d9006315aa9a1d Mon Sep 17 00:00:00 2001
From: Dragos Bogdan <dragos.bogdan@analog.com>
Date: Fri, 4 Aug 2017 01:37:27 +0300
Subject: [PATCH 39/98] iio: imu: adis16480: Fix acceleration scale factor for
 adis16480

According to the datasheet, the range of the acceleration is [-10 g, + 10 g],
so the scale factor should be 10 instead of 5.

Signed-off-by: Dragos Bogdan <dragos.bogdan@analog.com>
Acked-by: Lars-Peter Clausen <lars@metafoo.de>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/adis16480.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c
index 8cf84d3488b2..12898424d838 100644
--- a/drivers/iio/imu/adis16480.c
+++ b/drivers/iio/imu/adis16480.c
@@ -696,7 +696,7 @@ static const struct adis16480_chip_info adis16480_chip_info[] = {
 		.gyro_max_val = IIO_RAD_TO_DEGREE(22500),
 		.gyro_max_scale = 450,
 		.accel_max_val = IIO_M_S_2_TO_G(12500),
-		.accel_max_scale = 5,
+		.accel_max_scale = 10,
 	},
 	[ADIS16485] = {
 		.channels = adis16485_channels,

From a359bb2a55f384bb93349ddf9d30b20b37e02e8a Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Thu, 3 Aug 2017 11:22:17 +0200
Subject: [PATCH 40/98] iio: trigger: stm32-timer: fix get trigger mode

Fix reading trigger mode, when other bit-fields are set. SMCR register
value must be masked to read SMS (slave mode selection) only.

Fixes: 9eba381 ("iio: make stm32 trigger driver use INDIO_HARDWARE_TRIGGERED mode")

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/trigger/stm32-timer-trigger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c
index 14e6eb04bbb0..25ad6abfee22 100644
--- a/drivers/iio/trigger/stm32-timer-trigger.c
+++ b/drivers/iio/trigger/stm32-timer-trigger.c
@@ -485,7 +485,7 @@ static int stm32_get_trigger_mode(struct iio_dev *indio_dev,
 
 	regmap_read(priv->regmap, TIM_SMCR, &smcr);
 
-	return smcr == TIM_SMCR_SMS ? 0 : -EINVAL;
+	return (smcr & TIM_SMCR_SMS) == TIM_SMCR_SMS ? 0 : -EINVAL;
 }
 
 static const struct iio_enum stm32_trigger_mode_enum = {

From f1664eaacec31035450132c46ed2915fd2b2049a Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Sat, 12 Aug 2017 09:09:21 -0700
Subject: [PATCH 41/98] iio: hid-sensor-trigger: Fix the race with user space
 powering up sensors

It has been reported for a while that with iio-sensor-proxy service the
rotation only works after one suspend/resume cycle. This required a wait
in the systemd unit file to avoid race. I found a Yoga 900 where I could
reproduce this.

The problem scenario is:
- During sensor driver init, enable runtime PM and also set an
  auto-suspend delay of 3 seconds.
  This results in one runtime resume. There is a check to avoid
  a powerup in this sequence, but rpm is active.
- User space iio-sensor-proxy tries to power up the sensor. Since rpm is
  active it will simply return. But the sensors were not actually
  powered up in the prior sequence, so the sensors will not work.
- After 3 seconds the auto suspend kicks in.

If we add a wait in systemd service file to fire iio-sensor-proxy after
3 seconds, then now everything will work as the runtime resume will
actually powerup the sensor as this is a user request.

To avoid this:
- Remove the check to match user requested state, this will cause a
  brief powerup, but if the iio-sensor-proxy starts immediately it will
still work as the sensors are ON.
- Also move the autosuspend delay to the place where the user requests
  turn-off of the sensors, e.g. after a raw read or a buffer disable.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Tested-by: Bastien Nocera <hadess@hadess.net>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index 16ade0a0327b..0e4b379ada45 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -111,8 +111,6 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state)
 	s32 poll_value = 0;
 
 	if (state) {
-		if (!atomic_read(&st->user_requested_state))
-			return 0;
 		if (sensor_hub_device_open(st->hsdev))
 			return -EIO;
 
@@ -161,6 +159,9 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state)
 				       &report_val);
 	}
 
+	pr_debug("HID_SENSOR %s set power_state %d report_state %d\n",
+		 st->pdev->name, state_val, report_val);
+
 	sensor_hub_get_feature(st->hsdev, st->power_state.report_id,
 			       st->power_state.index,
 			       sizeof(state_val), &state_val);
@@ -182,6 +183,7 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state)
 		ret = pm_runtime_get_sync(&st->pdev->dev);
 	else {
 		pm_runtime_mark_last_busy(&st->pdev->dev);
+		pm_runtime_use_autosuspend(&st->pdev->dev);
 		ret = pm_runtime_put_autosuspend(&st->pdev->dev);
 	}
 	if (ret < 0) {
@@ -285,8 +287,6 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
 	/* Default to 3 seconds, but can be changed from sysfs */
 	pm_runtime_set_autosuspend_delay(&attrb->pdev->dev,
 					 3000);
-	pm_runtime_use_autosuspend(&attrb->pdev->dev);
-
 	return ret;
 error_unreg_trigger:
 	iio_trigger_unregister(trig);

From 541ee9b24fca587f510fe1bc58508d5cf40707af Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Wed, 16 Aug 2017 19:02:50 +0200
Subject: [PATCH 42/98] iio: magnetometer: st_magn: fix status register address
 for LSM303AGR

Fixes: 97865fe41322 (iio: st_sensors: verify interrupt event to status)
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@st.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/st_magn_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 8e1b0861fbe4..c11f0da86e74 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -358,7 +358,7 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = {
 			.mask_int1 = 0x01,
 			.addr_ihl = 0x63,
 			.mask_ihl = 0x04,
-			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
+			.addr_stat_drdy = 0x67,
 		},
 		.multi_read_bit = false,
 		.bootime = 2,

From 8b35a5f87a73842601cd376e0f5b9b25831390f4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Wed, 16 Aug 2017 19:02:51 +0200
Subject: [PATCH 43/98] iio: magnetometer: st_magn: remove ihl property for
 LSM303AGR

Remove IRQ active low support for LSM303AGR since the sensor does not
support that capability for data-ready line

Fixes: a9fd053b56c6 (iio: st_sensors: support active-low interrupts)
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@st.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/st_magn_core.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index c11f0da86e74..c38563699984 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -356,8 +356,6 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = {
 		.drdy_irq = {
 			.addr = 0x62,
 			.mask_int1 = 0x01,
-			.addr_ihl = 0x63,
-			.mask_ihl = 0x04,
 			.addr_stat_drdy = 0x67,
 		},
 		.multi_read_bit = false,

From d912366a59c5384df436fd007667d6e574128b44 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 20 Aug 2017 09:29:03 -0700
Subject: [PATCH 44/98] Input: soc_button_array - silence -ENOENT error on Dell
 XPS13 9365

The Dell XPS13 9365 has an INT33D2 ACPI node with no GPIOs, causing
the following error in dmesg:

[    7.172275] soc_button_array: probe of INT33D2:00 failed with error -2

This commit silences this, by returning -ENODEV when there are no GPIOs.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=196679
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/misc/soc_button_array.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index f600f3a7a3c6..23520df7650f 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -331,7 +331,7 @@ static int soc_button_probe(struct platform_device *pdev)
 	error = gpiod_count(dev, NULL);
 	if (error < 0) {
 		dev_dbg(dev, "no GPIO attached, ignoring...\n");
-		return error;
+		return -ENODEV;
 	}
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);

From a93c11527528c951b8d8db638162128a09e09ec2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 17 Aug 2017 13:55:41 +0300
Subject: [PATCH 45/98] drm/i915/bxt: use NULL for GPIO connection ID

The commit 213e08ad60ba
	("drm/i915/bxt: add bxt dsi gpio element support")
enables GPIO support for Broxton based platforms.

While using that API we might get into troubles in the future, because
we can't rely on label name in the driver since vendor firmware might
provide any GPIO pin there, e.g. "reset", and even mark it in _DSD (in
which case the request will fail).

To avoid inconsistency and potential issues we have two options:
a) generate GPIO ACPI mapping table and supply it via
   acpi_dev_add_driver_gpios(), or
b) just pass NULL as connection ID.

The b) approach is much simpler and would work since the driver relies
on GPIO indices only. Moreover, the _CRS fallback mechanism, when
requesting GPIO, has been made stricter, and supplying non-NULL
connection ID when neither _DSD, nor GPIO ACPI mapping is present, is
making request fail.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101921
Fixes: f10e4bf6632b ("gpio: acpi: Even more tighten up ACPI GPIO lookups")
Cc: Mika Kahola <mika.kahola@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Tested-by: Mika Kahola <mika.kahola@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170817105541.63914-1-andriy.shevchenko@linux.intel.com
(cherry picked from commit cd55a1fbd21a820b7dd85a208b3170aa0b06adfa)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dsi_vbt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c
index 7158c7ce9c09..91c07b0c8db9 100644
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
@@ -306,7 +306,7 @@ static void bxt_exec_gpio(struct drm_i915_private *dev_priv,
 
 	if (!gpio_desc) {
 		gpio_desc = devm_gpiod_get_index(dev_priv->drm.dev,
-						 "panel", gpio_index,
+						 NULL, gpio_index,
 						 value ? GPIOD_OUT_LOW :
 						 GPIOD_OUT_HIGH);
 

From d41a3c2be178783c85e05025265ab58fbb4d4ce1 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 7 Aug 2017 13:19:19 +0100
Subject: [PATCH 46/98] drm/i915: Clear lost context-switch interrupts across
 reset

During a global reset, we disable the irq. As we disable the irq, the
hardware may be raising a GT interrupt that we then ignore, leaving it
pending in the GTIIR. After the reset, we then re-enable the irq,
triggering the pending interrupt. However, that interrupt was for the
stale state from before the reset, and the contents of the CSB buffer
are now invalid.

v2: Add a comment to make it clear that the double clear is purely my
paranoia.

Reported-by: "Dong, Chuanxiao" <chuanxiao.dong@intel.com>
Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Dong, Chuanxiao" <chuanxiao.dong@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170807121919.30165-1-chris@chris-wilson.co.uk
Link: https://patchwork.freedesktop.org/patch/msgid/20170818090509.5363-1-chris@chris-wilson.co.uk
Reviewed-by: Michel Thierry <michel.thierry@intel.com>
(cherry picked from commit 64f09f00caf0a7cb40a8c0b85789bacba0f51d9e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7404cf2aac28..2afa4daa88e8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1221,6 +1221,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	return ret;
 }
 
+static u8 gtiir[] = {
+	[RCS] = 0,
+	[BCS] = 0,
+	[VCS] = 1,
+	[VCS2] = 1,
+	[VECS] = 3,
+};
+
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1245,9 +1253,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
 
 	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
 
-	/* After a GPU reset, we may have requests to replay */
+	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+	/*
+	 * Clear any pending interrupt state.
+	 *
+	 * We do it twice out of paranoia that some of the IIR are double
+	 * buffered, and if we only reset it once there may still be
+	 * an interrupt pending.
+	 */
+	I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+		   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
+	I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+		   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
 	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 
+	/* After a GPU reset, we may have requests to replay */
 	submit = false;
 	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
 		if (!port_isset(&port[n]))

From d83c2dbaa90a9bd6346e234d9802080a9c7b2fea Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Fri, 18 Aug 2017 09:16:08 +0800
Subject: [PATCH 47/98] mmc: block: prevent propagating R1_OUT_OF_RANGE for
 open-ending mode

We should, to some extent, tolerate R1_OUT_OF_RANGE for open-ending
mode as it is expected behaviour and most of the backup partition
tables should be located near some of the last blocks, which will
always make an open-ending read exceed the capacity of the card.

Fixes: 9820a5b11101 ("mmc: core: for data errors, take response of stop cmd into account")
Fixes: a04e6bae9e6f ("mmc: core: check also R1 response for stop commands")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 49 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index f1bbfd389367..80d1ec693d2d 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1371,12 +1371,46 @@ static inline void mmc_apply_rel_rw(struct mmc_blk_request *brq,
 	 R1_CC_ERROR |		/* Card controller error */		\
 	 R1_ERROR)		/* General/unknown error */
 
-static bool mmc_blk_has_cmd_err(struct mmc_command *cmd)
+static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq)
 {
-	if (!cmd->error && cmd->resp[0] & CMD_ERRORS)
-		cmd->error = -EIO;
+	u32 val;
 
-	return cmd->error;
+	/*
+	 * Per the SD specification(physical layer version 4.10)[1],
+	 * section 4.3.3, it explicitly states that "When the last
+	 * block of user area is read using CMD18, the host should
+	 * ignore OUT_OF_RANGE error that may occur even the sequence
+	 * is correct". And JESD84-B51 for eMMC also has a similar
+	 * statement on section 6.8.3.
+	 *
+	 * Multiple block read/write could be done by either predefined
+	 * method, namely CMD23, or open-ending mode. For open-ending mode,
+	 * we should ignore the OUT_OF_RANGE error as it's normal behaviour.
+	 *
+	 * However the spec[1] doesn't tell us whether we should also
+	 * ignore that for predefined method. But per the spec[1], section
+	 * 4.15 Set Block Count Command, it says"If illegal block count
+	 * is set, out of range error will be indicated during read/write
+	 * operation (For example, data transfer is stopped at user area
+	 * boundary)." In another word, we could expect a out of range error
+	 * in the response for the following CMD18/25. And if argument of
+	 * CMD23 + the argument of CMD18/25 exceed the max number of blocks,
+	 * we could also expect to get a -ETIMEDOUT or any error number from
+	 * the host drivers due to missing data response(for write)/data(for
+	 * read), as the cards will stop the data transfer by itself per the
+	 * spec. So we only need to check R1_OUT_OF_RANGE for open-ending mode.
+	 */
+
+	if (!brq->stop.error) {
+		bool oor_with_open_end;
+		/* If there is no error yet, check R1 response */
+
+		val = brq->stop.resp[0] & CMD_ERRORS;
+		oor_with_open_end = val & R1_OUT_OF_RANGE && !brq->mrq.sbc;
+
+		if (val && !oor_with_open_end)
+			brq->stop.error = -EIO;
+	}
 }
 
 static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card,
@@ -1400,8 +1434,11 @@ static enum mmc_blk_status mmc_blk_err_check(struct mmc_card *card,
 	 * stop.error indicates a problem with the stop command.  Data
 	 * may have been transferred, or may still be transferring.
 	 */
-	if (brq->sbc.error || brq->cmd.error || mmc_blk_has_cmd_err(&brq->stop) ||
-	    brq->data.error) {
+
+	mmc_blk_eval_resp_error(brq);
+
+	if (brq->sbc.error || brq->cmd.error ||
+	    brq->stop.error || brq->data.error) {
 		switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) {
 		case ERR_RETRY:
 			return MMC_BLK_RETRY;

From 4a4eefcd0e49f9f339933324c1bde431186a0a7d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 3 Aug 2017 13:05:11 +0200
Subject: [PATCH 48/98] KVM: s390: sthyi: fix sthyi inline assembly

The sthyi inline assembly misses register r3 within the clobber
list. The sthyi instruction will always write a return code to
register "R2+1", which in this case would be r3. Due to that we may
have register corruption and see host crashes or data corruption
depending on how gcc decided to allocate and use registers during
compile time.

Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation")
Cc: <stable@vger.kernel.org> # 4.8+
Reviewed-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/sthyi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index 926b5244263e..2773a2f6a5c4 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -394,7 +394,7 @@ static int sthyi(u64 vaddr)
 		"srl     %[cc],28\n"
 		: [cc] "=d" (cc)
 		: [code] "d" (code), [addr] "a" (addr)
-		: "memory", "cc");
+		: "3", "memory", "cc");
 	return cc;
 }
 

From 857b8de96795646c5891cf44ae6fb19b9ff74bf9 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 3 Aug 2017 14:27:30 +0200
Subject: [PATCH 49/98] KVM: s390: sthyi: fix specification exception detection

sthyi should only generate a specification exception if the function
code is zero and the response buffer is not on a 4k boundary.

The current code would also test for unknown function codes if the
response buffer, that is currently only defined for function code 0,
is not on a 4k boundary and incorrectly inject a specification
exception instead of returning with condition code 3 and return code 4
(unsupported function code).

Fix this by moving the boundary check.

Fixes: 95ca2cb57985 ("KVM: s390: Add sthyi emulation")
Cc: <stable@vger.kernel.org> # 4.8+
Reviewed-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/sthyi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index 2773a2f6a5c4..a2e5c24f47a7 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
 	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
 
-	if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
+	if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	if (code & 0xffff) {
@@ -433,6 +433,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	if (addr & ~PAGE_MASK)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
 	/*
 	 * If the page has not yet been faulted in, we want to do that
 	 * now and not after all the expensive calculations.

From deecd4d71b12626db48544faa66bb897e2cafd07 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 27 Jul 2017 15:56:55 -0500
Subject: [PATCH 50/98] objtool: Fix '-mtune=atom' decoding support in objtool
 2.0

With '-mtune=atom', which is enabled with CONFIG_MATOM=y, GCC uses some
unusual instructions for setting up the stack.

Instead of:

  mov %rsp, %rbp

it does:

  lea (%rsp), %rbp

And instead of:

  add imm, %rsp

it does:

  lea disp(%rsp), %rsp

Add support for these instructions to the objtool decoder.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: baa41469a7b9 ("objtool: Implement stack validation 2.0")
Link: http://lkml.kernel.org/r/4ea1db896e821226efe1f8e09f270771bde47e65.1501188854.git.jpoimboe@redhat.com
[ This is a cherry-picked version of upcoming commit 5b8de48e82ba. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/arch/x86/decode.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index a36c2eba64e7..4559a21a8de2 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -271,7 +271,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	case 0x8d:
 		if (rex == 0x48 && modrm == 0x65) {
 
-			/* lea -disp(%rbp), %rsp */
+			/* lea disp(%rbp), %rsp */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_ADD;
 			op->src.reg = CFI_BP;
@@ -281,6 +281,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 			break;
 		}
 
+		if (rex == 0x48 && (modrm == 0xa4 || modrm == 0x64) &&
+		    sib == 0x24) {
+
+			/* lea disp(%rsp), %rsp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_ADD;
+			op->src.reg = CFI_SP;
+			op->src.offset = insn.displacement.value;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_SP;
+			break;
+		}
+
+		if (rex == 0x48 && modrm == 0x2c && sib == 0x24) {
+
+			/* lea (%rsp), %rbp */
+			*type = INSN_STACK;
+			op->src.type = OP_SRC_REG;
+			op->src.reg = CFI_SP;
+			op->dest.type = OP_DEST_REG;
+			op->dest.reg = CFI_BP;
+			break;
+		}
+
 		if (rex == 0x4c && modrm == 0x54 && sib == 0x24 &&
 		    insn.displacement.value == 8) {
 

From 07b3b5e9ed807a0d2077319b8e43a42e941db818 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Tue, 22 Aug 2017 08:33:53 +0200
Subject: [PATCH 51/98] ALSA: usb-audio: Add delay quirk for H650e/Jabra 550a
 USB headsets

These headsets report a lot of: cannot set freq 44100 to ep 0x81
and need a small delay between sample rate settings, just like
Zoom R16/24. Add both headsets to the Zoom R16/24 quirk for
a 1 ms delay between control msgs.

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 6a03f9697039..5d2a63248b1d 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1309,10 +1309,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
 	    && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		mdelay(20);
 
-	/* Zoom R16/24 needs a tiny delay here, otherwise requests like
-	 * get/set frequency return as failed despite actually succeeding.
+	/* Zoom R16/24, Logitech H650e, Jabra 550a needs a tiny delay here,
+	 * otherwise requests like get/set frequency return as failed despite
+	 * actually succeeding.
 	 */
-	if (chip->usb_id == USB_ID(0x1686, 0x00dd) &&
+	if ((chip->usb_id == USB_ID(0x1686, 0x00dd) ||
+	     chip->usb_id == USB_ID(0x046d, 0x0a46) ||
+	     chip->usb_id == USB_ID(0x0b0e, 0x0349)) &&
 	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		mdelay(1);
 }

From fe4600a548f2763dec91b3b27a1245c370ceee2a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 19 Aug 2017 13:05:58 +0100
Subject: [PATCH 52/98] drm: Release driver tracking before making the object
 available again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is the same bug as we fixed in commit f6cd7daecff5 ("drm: Release
driver references to handle before making it available again"), but now
the exposure is via the PRIME lookup tables. If we remove the
object/handle from the PRIME lut, then a new request for the same
object/fd will generate a new handle, thus for a short window that
object is known to userspace by two different handles. Fix this by
releasing the driver tracking before PRIME.

Fixes: 0ff926c7d4f0 ("drm/prime: add exported buffers to current fprivs
imported buffer list (v2)")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Thierry Reding <treding@nvidia.com>
Cc: stable@vger.kernel.org
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20170819120558.6465-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/drm_gem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 8dc11064253d..cdaac37907b1 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -255,13 +255,13 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
 	struct drm_gem_object *obj = ptr;
 	struct drm_device *dev = obj->dev;
 
+	if (dev->driver->gem_close_object)
+		dev->driver->gem_close_object(obj, file_priv);
+
 	if (drm_core_check_feature(dev, DRIVER_PRIME))
 		drm_gem_remove_prime_handles(obj, file_priv);
 	drm_vma_node_revoke(&obj->vma_node, file_priv);
 
-	if (dev->driver->gem_close_object)
-		dev->driver->gem_close_object(obj, file_priv);
-
 	drm_gem_object_handle_put_unlocked(obj);
 
 	return 0;

From 88c54cdf61f508ebcf8da2d819f5dfc03e954d1d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 22 Aug 2017 08:15:13 +0200
Subject: [PATCH 53/98] ALSA: core: Fix unexpected error at replacing user TLV

When user tries to replace the user-defined control TLV, the kernel
checks the change of its content via memcmp().  The problem is that
the kernel passes the return value from memcmp() as is.  memcmp()
may give a negative value depending on the comparison result,
and this would be recognized as an error code.

The patch covers that corner-case, return 1 properly for the changed
TLV.

Fixes: 8aa9b586e420 ("[ALSA] Control API - more robust TLV implementation")
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/control.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/core/control.c b/sound/core/control.c
index 3c6be1452e35..4525e127afd9 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -1137,7 +1137,7 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol,
 		mutex_lock(&ue->card->user_ctl_lock);
 		change = ue->tlv_data_size != size;
 		if (!change)
-			change = memcmp(ue->tlv_data, new_data, size);
+			change = memcmp(ue->tlv_data, new_data, size) != 0;
 		kfree(ue->tlv_data);
 		ue->tlv_data = new_data;
 		ue->tlv_data_size = size;

From b2a6d1b999a4c13e5997bb864694e77172d45250 Mon Sep 17 00:00:00 2001
From: Martijn Coenen <maco@android.com>
Date: Fri, 28 Jul 2017 13:56:08 +0200
Subject: [PATCH 54/98] ANDROID: binder: fix proc->tsk check.

Commit c4ea41ba195d ("binder: use group leader instead of open thread")
was incomplete and didn't update a check in binder_mmap(), causing all
mmap() calls into the binder driver to fail.

Signed-off-by: Martijn Coenen <maco@android.com>
Tested-by: John Stultz <john.stultz@linaro.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index f7665c31feca..831cdd7d197d 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3362,7 +3362,7 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
 	const char *failure_string;
 	struct binder_buffer *buffer;
 
-	if (proc->tsk != current)
+	if (proc->tsk != current->group_leader)
 		return -EINVAL;
 
 	if ((vma->vm_end - vma->vm_start) > SZ_4M)

From ffeaf9aaf97b4bdaf114d6df52f800d71918768c Mon Sep 17 00:00:00 2001
From: fred gao <fred.gao@intel.com>
Date: Wed, 16 Aug 2017 15:48:03 +0800
Subject: [PATCH 55/98] drm/i915/gvt: Fix the kernel null pointer error

Once an error happens in the shadow_indirect_ctx function, the variable
wa_ctx->indirect_ctx.obj is not initialized but accessed, so a
kernel null pointer panic occurs.

Fixes: 894cf7d15634 ("drm/i915/gvt: i915_gem_object_create() returns an error pointer")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: fred gao <fred.gao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 713848c36349..e556a46cd4c2 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2714,7 +2714,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 unmap_src:
 	i915_gem_object_unpin_map(obj);
 put_obj:
-	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
+	i915_gem_object_put(obj);
 	return ret;
 }
 

From bbba6f9d3da357bbabc6fda81e99ff5584500e76 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Aug 2017 09:30:17 +0200
Subject: [PATCH 56/98] ALSA: hda - Add stereo mic quirk for Lenovo G50-70
 (17aa:3978)

Lenovo G50-70 (17aa:3978) with Conexant codec chip requires the
similar workaround for the inverted stereo dmic like other Lenovo
models.

Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1020657
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 8c1289963c80..a81aacf684b2 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -947,6 +947,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
 	SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC),
+	SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC),
 	SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x1c06, 0x2011, "Lemote A1004", CXT_PINCFG_LEMOTE_A1004),

From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Thu, 3 Aug 2017 13:09:03 +0530
Subject: [PATCH 57/98] cifs: Fix df output for users with quota limits

The df for a SMB2 share triggers a GetInfo call for
FS_FULL_SIZE_INFORMATION. The values returned are used to populate
struct statfs.

The problem is that none of the information returned by the call
contains the total blocks available on the filesystem. Instead we use
the blocks available to the user ie. quota limitation when filling out
statfs.f_blocks. The information returned does contain Actual free units
on the filesystem and is used to populate statfs.f_bfree. For users with
quota enabled, it can lead to situations where the total free space
reported is more than the total blocks on the system ending up with df
reports like the following

 # df -h /mnt/a
Filesystem         Size  Used Avail Use% Mounted on
//192.168.22.10/a  2.5G -2.3G  2.5G    - /mnt/a

To fix this problem, we instead populate statfs.f_bfree with the
same value as statfs.f_bavail, i.e. CallerAvailableAllocationUnits. This
is similar to what is done already in the code for cifs and df now
reports the quota information for the user used to mount the share.

 # df --si /mnt/a
Filesystem         Size  Used Avail Use% Mounted on
//192.168.22.10/a  2.7G  101M  2.6G   4% /mnt/a

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Pierguido Lambri <plambri@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Cc: <stable@vger.kernel.org>
---
 fs/cifs/smb2pdu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5fb2fc2d0080..97edb4d376cd 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3219,8 +3219,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
 	kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
 			  le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
 	kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits);
-	kst->f_bfree  = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits);
-	kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
+	kst->f_bfree  = kst->f_bavail =
+			le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
 	return;
 }
 

From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 23 Aug 2017 14:48:14 +1000
Subject: [PATCH 58/98] cifs: return ENAMETOOLONG for overlong names in
 cifs_open()/cifs_lookup()

Add checking for the path component length and verify it is <= the maximum
that the server advertizes via FileFsAttributeInformation.

With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT
when users try to access an overlong path.

To test this, try to cd into a (non-existing) directory on a CIFS share
that has a too long name:
cd /mnt/aaaaaaaaaaaaaaa...

and it now should show a good error message from the shell:
bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long

rh bz 1153996

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Cc: <stable@vger.kernel.org>
---
 fs/cifs/dir.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 56366e984076..569d3fb736be 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -194,15 +194,20 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix)
 }
 
 /*
+ * Don't allow path components longer than the server max.
  * Don't allow the separator character in a path component.
  * The VFS will not allow "/", but "\" is allowed by posix.
  */
 static int
-check_name(struct dentry *direntry)
+check_name(struct dentry *direntry, struct cifs_tcon *tcon)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
 	int i;
 
+	if (unlikely(direntry->d_name.len >
+		     tcon->fsAttrInfo.MaxPathNameComponentLength))
+		return -ENAMETOOLONG;
+
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
 		for (i = 0; i < direntry->d_name.len; i++) {
 			if (direntry->d_name.name[i] == '\\') {
@@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 		return finish_no_open(file, res);
 	}
 
-	rc = check_name(direntry);
-	if (rc)
-		return rc;
-
 	xid = get_xid();
 
 	cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
@@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
 	}
 
 	tcon = tlink_tcon(tlink);
+
+	rc = check_name(direntry, tcon);
+	if (rc)
+		goto out_free_xid;
+
 	server = tcon->ses->server;
 
 	if (server->ops->new_lease_key)
@@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	}
 	pTcon = tlink_tcon(tlink);
 
-	rc = check_name(direntry);
+	rc = check_name(direntry, pTcon);
 	if (rc)
 		goto lookup_out;
 

From ea0ea2bc6dd8923d86a0fa98743dbeed98645486 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Fri, 18 Aug 2017 16:08:13 -0700
Subject: [PATCH 59/98] blk-throttle: cap discard request size

Discard requests are usually very big and can easily use up the whole
bandwidth budget of a cgroup. The discard request size doesn't really
reflect the size of data written, so it doesn't make sense to account it
against the bandwidth budget. Jens pointed out that treating the size as 0
doesn't make sense either, because a discard request does have a cost. But
it's not easy to find the actual cost. This patch simply makes the size
one sector.

Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a7285bf2831c..80f5481fe9f6 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -382,6 +382,14 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
 	}								\
 } while (0)
 
+static inline unsigned int throtl_bio_data_size(struct bio *bio)
+{
+	/* assume it's one sector */
+	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
+		return 512;
+	return bio->bi_iter.bi_size;
+}
+
 static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
 {
 	INIT_LIST_HEAD(&qn->node);
@@ -934,6 +942,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	bool rw = bio_data_dir(bio);
 	u64 bytes_allowed, extra_bytes, tmp;
 	unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
+	unsigned int bio_size = throtl_bio_data_size(bio);
 
 	jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
 
@@ -947,14 +956,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
 	do_div(tmp, HZ);
 	bytes_allowed = tmp;
 
-	if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
+	if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
 		if (wait)
 			*wait = 0;
 		return true;
 	}
 
 	/* Calc approx time to dispatch */
-	extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
+	extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
 	jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
 
 	if (!jiffy_wait)
@@ -1034,11 +1043,12 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
 static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
 {
 	bool rw = bio_data_dir(bio);
+	unsigned int bio_size = throtl_bio_data_size(bio);
 
 	/* Charge the bio to the group */
-	tg->bytes_disp[rw] += bio->bi_iter.bi_size;
+	tg->bytes_disp[rw] += bio_size;
 	tg->io_disp[rw]++;
-	tg->last_bytes_disp[rw] += bio->bi_iter.bi_size;
+	tg->last_bytes_disp[rw] += bio_size;
 	tg->last_io_disp[rw]++;
 
 	/*

From 1e6ec9ea89d30739b9447c1860fcb07fc29f3aef Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 23 Aug 2017 14:54:59 -0700
Subject: [PATCH 60/98] Revert "loop: support 4k physical blocksize"

There's some stuff still up in the air, let's not get stuck with a
subpar ABI. I'll follow up with something better for 4.14.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c      | 42 ++++++---------------------------------
 drivers/block/loop.h      |  1 -
 include/uapi/linux/loop.h |  3 ---
 3 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ef8334949b42..f321b96405f5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -221,8 +221,7 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
 }
 
 static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
-		 loff_t logical_blocksize)
+figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
 {
 	loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
 	sector_t x = (sector_t)size;
@@ -234,12 +233,6 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit,
 		lo->lo_offset = offset;
 	if (lo->lo_sizelimit != sizelimit)
 		lo->lo_sizelimit = sizelimit;
-	if (lo->lo_flags & LO_FLAGS_BLOCKSIZE) {
-		lo->lo_logical_blocksize = logical_blocksize;
-		blk_queue_physical_block_size(lo->lo_queue, lo->lo_blocksize);
-		blk_queue_logical_block_size(lo->lo_queue,
-					     lo->lo_logical_blocksize);
-	}
 	set_capacity(lo->lo_disk, x);
 	bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
 	/* let user-space know about the new size */
@@ -820,7 +813,6 @@ static void loop_config_discard(struct loop_device *lo)
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
 	struct request_queue *q = lo->lo_queue;
-	int lo_bits = 9;
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
@@ -840,11 +832,9 @@ static void loop_config_discard(struct loop_device *lo)
 
 	q->limits.discard_granularity = inode->i_sb->s_blocksize;
 	q->limits.discard_alignment = 0;
-	if (lo->lo_flags & LO_FLAGS_BLOCKSIZE)
-		lo_bits = blksize_bits(lo->lo_logical_blocksize);
 
-	blk_queue_max_discard_sectors(q, UINT_MAX >> lo_bits);
-	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> lo_bits);
+	blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
+	blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
 	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
 }
 
@@ -938,7 +928,6 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 
 	lo->use_dio = false;
 	lo->lo_blocksize = lo_blocksize;
-	lo->lo_logical_blocksize = 512;
 	lo->lo_device = bdev;
 	lo->lo_flags = lo_flags;
 	lo->lo_backing_file = file;
@@ -1104,7 +1093,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	int err;
 	struct loop_func_table *xfer;
 	kuid_t uid = current_uid();
-	int lo_flags = lo->lo_flags;
 
 	if (lo->lo_encrypt_key_size &&
 	    !uid_eq(lo->lo_key_owner, uid) &&
@@ -1137,26 +1125,9 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	if (err)
 		goto exit;
 
-	if (info->lo_flags & LO_FLAGS_BLOCKSIZE) {
-		if (!(lo->lo_flags & LO_FLAGS_BLOCKSIZE))
-			lo->lo_logical_blocksize = 512;
-		lo->lo_flags |= LO_FLAGS_BLOCKSIZE;
-		if (LO_INFO_BLOCKSIZE(info) != 512 &&
-		    LO_INFO_BLOCKSIZE(info) != 1024 &&
-		    LO_INFO_BLOCKSIZE(info) != 2048 &&
-		    LO_INFO_BLOCKSIZE(info) != 4096)
-			return -EINVAL;
-		if (LO_INFO_BLOCKSIZE(info) > lo->lo_blocksize)
-			return -EINVAL;
-	}
-
 	if (lo->lo_offset != info->lo_offset ||
-	    lo->lo_sizelimit != info->lo_sizelimit ||
-	    lo->lo_flags != lo_flags ||
-	    ((lo->lo_flags & LO_FLAGS_BLOCKSIZE) &&
-	     lo->lo_logical_blocksize != LO_INFO_BLOCKSIZE(info))) {
-		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit,
-				     LO_INFO_BLOCKSIZE(info))) {
+	    lo->lo_sizelimit != info->lo_sizelimit) {
+		if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
 			err = -EFBIG;
 			goto exit;
 		}
@@ -1348,8 +1319,7 @@ static int loop_set_capacity(struct loop_device *lo)
 	if (unlikely(lo->lo_state != Lo_bound))
 		return -ENXIO;
 
-	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit,
-				lo->lo_logical_blocksize);
+	return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
 }
 
 static int loop_set_dio(struct loop_device *lo, unsigned long arg)
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 2c096b9a17b8..fecd3f97ef8c 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -49,7 +49,6 @@ struct loop_device {
 	struct file *	lo_backing_file;
 	struct block_device *lo_device;
 	unsigned	lo_blocksize;
-	unsigned	lo_logical_blocksize;
 	void		*key_data; 
 
 	gfp_t		old_gfp_mask;
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index a3960f98679c..c8125ec1f4f2 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -22,7 +22,6 @@ enum {
 	LO_FLAGS_AUTOCLEAR	= 4,
 	LO_FLAGS_PARTSCAN	= 8,
 	LO_FLAGS_DIRECT_IO	= 16,
-	LO_FLAGS_BLOCKSIZE	= 32,
 };
 
 #include <asm/posix_types.h>	/* for __kernel_old_dev_t */
@@ -60,8 +59,6 @@ struct loop_info64 {
 	__u64		   lo_init[2];
 };
 
-#define LO_INFO_BLOCKSIZE(l) (l)->lo_init[0]
-
 /*
  * Loop filter types
  */

From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 22 Aug 2017 18:43:48 +1000
Subject: [PATCH 61/98] timers: Fix excessive granularity of new timers after a
 nohz idle

When a timer base is idle, it is forwarded when a new timer is added
to ensure that granularity does not become excessive. When not idle,
the timer tick is expected to increment the base.

However there are several problems:

- If an existing timer is modified, the base is forwarded only after
  the index is calculated.

- The base is not forwarded by add_timer_on.

- There is a window after a timer is restarted from a nohz idle, after
  it is marked not-idle and before the timer tick on this CPU, where a
  timer may be added but the ancient base does not get forwarded.

These result in excessive granularity (a 1 jiffy timeout can blow out
to 100s of jiffies), which cause the rcu lockup detector to trigger,
among other things.

Fix this by keeping track of whether the timer base has been idle
since it was last run or forwarded, and if so then forward it before
adding a new timer.

There is still a case where mod_timer optimises the case of a pending
timer mod with the same expiry time, where the timer can see excessive
granularity relative to the new, shorter interval. A comment is added,
but it's not changed because it is an important fastpath for
networking.

This has been tested and found to fix the RCU softlockup messages.

Testing was also done with tracing to measure requested versus
achieved wakeup latencies for all non-deferrable timers in an idle
system (with no lockup watchdogs running). Wakeup latency relative to
absolute latency is calculated (note this suffers from round-up skew
at low absolute times) and analysed:

             max     avg      std
upstream   506.0    1.20     4.68
patched      2.0    1.08     0.15

The bug was noticed due to the lockup detector Kconfig changes
dropping it out of people's .configs and resulting in larger base
clk skew. When the lockup detectors are enabled, no CPU can go idle for
longer than 4 seconds, which limits the granularity errors.
Sub-optimal timer behaviour is observable on a smaller scale in that
case:

	     max     avg      std
upstream     9.0    1.05     0.19
patched      2.0    1.04     0.11

Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Tested-by: David Miller <davem@davemloft.net>
Cc: dzickus@redhat.com
Cc: sfr@canb.auug.org.au
Cc: mpe@ellerman.id.au
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: linuxarm@huawei.com
Cc: abdhalee@linux.vnet.ibm.com
Cc: John Stultz <john.stultz@linaro.org>
Cc: akpm@linux-foundation.org
Cc: paulmck@linux.vnet.ibm.com
Cc: torvalds@linux-foundation.org
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com
---
 kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 8f5d1bf18854..f2674a056c26 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -203,6 +203,7 @@ struct timer_base {
 	bool			migration_enabled;
 	bool			nohz_active;
 	bool			is_idle;
+	bool			must_forward_clk;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head	vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
-	unsigned long jnow = READ_ONCE(jiffies);
+	unsigned long jnow;
 
 	/*
-	 * We only forward the base when it's idle and we have a delta between
-	 * base clock and jiffies.
+	 * We only forward the base when we are idle or have just come out of
+	 * idle (must_forward_clk logic), and have a delta between base clock
+	 * and jiffies. In the common case, run_timers will take care of it.
 	 */
-	if (!base->is_idle || (long) (jnow - base->clk) < 2)
+	if (likely(!base->must_forward_clk))
+		return;
+
+	jnow = READ_ONCE(jiffies);
+	base->must_forward_clk = base->is_idle;
+	if ((long)(jnow - base->clk) < 2)
 		return;
 
 	/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	 * same array bucket then just return:
 	 */
 	if (timer_pending(timer)) {
+		/*
+		 * The downside of this optimization is that it can result in
+		 * larger granularity than you would get from adding a new
+		 * timer with this expiry.
+		 */
 		if (timer->expires == expires)
 			return 1;
 
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		 * dequeue/enqueue dance.
 		 */
 		base = lock_timer_base(timer, &flags);
+		forward_timer_base(base);
 
 		clk = base->clk;
 		idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		}
 	} else {
 		base = lock_timer_base(timer, &flags);
+		forward_timer_base(base);
 	}
 
 	ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 			raw_spin_lock(&base->lock);
 			WRITE_ONCE(timer->flags,
 				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
+			forward_timer_base(base);
 		}
 	}
 
-	/* Try to forward a stale timer base clock */
-	forward_timer_base(base);
-
 	timer->expires = expires;
 	/*
 	 * If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 		WRITE_ONCE(timer->flags,
 			   (timer->flags & ~TIMER_BASEMASK) | cpu);
 	}
+	forward_timer_base(base);
 
 	debug_activate(timer, timer->expires);
 	internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 		if (!is_max_delta)
 			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
 		/*
-		 * If we expect to sleep more than a tick, mark the base idle:
+		 * If we expect to sleep more than a tick, mark the base idle.
+		 * Also the tick is stopped so any added timer must forward
+		 * the base clk itself to keep granularity small. This idle
+		 * logic is only maintained for the BASE_STD base, deferrable
+		 * timers may still see large granularity skew (by design).
 		 */
-		if ((expires - basem) > TICK_NSEC)
+		if ((expires - basem) > TICK_NSEC) {
+			base->must_forward_clk = true;
 			base->is_idle = true;
+		}
 	}
 	raw_spin_unlock(&base->lock);
 
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
+	/*
+	 * must_forward_clk must be cleared before running timers so that any
+	 * timer functions that call mod_timer will not try to forward the
+	 * base. idle trcking / clock forwarding logic is only used with
+	 * BASE_STD timers.
+	 *
+	 * The deferrable base does not do idle tracking at all, so we do
+	 * not forward it. This can result in very large variations in
+	 * granularity for deferrable timers, but they can be deferred for
+	 * long periods due to idle.
+	 */
+	base->must_forward_clk = false;
+
 	__run_timers(base);
 	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
 		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));

From bd0fdb191c8523a9126bb14ac1b22cb47698ebf5 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 13 Mar 2017 03:03:49 +1000
Subject: [PATCH 62/98] KVM: PPC: Book3S HV: Use msgsync with hypervisor
 doorbells on POWER9

When msgsnd is used for IPIs to other cores, msgsync must be executed by
the target to order stores performed on the source before its msgsnd
(provided the source executes the appropriate sync).

Fixes: 1704a81ccebc ("KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c52184a8efdf..9c9c983b864f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* Hypervisor doorbell - exit only if host IPI flag set */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
 	bne	3f
+BEGIN_FTR_SECTION
+	PPC_MSGSYNC
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	beq	4f

From 2c4fb78f78b6e420604ee1b05bdfb5c1d637869f Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 18 Aug 2017 12:10:52 +1000
Subject: [PATCH 63/98] KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug
 causing IPB bit loss

This adds a workaround for a bug in POWER9 DD1 chips where changing
the CPPR (Current Processor Priority Register) can cause bits in the
IPB (Interrupt Pending Buffer) to get lost.  Thankfully it only
happens when manually manipulating CPPR which is quite rare.  When it
does happen it can cause interrupts to be delayed or lost.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_xive_template.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 4636ca6e7d38..150be86b1018 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -16,7 +16,16 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 	u8 cppr;
 	u16 ack;
 
-	/* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
+	/*
+	 * DD1 bug workaround: If PIPR is less favored than CPPR
+	 * ignore the interrupt or we might incorrectly lose an IPB
+	 * bit.
+	 */
+	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+		u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+		if (pipr >= xc->hw_cppr)
+			return;
+	}
 
 	/* Perform the acknowledge OS to register cycle. */
 	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));

From bb9b52bd51dcb17b965a30167d0812902c1b9927 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 18 Aug 2017 12:10:58 +1000
Subject: [PATCH 64/98] KVM: PPC: Book3S HV: Add missing barriers to XIVE code
 and document them

This adds missing memory barriers to order updates/tests of
the virtual CPPR and MFRR, thus fixing a lost IPI problem.

While at it also document all barriers in this file.

This fixes a bug causing guest IPIs to occasionally get lost.  The
symptom then is hangs or stalls in the guest.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_xive_template.c | 57 ++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 150be86b1018..d1ed2c41b5d2 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -16,6 +16,12 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 	u8 cppr;
 	u16 ack;
 
+	/*
+	 * Ensure any previous store to CPPR is ordered vs.
+	 * the subsequent loads from PIPR or ACK.
+	 */
+	eieio();
+
 	/*
 	 * DD1 bug workaround: If PIPR is less favored than CPPR
 	 * ignore the interrupt or we might incorrectly lose an IPB
@@ -244,6 +250,11 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
 	/*
 	 * If we found an interrupt, adjust what the guest CPPR should
 	 * be as if we had just fetched that interrupt from HW.
+	 *
+	 * Note: This can only make xc->cppr smaller as the previous
+	 * loop will only exit with hirq != 0 if prio is lower than
+	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+	 * for pending IPIs.
 	 */
 	if (hirq)
 		xc->cppr = prio;
@@ -389,6 +400,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 	old_cppr = xc->cppr;
 	xc->cppr = cppr;
 
+	/*
+	 * Order the above update of xc->cppr with the subsequent
+	 * read of xc->mfrr inside push_pending_to_hw()
+	 */
+	smp_mb();
+
 	/*
 	 * We are masking less, we need to look for pending things
 	 * to deliver and set VP pending bits accordingly to trigger
@@ -429,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
 	 * used to signal MFRR changes is EOId when fetched from
 	 * the queue.
 	 */
-	if (irq == XICS_IPI || irq == 0)
+	if (irq == XICS_IPI || irq == 0) {
+		/*
+		 * This barrier orders the setting of xc->cppr vs.
+		 * subsquent test of xc->mfrr done inside
+		 * scan_interrupts and push_pending_to_hw
+		 */
+		smp_mb();
 		goto bail;
+	}
 
 	/* Find interrupt source */
 	sb = kvmppc_xive_find_source(xive, irq, &src);
 	if (!sb) {
 		pr_devel(" source not found !\n");
 		rc = H_PARAMETER;
+		/* Same as above */
+		smp_mb();
 		goto bail;
 	}
 	state = &sb->irq_state[src];
 	kvmppc_xive_select_irq(state, &hw_num, &xd);
 
 	state->in_eoi = true;
-	mb();
+
+	/*
+	 * This barrier orders both setting of in_eoi above vs,
+	 * subsequent test of guest_priority, and the setting
+	 * of xc->cppr vs. subsquent test of xc->mfrr done inside
+	 * scan_interrupts and push_pending_to_hw
+	 */
+	smp_mb();
 
 again:
 	if (state->guest_priority == MASKED) {
@@ -470,6 +503,14 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
 
 	}
 
+	/*
+	 * This barrier orders the above guest_priority check
+	 * and spin_lock/unlock with clearing in_eoi below.
+	 *
+	 * It also has to be a full mb() as it must ensure
+	 * the MMIOs done in source_eoi() are completed before
+	 * state->in_eoi is visible.
+	 */
 	mb();
 	state->in_eoi = false;
 bail:
@@ -504,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
 	/* Locklessly write over MFRR */
 	xc->mfrr = mfrr;
 
+	/*
+	 * The load of xc->cppr below and the subsequent MMIO store
+	 * to the IPI must happen after the above mfrr update is
+	 * globally visible so that:
+	 *
+	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+	 *   updating xc->cppr then reading xc->mfrr.
+	 *
+	 * - The target of the IPI sees the xc->mfrr update
+	 */
+	mb();
+
 	/* Shoot the IPI if most favored than target cppr */
 	if (mfrr < xc->cppr)
 		__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));

From 98cd249cf9d2c7e2322fbf20c454c019e141a28b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 20 Jul 2017 06:58:25 +0200
Subject: [PATCH 65/98] c6x: defconfig: Cleanup from old Kconfig options

Remove old, dead Kconfig options (in order appearing in this commit):
 - EXPERIMENTAL is gone since v3.9;
 - MISC_DEVICES: commit 7c5763b8453a ("drivers: misc: Remove
   MISC_DEVICES config option");

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Mark Salter <msalter@redhat.com>
---
 arch/c6x/configs/dsk6455_defconfig  | 2 --
 arch/c6x/configs/evmc6457_defconfig | 2 --
 arch/c6x/configs/evmc6472_defconfig | 2 --
 arch/c6x/configs/evmc6474_defconfig | 2 --
 arch/c6x/configs/evmc6678_defconfig | 2 --
 5 files changed, 10 deletions(-)

diff --git a/arch/c6x/configs/dsk6455_defconfig b/arch/c6x/configs/dsk6455_defconfig
index 4663487c67a1..d764ea4cce7f 100644
--- a/arch/c6x/configs/dsk6455_defconfig
+++ b/arch/c6x/configs/dsk6455_defconfig
@@ -1,5 +1,4 @@
 CONFIG_SOC_TMS320C6455=y
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_SPARSE_IRQ=y
@@ -25,7 +24,6 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=2
 CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6457_defconfig b/arch/c6x/configs/evmc6457_defconfig
index bba40e195ec4..05d0b4a25ab1 100644
--- a/arch/c6x/configs/evmc6457_defconfig
+++ b/arch/c6x/configs/evmc6457_defconfig
@@ -1,5 +1,4 @@
 CONFIG_SOC_TMS320C6457=y
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_SPARSE_IRQ=y
@@ -26,7 +25,6 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=2
 CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6472_defconfig b/arch/c6x/configs/evmc6472_defconfig
index 8c46155f6d31..8d81fcf86b0e 100644
--- a/arch/c6x/configs/evmc6472_defconfig
+++ b/arch/c6x/configs/evmc6472_defconfig
@@ -1,5 +1,4 @@
 CONFIG_SOC_TMS320C6472=y
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_SPARSE_IRQ=y
@@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=2
 CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6474_defconfig b/arch/c6x/configs/evmc6474_defconfig
index 15533f632313..8156a98f3958 100644
--- a/arch/c6x/configs/evmc6474_defconfig
+++ b/arch/c6x/configs/evmc6474_defconfig
@@ -1,5 +1,4 @@
 CONFIG_SOC_TMS320C6474=y
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_SPARSE_IRQ=y
@@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=2
 CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
diff --git a/arch/c6x/configs/evmc6678_defconfig b/arch/c6x/configs/evmc6678_defconfig
index 5f126d4905b1..c4f433c25b69 100644
--- a/arch/c6x/configs/evmc6678_defconfig
+++ b/arch/c6x/configs/evmc6678_defconfig
@@ -1,5 +1,4 @@
 CONFIG_SOC_TMS320C6678=y
-CONFIG_EXPERIMENTAL=y
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_SPARSE_IRQ=y
@@ -27,7 +26,6 @@ CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=2
 CONFIG_BLK_DEV_RAM_SIZE=17000
-CONFIG_MISC_DEVICES=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set

From 636d42117800db1a994726fcf017e3633db832a5 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Jul 2017 16:42:43 -0500
Subject: [PATCH 66/98] c6x: Convert to using %pOF instead of full_name

Now that we have a custom printf format specifier, convert users of
full_name to use %pOF instead. This is preparation to remove storing
of the full path string for each node.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Mark Salter <msalter@redhat.com>
Cc: Aurelien Jacquiot <a-jacquiot@ti.com>
Cc: linux-c6x-dev@linux-c6x.org
Signed-off-by: Mark Salter <msalter@redhat.com>
---
 arch/c6x/platforms/megamod-pic.c | 22 +++++++++++-----------
 arch/c6x/platforms/plldata.c     |  4 ++--
 arch/c6x/platforms/timer64.c     |  8 ++++----
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c
index 43afc03e4125..9519fa5f97d0 100644
--- a/arch/c6x/platforms/megamod-pic.c
+++ b/arch/c6x/platforms/megamod-pic.c
@@ -208,14 +208,14 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
 
 	pic = kzalloc(sizeof(struct megamod_pic), GFP_KERNEL);
 	if (!pic) {
-		pr_err("%s: Could not alloc PIC structure.\n", np->full_name);
+		pr_err("%pOF: Could not alloc PIC structure.\n", np);
 		return NULL;
 	}
 
 	pic->irqhost = irq_domain_add_linear(np, NR_COMBINERS * 32,
 					     &megamod_domain_ops, pic);
 	if (!pic->irqhost) {
-		pr_err("%s: Could not alloc host.\n", np->full_name);
+		pr_err("%pOF: Could not alloc host.\n", np);
 		goto error_free;
 	}
 
@@ -225,7 +225,7 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
 
 	pic->regs = of_iomap(np, 0);
 	if (!pic->regs) {
-		pr_err("%s: Could not map registers.\n", np->full_name);
+		pr_err("%pOF: Could not map registers.\n", np);
 		goto error_free;
 	}
 
@@ -253,8 +253,8 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
 
 		irq_data = irq_get_irq_data(irq);
 		if (!irq_data) {
-			pr_err("%s: combiner-%d no irq_data for virq %d!\n",
-			       np->full_name, i, irq);
+			pr_err("%pOF: combiner-%d no irq_data for virq %d!\n",
+			       np, i, irq);
 			continue;
 		}
 
@@ -265,16 +265,16 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
 		 * of the core priority interrupts (4 - 15).
 		 */
 		if (hwirq < 4 || hwirq >= NR_PRIORITY_IRQS) {
-			pr_err("%s: combiner-%d core irq %ld out of range!\n",
-			       np->full_name, i, hwirq);
+			pr_err("%pOF: combiner-%d core irq %ld out of range!\n",
+			       np, i, hwirq);
 			continue;
 		}
 
 		/* record the mapping */
 		mapping[hwirq - 4] = i;
 
-		pr_debug("%s: combiner-%d cascading to hwirq %ld\n",
-			 np->full_name, i, hwirq);
+		pr_debug("%pOF: combiner-%d cascading to hwirq %ld\n",
+			 np, i, hwirq);
 
 		cascade_data[i].pic = pic;
 		cascade_data[i].index = i;
@@ -290,8 +290,8 @@ static struct megamod_pic * __init init_megamod_pic(struct device_node *np)
 	/* Finally, set up the MUX registers */
 	for (i = 0; i < NR_MUX_OUTPUTS; i++) {
 		if (mapping[i] != IRQ_UNMAPPED) {
-			pr_debug("%s: setting mux %d to priority %d\n",
-				 np->full_name, mapping[i], i + 4);
+			pr_debug("%pOF: setting mux %d to priority %d\n",
+				 np, mapping[i], i + 4);
 			set_megamod_mux(pic, mapping[i], i);
 		}
 	}
diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c
index 755359eb6286..e8b6cc6a7b5a 100644
--- a/arch/c6x/platforms/plldata.c
+++ b/arch/c6x/platforms/plldata.c
@@ -436,8 +436,8 @@ void __init c64x_setup_clocks(void)
 
 	err = of_property_read_u32(node, "clock-frequency", &val);
 	if (err || val == 0) {
-		pr_err("%s: no clock-frequency found! Using %dMHz\n",
-		       node->full_name, (int)val / 1000000);
+		pr_err("%pOF: no clock-frequency found! Using %dMHz\n",
+		       node, (int)val / 1000000);
 		val = 25000000;
 	}
 	clkin1.rate = val;
diff --git a/arch/c6x/platforms/timer64.c b/arch/c6x/platforms/timer64.c
index 0bd0452ded80..241a9a607193 100644
--- a/arch/c6x/platforms/timer64.c
+++ b/arch/c6x/platforms/timer64.c
@@ -204,14 +204,14 @@ void __init timer64_init(void)
 
 	timer = of_iomap(np, 0);
 	if (!timer) {
-		pr_debug("%s: Cannot map timer registers.\n", np->full_name);
+		pr_debug("%pOF: Cannot map timer registers.\n", np);
 		goto out;
 	}
-	pr_debug("%s: Timer registers=%p.\n", np->full_name, timer);
+	pr_debug("%pOF: Timer registers=%p.\n", np, timer);
 
 	cd->irq	= irq_of_parse_and_map(np, 0);
 	if (cd->irq == NO_IRQ) {
-		pr_debug("%s: Cannot find interrupt.\n", np->full_name);
+		pr_debug("%pOF: Cannot find interrupt.\n", np);
 		iounmap(timer);
 		goto out;
 	}
@@ -229,7 +229,7 @@ void __init timer64_init(void)
 		dscr_set_devstate(timer64_devstate_id, DSCR_DEVSTATE_ENABLED);
 	}
 
-	pr_debug("%s: Timer irq=%d.\n", np->full_name, cd->irq);
+	pr_debug("%pOF: Timer irq=%d.\n", np, cd->irq);
 
 	clockevents_calc_mult_shift(cd, c6x_core_freq / TIMER_DIVISOR, 5);
 

From 50b4d485528d1dbe0bd249f2073140e3444f4a7b Mon Sep 17 00:00:00 2001
From: Benjamin Block <bblock@linux.vnet.ibm.com>
Date: Thu, 24 Aug 2017 01:57:56 +0200
Subject: [PATCH 67/98] bsg-lib: fix kernel panic resulting from missing
 allocation of reply-buffer

Since we split the scsi_request out of struct request, bsg fails to
provide a reply-buffer for the drivers. This was done via the pointer
for sense-data, that is not preallocated anymore.

Failing to allocate/assign it results in illegal dereferences because
LLDs use this pointer unquestioned.

An example panic on s390x, using the zFCP driver, looks like this (I had
debugging on, otherwise NULL-pointer dereferences wouldn't even panic on
s390x):

Unable to handle kernel pointer dereference in virtual kernel address space
Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403
Fault in home space mode while using kernel ASCE.
AS:0000000001590007 R3:0000000000000024
Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC
Modules linked in: <Long List>
CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3
Hardware name: IBM 2964 N96 702 (z/VM 6.4.0)
task: 0000000065cb0100 task.stack: 0000000065cb4000
Krnl PSW : 0704e00180000000 000003ff801e4156 (zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp])
           R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
Krnl GPRS: 0000000000000001 000000005fa9d0d0 000000005fa9d078 0000000000e16866
           000003ff00000290 6b6b6b6b6b6b6b6b 0000000059f78f00 000000000000000f
           00000000593a0958 00000000593a0958 0000000060d88800 000000005ddd4c38
           0000000058b50100 07000000659cba08 000003ff801e8556 00000000659cb9a8
Krnl Code: 000003ff801e4146: e31020500004        lg      %r1,80(%r2)
           000003ff801e414c: 58402040           l       %r4,64(%r2)
          #000003ff801e4150: e35020200004       lg      %r5,32(%r2)
          >000003ff801e4156: 50405004           st      %r4,4(%r5)
           000003ff801e415a: e54c50080000       mvhi    8(%r5),0
           000003ff801e4160: e33010280012       lt      %r3,40(%r1)
           000003ff801e4166: a718fffb           lhi     %r1,-5
           000003ff801e416a: 1803               lr      %r0,%r3
Call Trace:
([<000003ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp])
 [<000003ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp]
 [<000003ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp]
 [<00000000009b91b6>] qdio_kick_handler+0x2ae/0x2c8
 [<00000000009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10
 [<00000000001684c2>] tasklet_action+0x15a/0x1d8
 [<0000000000bd28ec>] __do_softirq+0x3ec/0x848
 [<00000000001675a4>] irq_exit+0x74/0xf8
 [<000000000010dd6a>] do_IRQ+0xba/0xf0
 [<0000000000bd19e8>] io_int_handler+0x104/0x2d4
 [<00000000001033b6>] enabled_wait+0xb6/0x188
([<000000000010339e>] enabled_wait+0x9e/0x188)
 [<000000000010396a>] arch_cpu_idle+0x32/0x50
 [<0000000000bd0112>] default_idle_call+0x52/0x68
 [<00000000001cd0fa>] do_idle+0x102/0x188
 [<00000000001cd41e>] cpu_startup_entry+0x3e/0x48
 [<0000000000118c64>] smp_start_secondary+0x11c/0x130
 [<0000000000bd2016>] restart_int_handler+0x62/0x78
 [<0000000000000000>]           (null)
INFO: lockdep is turned off.
Last Breaking-Event-Address:
 [<000003ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp]

Kernel panic - not syncing: Fatal exception in interrupt

This patch moves bsg-lib to allocate and setup struct bsg_job ahead of
time, including the allocation of a buffer for the reply-data.

This means, struct bsg_job is not allocated separately anymore, but as part
of struct request allocation - similar to struct scsi_cmd. Reflect this in
the function names that used to handle creation/destruction of struct
bsg_job.

Reported-by: Steffen Maier <maier@linux.vnet.ibm.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Benjamin Block <bblock@linux.vnet.ibm.com>
Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request")
Cc: <stable@vger.kernel.org> #4.11+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg-lib.c         | 74 ++++++++++++++++++++++++-----------------
 include/linux/blkdev.h  |  1 -
 include/linux/bsg-lib.h |  2 ++
 3 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c4513b23f57a..dd56d7460cb9 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -29,26 +29,25 @@
 #include <scsi/scsi_cmnd.h>
 
 /**
- * bsg_destroy_job - routine to teardown/delete a bsg job
+ * bsg_teardown_job - routine to teardown a bsg job
  * @job: bsg_job that is to be torn down
  */
-static void bsg_destroy_job(struct kref *kref)
+static void bsg_teardown_job(struct kref *kref)
 {
 	struct bsg_job *job = container_of(kref, struct bsg_job, kref);
 	struct request *rq = job->req;
 
-	blk_end_request_all(rq, BLK_STS_OK);
-
 	put_device(job->dev);	/* release reference for the request */
 
 	kfree(job->request_payload.sg_list);
 	kfree(job->reply_payload.sg_list);
-	kfree(job);
+
+	blk_end_request_all(rq, BLK_STS_OK);
 }
 
 void bsg_job_put(struct bsg_job *job)
 {
-	kref_put(&job->kref, bsg_destroy_job);
+	kref_put(&job->kref, bsg_teardown_job);
 }
 EXPORT_SYMBOL_GPL(bsg_job_put);
 
@@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
  */
 static void bsg_softirq_done(struct request *rq)
 {
-	struct bsg_job *job = rq->special;
+	struct bsg_job *job = blk_mq_rq_to_pdu(rq);
 
 	bsg_job_put(job);
 }
@@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
 }
 
 /**
- * bsg_create_job - create the bsg_job structure for the bsg request
+ * bsg_prepare_job - create the bsg_job structure for the bsg request
  * @dev: device that is being sent the bsg request
  * @req: BSG request that needs a job structure
  */
-static int bsg_create_job(struct device *dev, struct request *req)
+static int bsg_prepare_job(struct device *dev, struct request *req)
 {
 	struct request *rsp = req->next_rq;
-	struct request_queue *q = req->q;
 	struct scsi_request *rq = scsi_req(req);
-	struct bsg_job *job;
+	struct bsg_job *job = blk_mq_rq_to_pdu(req);
 	int ret;
 
-	BUG_ON(req->special);
-
-	job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
-	if (!job)
-		return -ENOMEM;
-
-	req->special = job;
-	job->req = req;
-	if (q->bsg_job_size)
-		job->dd_data = (void *)&job[1];
 	job->request = rq->cmd;
 	job->request_len = rq->cmd_len;
-	job->reply = rq->sense;
-	job->reply_len = SCSI_SENSE_BUFFERSIZE;	/* Size of sense buffer
-						 * allocated */
+
 	if (req->bio) {
 		ret = bsg_map_buffer(&job->request_payload, req);
 		if (ret)
@@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q)
 {
 	struct device *dev = q->queuedata;
 	struct request *req;
-	struct bsg_job *job;
 	int ret;
 
 	if (!get_device(dev))
@@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q)
 			break;
 		spin_unlock_irq(q->queue_lock);
 
-		ret = bsg_create_job(dev, req);
+		ret = bsg_prepare_job(dev, req);
 		if (ret) {
 			scsi_req(req)->result = ret;
 			blk_end_request_all(req, BLK_STS_OK);
@@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q)
 			continue;
 		}
 
-		job = req->special;
-		ret = q->bsg_job_fn(job);
+		ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
 		spin_lock_irq(q->queue_lock);
 		if (ret)
 			break;
@@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q)
 	spin_lock_irq(q->queue_lock);
 }
 
+static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
+{
+	struct bsg_job *job = blk_mq_rq_to_pdu(req);
+	struct scsi_request *sreq = &job->sreq;
+
+	memset(job, 0, sizeof(*job));
+
+	scsi_req_init(sreq);
+	sreq->sense_len = SCSI_SENSE_BUFFERSIZE;
+	sreq->sense = kzalloc(sreq->sense_len, gfp);
+	if (!sreq->sense)
+		return -ENOMEM;
+
+	job->req = req;
+	job->reply = sreq->sense;
+	job->reply_len = sreq->sense_len;
+	job->dd_data = job + 1;
+
+	return 0;
+}
+
+static void bsg_exit_rq(struct request_queue *q, struct request *req)
+{
+	struct bsg_job *job = blk_mq_rq_to_pdu(req);
+	struct scsi_request *sreq = &job->sreq;
+
+	kfree(sreq->sense);
+}
+
 /**
  * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
  * @dev: device to attach bsg device to
@@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
 	q = blk_alloc_queue(GFP_KERNEL);
 	if (!q)
 		return ERR_PTR(-ENOMEM);
-	q->cmd_size = sizeof(struct scsi_request);
+	q->cmd_size = sizeof(struct bsg_job) + dd_job_size;
+	q->init_rq_fn = bsg_init_rq;
+	q->exit_rq_fn = bsg_exit_rq;
 	q->request_fn = bsg_request_fn;
 
 	ret = blk_init_allocated_queue(q);
@@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
 		goto out_cleanup_queue;
 
 	q->queuedata = dev;
-	q->bsg_job_size = dd_job_size;
 	q->bsg_job_fn = job_fn;
 	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
 	queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 25f6a0cb27d3..2a5d52fa90f5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -568,7 +568,6 @@ struct request_queue {
 
 #if defined(CONFIG_BLK_DEV_BSG)
 	bsg_job_fn		*bsg_job_fn;
-	int			bsg_job_size;
 	struct bsg_class_device bsg_dev;
 #endif
 
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index e34dde2da0ef..637a20cfb237 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -24,6 +24,7 @@
 #define _BLK_BSG_
 
 #include <linux/blkdev.h>
+#include <scsi/scsi_request.h>
 
 struct request;
 struct device;
@@ -37,6 +38,7 @@ struct bsg_buffer {
 };
 
 struct bsg_job {
+	struct scsi_request sreq;
 	struct device *dev;
 	struct request *req;
 

From 9ce76511b67be8fbcdff36b7e1662e3887bb7377 Mon Sep 17 00:00:00 2001
From: Tom Rini <trini@konsulko.com>
Date: Tue, 22 Aug 2017 21:51:46 -0400
Subject: [PATCH 68/98] ASoC: rt5677: Reintroduce I2C device IDs

Not all devices with ACPI and this combination of sound devices will
have the required information provided via ACPI.  Reintroduce the I2C
device ID to restore sound functionality on the Chromebook 'Samus'
model.

[ More background note:
 the commit a36afb0ab648 ("ASoC: rt5677: Introduce proper table...")
 moved the i2c ID probed via ACPI ("RT5677CE:00") to a proper
 acpi_device_id table.  Although the action itself is correct per se,
 the overlooked issue is the reference to id->driver_data at
 rt5677_i2c_probe() for retrieving the corresponding chip model for
 the given id.  Since id=NULL is passed for ACPI matching case, we get
 an Oops now.

 We already have queued more fixes for 4.14 and they already address
 the issue, but they are bigger changes that aren't preferable for the
 late 4.13-rc stage.  So, this patch just papers over the bug as a
 once-off quick fix for a particular ACPI matching.  -- tiwai ]

Fixes: a36afb0ab648 ("ASoC: rt5677: Introduce proper table for ACPI enumeration")
Signed-off-by: Tom Rini <trini@konsulko.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/soc/codecs/rt5677.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c
index 36e530a36c82..6f629278d982 100644
--- a/sound/soc/codecs/rt5677.c
+++ b/sound/soc/codecs/rt5677.c
@@ -5021,6 +5021,7 @@ static const struct regmap_config rt5677_regmap = {
 static const struct i2c_device_id rt5677_i2c_id[] = {
 	{ "rt5677", RT5677 },
 	{ "rt5676", RT5676 },
+	{ "RT5677CE:00", RT5677 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, rt5677_i2c_id);

From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 18 Aug 2017 11:12:19 -0400
Subject: [PATCH 69/98] nfsd: Limit end of page list when decoding NFSv4 WRITE

When processing an NFSv4 WRITE operation, argp->end should never
point past the end of the data in the final page of the page list.
Otherwise, nfsd4_decode_compound can walk into uninitialized memory.

More critically, nfsd4_decode_write is failing to adjust argp->pagelen
when it increments argp->pagelist.  This can cause later xdr decoders
to assume more data is available than really is, which can cause server
crashes on malformed requests.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4xdr.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 20fbcab97753..5f940d2a136b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp)
 	argp->p = page_address(argp->pagelist[0]);
 	argp->pagelist++;
 	if (argp->pagelen < PAGE_SIZE) {
-		argp->end = argp->p + (argp->pagelen>>2);
+		argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
 		argp->pagelen = 0;
 	} else {
 		argp->end = argp->p + (PAGE_SIZE>>2);
@@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
 		argp->pagelen -= pages * PAGE_SIZE;
 		len -= pages * PAGE_SIZE;
 
-		argp->p = (__be32 *)page_address(argp->pagelist[0]);
-		argp->pagelist++;
-		argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
+		next_decode_page(argp);
 	}
 	argp->p += XDR_QUADLEN(len);
 

From eebe53e87f97975ee58a21693e44797608bf679c Mon Sep 17 00:00:00 2001
From: Vadim Lomovtsev <vlomovts@redhat.com>
Date: Mon, 21 Aug 2017 07:23:07 -0400
Subject: [PATCH 70/98] net: sunrpc: svcsock: fix NULL-pointer exception

While running nfs/connectathon tests, a kernel NULL-pointer exception
has been observed due to races in svcsock.c.

The race appears when the kernel accepts a connection via kernel_accept
(which creates a new socket) and starts queuing ingress packets to the
new socket. This happens in softirq context, which can run concurrently
on a different core while the new socket's setup is not yet complete.

The fix is to re-order socket user data init sequence and add
write/read barrier calls to be sure that we got proper values
for callback pointers before actually calling them.

Test results: nfs/connectathon reports '0' failed tests for about 200+ iterations.

Crash log:
---<-snip->---
[ 6708.638984] Unable to handle kernel NULL pointer dereference at virtual address 00000000
[ 6708.647093] pgd = ffff0000094e0000
[ 6708.650497] [00000000] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff80003, *pte=0000000000000000
[ 6708.660761] Internal error: Oops: 86000005 [#1] SMP
[ 6708.665630] Modules linked in: nfsv3 nfnetlink_queue nfnetlink_log nfnetlink rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache overlay xt_CONNSECMARK xt_SECMARK xt_conntrack iptable_security ip_tables ah4 xfrm4_mode_transport sctp tun binfmt_misc ext4 jbd2 mbcache loop tcp_diag udp_diag inet_diag rpcrdma ib_isert iscsi_target_mod ib_iser rdma_cm iw_cm libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_ucm ib_uverbs ib_umad ib_cm ib_core nls_koi8_u nls_cp932 ts_kmp nf_conntrack_ipv4 nf_defrag_ipv4 nf_conntrack vfat fat ghash_ce sha2_ce sha1_ce cavium_rng_vf i2c_thunderx sg thunderx_edac i2c_smbus edac_core cavium_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c nicvf nicpf ast i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops
[ 6708.736446]  ttm drm i2c_core thunder_bgx thunder_xcv mdio_thunder mdio_cavium dm_mirror dm_region_hash dm_log dm_mod [last unloaded: stap_3c300909c5b3f46dcacd49aab3334af_87021]
[ 6708.752275] CPU: 84 PID: 0 Comm: swapper/84 Tainted: G        W  OE   4.11.0-4.el7.aarch64 #1
[ 6708.760787] Hardware name: www.cavium.com CRB-2S/CRB-2S, BIOS 0.3 Mar 13 2017
[ 6708.767910] task: ffff810006842e80 task.stack: ffff81000689c000
[ 6708.773822] PC is at 0x0
[ 6708.776739] LR is at svc_data_ready+0x38/0x88 [sunrpc]
[ 6708.781866] pc : [<0000000000000000>] lr : [<ffff0000029d7378>] pstate: 60000145
[ 6708.789248] sp : ffff810ffbad3900
[ 6708.792551] x29: ffff810ffbad3900 x28: ffff000008c73d58
[ 6708.797853] x27: 0000000000000000 x26: ffff81000bbe1e00
[ 6708.803156] x25: 0000000000000020 x24: ffff800f7410bf28
[ 6708.808458] x23: ffff000008c63000 x22: ffff000008c63000
[ 6708.813760] x21: ffff800f7410bf28 x20: ffff81000bbe1e00
[ 6708.819063] x19: ffff810012412400 x18: 00000000d82a9df2
[ 6708.824365] x17: 0000000000000000 x16: 0000000000000000
[ 6708.829667] x15: 0000000000000000 x14: 0000000000000001
[ 6708.834969] x13: 0000000000000000 x12: 722e736f622e676e
[ 6708.840271] x11: 00000000f814dd99 x10: 0000000000000000
[ 6708.845573] x9 : 7374687225000000 x8 : 0000000000000000
[ 6708.850875] x7 : 0000000000000000 x6 : 0000000000000000
[ 6708.856177] x5 : 0000000000000028 x4 : 0000000000000000
[ 6708.861479] x3 : 0000000000000000 x2 : 00000000e5000000
[ 6708.866781] x1 : 0000000000000000 x0 : ffff81000bbe1e00
[ 6708.872084]
[ 6708.873565] Process swapper/84 (pid: 0, stack limit = 0xffff81000689c000)
[ 6708.880341] Stack: (0xffff810ffbad3900 to 0xffff8100068a0000)
[ 6708.886075] Call trace:
[ 6708.888513] Exception stack(0xffff810ffbad3710 to 0xffff810ffbad3840)
[ 6708.894942] 3700:                                   ffff810012412400 0001000000000000
[ 6708.902759] 3720: ffff810ffbad3900 0000000000000000 0000000060000145 ffff800f79300000
[ 6708.910577] 3740: ffff000009274d00 00000000000003ea 0000000000000015 ffff000008c63000
[ 6708.918395] 3760: ffff810ffbad3830 ffff800f79300000 000000000000004d 0000000000000000
[ 6708.926212] 3780: ffff810ffbad3890 ffff0000080f88dc ffff800f79300000 000000000000004d
[ 6708.934030] 37a0: ffff800f7930093c ffff000008c63000 0000000000000000 0000000000000140
[ 6708.941848] 37c0: ffff000008c2c000 0000000000040b00 ffff81000bbe1e00 0000000000000000
[ 6708.949665] 37e0: 00000000e5000000 0000000000000000 0000000000000000 0000000000000028
[ 6708.957483] 3800: 0000000000000000 0000000000000000 0000000000000000 7374687225000000
[ 6708.965300] 3820: 0000000000000000 00000000f814dd99 722e736f622e676e 0000000000000000
[ 6708.973117] [<          (null)>]           (null)
[ 6708.977824] [<ffff0000086f9fa4>] tcp_data_queue+0x754/0xc5c
[ 6708.983386] [<ffff0000086fa64c>] tcp_rcv_established+0x1a0/0x67c
[ 6708.989384] [<ffff000008704120>] tcp_v4_do_rcv+0x15c/0x22c
[ 6708.994858] [<ffff000008707418>] tcp_v4_rcv+0xaf0/0xb58
[ 6709.000077] [<ffff0000086df784>] ip_local_deliver_finish+0x10c/0x254
[ 6709.006419] [<ffff0000086dfea4>] ip_local_deliver+0xf0/0xfc
[ 6709.011980] [<ffff0000086dfad4>] ip_rcv_finish+0x208/0x3a4
[ 6709.017454] [<ffff0000086e018c>] ip_rcv+0x2dc/0x3c8
[ 6709.022328] [<ffff000008692fc8>] __netif_receive_skb_core+0x2f8/0xa0c
[ 6709.028758] [<ffff000008696068>] __netif_receive_skb+0x38/0x84
[ 6709.034580] [<ffff00000869611c>] netif_receive_skb_internal+0x68/0xdc
[ 6709.041010] [<ffff000008696bc0>] napi_gro_receive+0xcc/0x1a8
[ 6709.046690] [<ffff0000014b0fc4>] nicvf_cq_intr_handler+0x59c/0x730 [nicvf]
[ 6709.053559] [<ffff0000014b1380>] nicvf_poll+0x38/0xb8 [nicvf]
[ 6709.059295] [<ffff000008697a6c>] net_rx_action+0x2f8/0x464
[ 6709.064771] [<ffff000008081824>] __do_softirq+0x11c/0x308
[ 6709.070164] [<ffff0000080d14e4>] irq_exit+0x12c/0x174
[ 6709.075206] [<ffff00000813101c>] __handle_domain_irq+0x78/0xc4
[ 6709.081027] [<ffff000008081608>] gic_handle_irq+0x94/0x190
[ 6709.086501] Exception stack(0xffff81000689fdf0 to 0xffff81000689ff20)
[ 6709.092929] fde0:                                   0000810ff2ec0000 ffff000008c10000
[ 6709.100747] fe00: ffff000008c70ef4 0000000000000001 0000000000000000 ffff810ffbad9b18
[ 6709.108565] fe20: ffff810ffbad9c70 ffff8100169d3800 ffff810006843ab0 ffff81000689fe80
[ 6709.116382] fe40: 0000000000000bd0 0000ffffdf979cd0 183f5913da192500 0000ffff8a254ce4
[ 6709.124200] fe60: 0000ffff8a254b78 0000aaab10339808 0000000000000000 0000ffff8a0c2a50
[ 6709.132018] fe80: 0000ffffdf979b10 ffff000008d6d450 ffff000008c10000 ffff000008d6d000
[ 6709.139836] fea0: 0000000000000054 ffff000008cd3dbc 0000000000000000 0000000000000000
[ 6709.147653] fec0: 0000000000000000 0000000000000000 0000000000000000 ffff81000689ff20
[ 6709.155471] fee0: ffff000008085240 ffff81000689ff20 ffff000008085244 0000000060000145
[ 6709.163289] ff00: ffff81000689ff10 ffff00000813f1e4 ffffffffffffffff ffff00000813f238
[ 6709.171107] [<ffff000008082eb4>] el1_irq+0xb4/0x140
[ 6709.175976] [<ffff000008085244>] arch_cpu_idle+0x44/0x11c
[ 6709.181368] [<ffff0000087bf3b8>] default_idle_call+0x20/0x30
[ 6709.187020] [<ffff000008116d50>] do_idle+0x158/0x1e4
[ 6709.191973] [<ffff000008116ff4>] cpu_startup_entry+0x2c/0x30
[ 6709.197624] [<ffff00000808e7cc>] secondary_start_kernel+0x13c/0x160
[ 6709.203878] [<0000000001bc71c4>] 0x1bc71c4
[ 6709.207967] Code: bad PC value
[ 6709.211061] SMP: stopping secondary CPUs
[ 6709.218830] Starting crashdump kernel...
[ 6709.222749] Bye!
---<-snip>---

Signed-off-by: Vadim Lomovtsev <vlomovts@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/svcsock.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2b720fa35c4f..e18500151236 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk)
 		dprintk("svc: socket %p(inet %p), busy=%d\n",
 			svsk, sk,
 			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_odata(sk);
 		if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
 			svc_xprt_enqueue(&svsk->sk_xprt);
@@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk)
 	if (svsk) {
 		dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
 			svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_owspace(sk);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
@@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
 	dprintk("svc: socket %p TCP (listen) state change %d\n",
 		sk, sk->sk_state);
 
-	if (svsk)
+	if (svsk) {
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_odata(sk);
+	}
+
 	/*
 	 * This callback may called twice when a new connection
 	 * is established as a child socket inherits everything
@@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk)
 	if (!svsk)
 		printk("svc: socket %p: no user data\n", sk);
 	else {
+		/* Refer to svc_setup_socket() for details. */
+		rmb();
 		svsk->sk_ostate(sk);
 		if (sk->sk_state != TCP_ESTABLISHED) {
 			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 		return ERR_PTR(err);
 	}
 
-	inet->sk_user_data = svsk;
 	svsk->sk_sock = sock;
 	svsk->sk_sk = inet;
 	svsk->sk_ostate = inet->sk_state_change;
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
+	/*
+	 * This barrier is necessary in order to prevent race condition
+	 * with svc_data_ready(), svc_listen_data_ready() and others
+	 * when calling callbacks above.
+	 */
+	wmb();
+	inet->sk_user_data = svsk;
 
 	/* Initialize the socket */
 	if (sock->type == SOCK_DGRAM)

From 4a646580f793d19717f7e034c8d473b509c27d49 Mon Sep 17 00:00:00 2001
From: Masaki Ota <masaki.ota@jp.alps.com>
Date: Thu, 24 Aug 2017 15:44:36 -0700
Subject: [PATCH 71/98] Input: ALPS - fix two-finger scroll breakage in right
 side on ALPS touchpad

Fix the issue where two-finger scroll does not work correctly with the
V8 protocol. The cause is that the V8 protocol X-coordinate decoding is
wrong for SS4 PLUS devices, so add an SS4 PLUS X decode definition.

More notes:
the problem was exposed by the commit e7348396c6d5 ("Input: ALPS
- fix V8+ protocol handling (73 03 28)"), where a fix for the V8+
protocol was applied.  Although the culprit must have been present
beforehand, the two-finger scroll happened to work even with the
wrongly reported values for some reason.  It got broken by the commit
above just because it changed the x_max value, and this made libinput
interpret the MT events correctly.  Since the X coordinate is falsely
reported as doubled, the events on the right-half side go outside the
boundary, thus they are no longer handled.  This resulted in a broken
two-finger scroll.

One finger event is decoded differently, and it didn't suffer from
this problem.  The problem was only about MT events. --tiwai

Fixes: e7348396c6d5 ("Input: ALPS - fix V8+ protocol handling (73 03 28)")
Signed-off-by: Masaki Ota <masaki.ota@jp.alps.com>
Tested-by: Takashi Iwai <tiwai@suse.de>
Tested-by: Paul Donohue <linux-kernel@PaulSD.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/alps.c | 41 ++++++++++++++++++++++++++++----------
 drivers/input/mouse/alps.h |  8 ++++++++
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c
index 262d1057c1da..850b00e3ad8e 100644
--- a/drivers/input/mouse/alps.c
+++ b/drivers/input/mouse/alps.c
@@ -1215,14 +1215,24 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
 
 	case SS4_PACKET_ID_TWO:
 		if (priv->flags & ALPS_BUTTONPAD) {
-			f->mt[0].x = SS4_BTL_MF_X_V2(p, 0);
+			if (IS_SS4PLUS_DEV(priv->dev_id)) {
+				f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
+				f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+			} else {
+				f->mt[0].x = SS4_BTL_MF_X_V2(p, 0);
+				f->mt[1].x = SS4_BTL_MF_X_V2(p, 1);
+			}
 			f->mt[0].y = SS4_BTL_MF_Y_V2(p, 0);
-			f->mt[1].x = SS4_BTL_MF_X_V2(p, 1);
 			f->mt[1].y = SS4_BTL_MF_Y_V2(p, 1);
 		} else {
-			f->mt[0].x = SS4_STD_MF_X_V2(p, 0);
+			if (IS_SS4PLUS_DEV(priv->dev_id)) {
+				f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0);
+				f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+			} else {
+				f->mt[0].x = SS4_STD_MF_X_V2(p, 0);
+				f->mt[1].x = SS4_STD_MF_X_V2(p, 1);
+			}
 			f->mt[0].y = SS4_STD_MF_Y_V2(p, 0);
-			f->mt[1].x = SS4_STD_MF_X_V2(p, 1);
 			f->mt[1].y = SS4_STD_MF_Y_V2(p, 1);
 		}
 		f->pressure = SS4_MF_Z_V2(p, 0) ? 0x30 : 0;
@@ -1239,16 +1249,27 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
 
 	case SS4_PACKET_ID_MULTI:
 		if (priv->flags & ALPS_BUTTONPAD) {
-			f->mt[2].x = SS4_BTL_MF_X_V2(p, 0);
+			if (IS_SS4PLUS_DEV(priv->dev_id)) {
+				f->mt[0].x = SS4_PLUS_BTL_MF_X_V2(p, 0);
+				f->mt[1].x = SS4_PLUS_BTL_MF_X_V2(p, 1);
+			} else {
+				f->mt[2].x = SS4_BTL_MF_X_V2(p, 0);
+				f->mt[3].x = SS4_BTL_MF_X_V2(p, 1);
+			}
+
 			f->mt[2].y = SS4_BTL_MF_Y_V2(p, 0);
-			f->mt[3].x = SS4_BTL_MF_X_V2(p, 1);
 			f->mt[3].y = SS4_BTL_MF_Y_V2(p, 1);
 			no_data_x = SS4_MFPACKET_NO_AX_BL;
 			no_data_y = SS4_MFPACKET_NO_AY_BL;
 		} else {
-			f->mt[2].x = SS4_STD_MF_X_V2(p, 0);
+			if (IS_SS4PLUS_DEV(priv->dev_id)) {
+				f->mt[0].x = SS4_PLUS_STD_MF_X_V2(p, 0);
+				f->mt[1].x = SS4_PLUS_STD_MF_X_V2(p, 1);
+			} else {
+				f->mt[0].x = SS4_STD_MF_X_V2(p, 0);
+				f->mt[1].x = SS4_STD_MF_X_V2(p, 1);
+			}
 			f->mt[2].y = SS4_STD_MF_Y_V2(p, 0);
-			f->mt[3].x = SS4_STD_MF_X_V2(p, 1);
 			f->mt[3].y = SS4_STD_MF_Y_V2(p, 1);
 			no_data_x = SS4_MFPACKET_NO_AX;
 			no_data_y = SS4_MFPACKET_NO_AY;
@@ -2541,8 +2562,8 @@ static int alps_set_defaults_ss4_v2(struct psmouse *psmouse,
 
 	memset(otp, 0, sizeof(otp));
 
-	if (alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0]) ||
-	    alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0]))
+	if (alps_get_otp_values_ss4_v2(psmouse, 1, &otp[1][0]) ||
+	    alps_get_otp_values_ss4_v2(psmouse, 0, &otp[0][0]))
 		return -1;
 
 	alps_update_device_area_ss4_v2(otp, priv);
diff --git a/drivers/input/mouse/alps.h b/drivers/input/mouse/alps.h
index ed2d6879fa52..c80a7c76cb76 100644
--- a/drivers/input/mouse/alps.h
+++ b/drivers/input/mouse/alps.h
@@ -100,6 +100,10 @@ enum SS4_PACKET_ID {
 				 ((_b[1 + _i * 3]  << 5) & 0x1F00)	\
 				)
 
+#define SS4_PLUS_STD_MF_X_V2(_b, _i) (((_b[0 + (_i) * 3] << 4) & 0x0070) | \
+				 ((_b[1 + (_i) * 3]  << 4) & 0x0F80)	\
+				)
+
 #define SS4_STD_MF_Y_V2(_b, _i)	(((_b[1 + (_i) * 3] << 3) & 0x0010) |	\
 				 ((_b[2 + (_i) * 3] << 5) & 0x01E0) |	\
 				 ((_b[2 + (_i) * 3] << 4) & 0x0E00)	\
@@ -109,6 +113,10 @@ enum SS4_PACKET_ID {
 				 ((_b[0 + (_i) * 3] >> 3) & 0x0010)	\
 				)
 
+#define SS4_PLUS_BTL_MF_X_V2(_b, _i) (SS4_PLUS_STD_MF_X_V2(_b, _i) |	\
+				 ((_b[0 + (_i) * 3] >> 4) & 0x0008)	\
+				)
+
 #define SS4_BTL_MF_Y_V2(_b, _i)	(SS4_STD_MF_Y_V2(_b, _i) | \
 				 ((_b[0 + (_i) * 3] >> 3) & 0x0008)	\
 				)

From b974696da1cfc5aa0c29ed97dc8f6c239899e64b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 23 Aug 2017 09:03:04 +0200
Subject: [PATCH 72/98] mtd: nandsim: remove debugfs entries in error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The debugfs entries must be removed before an error is returned in the
probe function. Otherwise another try to load the module fails and when
the debugfs files are accessed without the module loaded, the kernel
still tries to call a function in that module.

Fixes: 5346c27c5fed ("mtd: nandsim: Introduce debugfs infrastructure")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Richard Weinberger <richard@nod.at>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/nandsim.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 03a0d057bf2f..e4211c3cc49b 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -2373,6 +2373,7 @@ static int __init ns_init_module(void)
         return 0;
 
 err_exit:
+	nandsim_debugfs_remove(nand);
 	free_nandsim(nand);
 	nand_release(nsmtd);
 	for (i = 0;i < ARRAY_SIZE(nand->partitions); ++i)

From be3e83e3471cd0faff2c2d88fe9cfc73d9a9745a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 23 Aug 2017 20:45:01 +0200
Subject: [PATCH 73/98] mtd: nand: atmel: Relax tADL_min constraint

Version 4 of the ONFI spec mandates that tADL be at least 400 nanoseconds,
but, depending on the master clock rate, 400 ns may not fit in the tADL
field of the SMC reg. We need to relax the check and accept the -ERANGE
return code.

Note that previous versions of the ONFI spec had a lower tADL_min (100 or
200 ns). It's not clear why this timing constraint got increased but it
seems most NANDs are fine with values lower than 400ns, so we should be
safe.

Fixes: f9ce2eddf176 ("mtd: nand: atmel: Add ->setup_data_interface() hooks")
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Tested-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/atmel/nand-controller.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c
index 2c8baa0c2c4e..ceec21bd30c4 100644
--- a/drivers/mtd/nand/atmel/nand-controller.c
+++ b/drivers/mtd/nand/atmel/nand-controller.c
@@ -1364,7 +1364,18 @@ static int atmel_smc_nand_prepare_smcconf(struct atmel_nand *nand,
 	ret = atmel_smc_cs_conf_set_timing(smcconf,
 					   ATMEL_HSMC_TIMINGS_TADL_SHIFT,
 					   ncycles);
-	if (ret)
+	/*
+	 * Version 4 of the ONFI spec mandates that tADL be at least 400
+	 * nanoseconds, but, depending on the master clock rate, 400 ns may not
+	 * fit in the tADL field of the SMC reg. We need to relax the check and
+	 * accept the -ERANGE return code.
+	 *
+	 * Note that previous versions of the ONFI spec had a lower tADL_min
+	 * (100 or 200 ns). It's not clear why this timing constraint got
+	 * increased but it seems most NANDs are fine with values lower than
+	 * 400ns, so we should be safe.
+	 */
+	if (ret && ret != -ERANGE)
 		return ret;
 
 	ncycles = DIV_ROUND_UP(conf->timings.sdr.tAR_min, mckperiodps);

From c469268cd523245cc58255f6696e0c295485cb0b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 24 Aug 2017 11:59:31 +0200
Subject: [PATCH 74/98] KVM: x86: block guest protection keys unless the host
 has them enabled

If the host has protection keys disabled, we cannot read and write the
guest PKRU---RDPKRU and WRPKRU fail with #GP(0) if CR4.PKE=0.  Block
the PKU cpuid bit in that case.

This ensures that guest_CR4.PKE=1 implies host_CR4.PKE=1.

Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870
Cc: stable@vger.kernel.org
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59ca2eea522c..19adbb418443 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
 			cpuid_mask(&entry->ecx, CPUID_7_ECX);
 			/* PKU is not yet implemented for shadow paging. */
-			if (!tdp_enabled)
+			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
 				entry->ecx &= ~F(PKU);
 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 			entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);

From b9dd21e104bcd45e124acfe978a79df71259e59b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 23 Aug 2017 23:14:38 +0200
Subject: [PATCH 75/98] KVM: x86: simplify handling of PKRU

Move it to struct kvm_arch_vcpu, replacing guest_pkru_valid with a
simple comparison against the host value of the register.  The write of
PKRU in addition can be skipped if the guest has not enabled the feature.
Once we do this, we need not test OSPKE in the host anymore, because
guest_CR4.PKE=1 implies host_CR4.PKE=1.

The static PKU test is kept to elide the code on older CPUs.

Suggested-by: Yang Zhang <zy107165@alibaba-inc.com>
Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870
Cc: stable@vger.kernel.org
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/kvm_cache_regs.h   |  5 -----
 arch/x86/kvm/mmu.h              |  2 +-
 arch/x86/kvm/svm.c              |  7 -------
 arch/x86/kvm/vmx.c              | 25 ++++++++-----------------
 5 files changed, 10 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ac4fba6d8e..f4d120a3e22e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -492,6 +492,7 @@ struct kvm_vcpu_arch {
 	unsigned long cr4;
 	unsigned long cr4_guest_owned_bits;
 	unsigned long cr8;
+	u32 pkru;
 	u32 hflags;
 	u64 efer;
 	u64 apic_base;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2595f9..e1e89ee4af75 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
 }
 
-static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
-{
-	return kvm_x86_ops->get_pkru(vcpu);
-}
-
 static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags |= HF_GUEST_MASK;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d7d248a000dd..4b9a3ae6b725 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		* index of the protection domain, so pte_pkey * 2 is
 		* is the index of the first bit for the domain.
 		*/
-		pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+		pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
 
 		/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
 		offset = (pfec & ~1) +
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 56ba05312759..af256b786a70 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
-static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
 	switch (reg) {
@@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
 
-	.get_pkru = svm_get_pkru,
-
 	.tlb_flush = svm_flush_tlb,
 
 	.run = svm_vcpu_run,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9b21b1223035..c6ef2940119b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -636,8 +636,6 @@ struct vcpu_vmx {
 
 	u64 current_tsc_ratio;
 
-	bool guest_pkru_valid;
-	u32 guest_pkru;
 	u32 host_pkru;
 
 	/*
@@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 		to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
 }
 
-static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
-{
-	return to_vmx(vcpu)->guest_pkru;
-}
-
 static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
-	if (vmx->guest_pkru_valid)
-		__write_pkru(vmx->guest_pkru);
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
+	    vcpu->arch.pkru != vmx->host_pkru)
+		__write_pkru(vcpu->arch.pkru);
 
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
@@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * back on host, so it is safe to read guest PKRU from current
 	 * XSAVE.
 	 */
-	if (boot_cpu_has(X86_FEATURE_OSPKE)) {
-		vmx->guest_pkru = __read_pkru();
-		if (vmx->guest_pkru != vmx->host_pkru) {
-			vmx->guest_pkru_valid = true;
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
+		vcpu->arch.pkru = __read_pkru();
+		if (vcpu->arch.pkru != vmx->host_pkru)
 			__write_pkru(vmx->host_pkru);
-		} else
-			vmx->guest_pkru_valid = false;
 	}
 
 	/*
@@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
 
-	.get_pkru = vmx_get_pkru,
-
 	.tlb_flush = vmx_flush_tlb,
 
 	.run = vmx_vcpu_run,

From 38cfd5e3df9c4f88e76b547eee2087ee5c042ae2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 23 Aug 2017 23:16:29 +0200
Subject: [PATCH 76/98] KVM, pkeys: do not use PKRU value in
 vcpu->arch.guest_fpu.state

The host pkru is restored right after vcpu exit (commit 1be0e61), so
KVM_GET_XSAVE will return the host PKRU value instead.  Fix this by
using the guest PKRU explicitly in fill_xsave and load_xsave.  This
part is based on a patch by Junkang Fu.

The host PKRU data may also not match the value in vcpu->arch.guest_fpu.state,
because it could have been changed by userspace since the last time
it was saved, so skip loading it in kvm_load_guest_fpu.

Reported-by: Junkang Fu <junkang.fjk@alibaba-inc.com>
Cc: Yang Zhang <zy107165@alibaba-inc.com>
Fixes: 1be0e61c1f255faaeab04a390e00c8b9b9042870
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/fpu/internal.h |  6 +++---
 arch/x86/kvm/x86.c                  | 17 ++++++++++++++---
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 255645f60ca2..554cdb205d17 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
 	return 0;
 }
 
-static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
+static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
 {
 	if (use_xsave()) {
-		copy_kernel_to_xregs(&fpstate->xsave, -1);
+		copy_kernel_to_xregs(&fpstate->xsave, mask);
 	} else {
 		if (use_fxsr())
 			copy_kernel_to_fxregs(&fpstate->fxsave);
@@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
 			: : [addr] "m" (fpstate));
 	}
 
-	__copy_kernel_to_fpregs(fpstate);
+	__copy_kernel_to_fpregs(fpstate, -1);
 }
 
 extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d734aa8c5b4f..05a5e57c6f39 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3245,7 +3245,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 			u32 size, offset, ecx, edx;
 			cpuid_count(XSTATE_CPUID, index,
 				    &size, &offset, &ecx, &edx);
-			memcpy(dest + offset, src, size);
+			if (feature == XFEATURE_MASK_PKRU)
+				memcpy(dest + offset, &vcpu->arch.pkru,
+				       sizeof(vcpu->arch.pkru));
+			else
+				memcpy(dest + offset, src, size);
+
 		}
 
 		valid -= feature;
@@ -3283,7 +3288,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 			u32 size, offset, ecx, edx;
 			cpuid_count(XSTATE_CPUID, index,
 				    &size, &offset, &ecx, &edx);
-			memcpy(dest, src + offset, size);
+			if (feature == XFEATURE_MASK_PKRU)
+				memcpy(&vcpu->arch.pkru, src + offset,
+				       sizeof(vcpu->arch.pkru));
+			else
+				memcpy(dest, src + offset, size);
 		}
 
 		valid -= feature;
@@ -7633,7 +7642,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 */
 	vcpu->guest_fpu_loaded = 1;
 	__kernel_fpu_begin();
-	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
+	/* PKRU is separately restored in kvm_x86_ops->run.  */
+	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+				~XFEATURE_MASK_PKRU);
 	trace_kvm_fpu(1);
 }
 

From ccd5b3235180eef3cfec337df1c8554ab151b5cc Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 24 Aug 2017 10:50:29 -0700
Subject: [PATCH 77/98] x86/mm: Fix use-after-free of ldt_struct

The following commit:

  39a0526fb3f7 ("x86/mm: Factor out LDT init from context init")

renamed init_new_context() to init_new_context_ldt() and added a new
init_new_context() which calls init_new_context_ldt().  However, the
error code of init_new_context_ldt() was ignored.  Consequently, if a
memory allocation in alloc_ldt_struct() failed during a fork(), the
->context.ldt of the new task remained the same as that of the old task
(due to the memcpy() in dup_mm()).  ldt_struct's are not intended to be
shared, so a use-after-free occurred after one task exited.

Fix the bug by making init_new_context() pass through the error code of
init_new_context_ldt().

This bug was found by syzkaller, which encountered the following splat:

    BUG: KASAN: use-after-free in free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116
    Read of size 4 at addr ffff88006d2cb7c8 by task kworker/u9:0/3710

    CPU: 1 PID: 3710 Comm: kworker/u9:0 Not tainted 4.13.0-rc4-next-20170811 #2
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    Call Trace:
     __dump_stack lib/dump_stack.c:16 [inline]
     dump_stack+0x194/0x257 lib/dump_stack.c:52
     print_address_description+0x73/0x250 mm/kasan/report.c:252
     kasan_report_error mm/kasan/report.c:351 [inline]
     kasan_report+0x24e/0x340 mm/kasan/report.c:409
     __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:429
     free_ldt_struct.part.2+0x10a/0x150 arch/x86/kernel/ldt.c:116
     free_ldt_struct arch/x86/kernel/ldt.c:173 [inline]
     destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171
     destroy_context arch/x86/include/asm/mmu_context.h:157 [inline]
     __mmdrop+0xe9/0x530 kernel/fork.c:889
     mmdrop include/linux/sched/mm.h:42 [inline]
     exec_mmap fs/exec.c:1061 [inline]
     flush_old_exec+0x173c/0x1ff0 fs/exec.c:1291
     load_elf_binary+0x81f/0x4ba0 fs/binfmt_elf.c:855
     search_binary_handler+0x142/0x6b0 fs/exec.c:1652
     exec_binprm fs/exec.c:1694 [inline]
     do_execveat_common.isra.33+0x1746/0x22e0 fs/exec.c:1816
     do_execve+0x31/0x40 fs/exec.c:1860
     call_usermodehelper_exec_async+0x457/0x8f0 kernel/umh.c:100
     ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:431

    Allocated by task 3700:
     save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
     save_stack+0x43/0xd0 mm/kasan/kasan.c:447
     set_track mm/kasan/kasan.c:459 [inline]
     kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
     kmem_cache_alloc_trace+0x136/0x750 mm/slab.c:3627
     kmalloc include/linux/slab.h:493 [inline]
     alloc_ldt_struct+0x52/0x140 arch/x86/kernel/ldt.c:67
     write_ldt+0x7b7/0xab0 arch/x86/kernel/ldt.c:277
     sys_modify_ldt+0x1ef/0x240 arch/x86/kernel/ldt.c:307
     entry_SYSCALL_64_fastpath+0x1f/0xbe

    Freed by task 3700:
     save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
     save_stack+0x43/0xd0 mm/kasan/kasan.c:447
     set_track mm/kasan/kasan.c:459 [inline]
     kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
     __cache_free mm/slab.c:3503 [inline]
     kfree+0xca/0x250 mm/slab.c:3820
     free_ldt_struct.part.2+0xdd/0x150 arch/x86/kernel/ldt.c:121
     free_ldt_struct arch/x86/kernel/ldt.c:173 [inline]
     destroy_context_ldt+0x60/0x80 arch/x86/kernel/ldt.c:171
     destroy_context arch/x86/include/asm/mmu_context.h:157 [inline]
     __mmdrop+0xe9/0x530 kernel/fork.c:889
     mmdrop include/linux/sched/mm.h:42 [inline]
     __mmput kernel/fork.c:916 [inline]
     mmput+0x541/0x6e0 kernel/fork.c:927
     copy_process.part.36+0x22e1/0x4af0 kernel/fork.c:1931
     copy_process kernel/fork.c:1546 [inline]
     _do_fork+0x1ef/0xfb0 kernel/fork.c:2025
     SYSC_clone kernel/fork.c:2135 [inline]
     SyS_clone+0x37/0x50 kernel/fork.c:2129
     do_syscall_64+0x26c/0x8c0 arch/x86/entry/common.c:287
     return_from_SYSCALL_64+0x0/0x7a

Here is a C reproducer:

    #include <asm/ldt.h>
    #include <pthread.h>
    #include <signal.h>
    #include <stdlib.h>
    #include <sys/syscall.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static void *fork_thread(void *_arg)
    {
        fork();
    }

    int main(void)
    {
        struct user_desc desc = { .entry_number = 8191 };

        syscall(__NR_modify_ldt, 1, &desc, sizeof(desc));

        for (;;) {
            if (fork() == 0) {
                pthread_t t;

                srand(getpid());
                pthread_create(&t, NULL, fork_thread, NULL);
                usleep(rand() % 10000);
                syscall(__NR_exit_group, 0);
            }
            wait(NULL);
        }
    }

Note: the reproducer takes advantage of the fact that alloc_ldt_struct()
may use vmalloc() to allocate a large ->entries array, and after
commit:

  5d17a73a2ebe ("vmalloc: back off when the current task is killed")

it is possible for userspace to fail a task's vmalloc() by
sending a fatal signal, e.g. via exit_group().  It would be more
difficult to reproduce this bug on kernels without that commit.

This bug only affected kernels with CONFIG_MODIFY_LDT_SYSCALL=y.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: <stable@vger.kernel.org> [v4.6+]
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mm@kvack.org
Fixes: 39a0526fb3f7 ("x86/mm: Factor out LDT init from context init")
Link: http://lkml.kernel.org/r/20170824175029.76040-1-ebiggers3@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 265c907d7d4c..7a234be7e298 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk,
 		mm->context.execute_only_pkey = -1;
 	}
 	#endif
-	init_new_context_ldt(tsk, mm);
-
-	return 0;
+	return init_new_context_ldt(tsk, mm);
 }
 static inline void destroy_context(struct mm_struct *mm)
 {

From 47c5310a8dbe7c2cb9f0083daa43ceed76c257fa Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 24 Aug 2017 19:14:47 +1000
Subject: [PATCH 78/98] KVM: PPC: Book3S: Fix race and leak in
 kvm_vm_ioctl_create_spapr_tce()

Nixiaoming pointed out that there is a memory leak in
kvm_vm_ioctl_create_spapr_tce() if the call to anon_inode_getfd()
fails; the memory allocated for the kvmppc_spapr_tce_table struct
is not freed, and nor are the pages allocated for the iommu
tables.  In addition, we have already incremented the process's
count of locked memory pages, and this doesn't get restored on
error.

David Hildenbrand pointed out that there is a race in that the
function checks early on that there is not already an entry in the
kvm->arch.spapr_tce_tables list with the same LIOBN, but an entry with
the same LIOBN could get added between then and when the new entry is
added to the list.

This fixes all three problems.  To simplify things, we now call
anon_inode_getfd() before placing the new entry in the list.  The
check for an existing entry is done while holding the kvm->lock
mutex, immediately before adding the new entry to the list.
Finally, on failure we now call kvmppc_account_memlimit to
decrement the process's count of locked memory pages.

Reported-by: Nixiaoming <nixiaoming@huawei.com>
Reported-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/kvm/book3s_64_vio.c | 56 +++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index a160c14304eb..53766e2bc029 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				   struct kvm_create_spapr_tce_64 *args)
 {
 	struct kvmppc_spapr_tce_table *stt = NULL;
+	struct kvmppc_spapr_tce_table *siter;
 	unsigned long npages, size;
 	int ret = -ENOMEM;
 	int i;
+	int fd = -1;
 
 	if (!args->size)
 		return -EINVAL;
 
-	/* Check this LIOBN hasn't been previously allocated */
-	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
-		if (stt->liobn == args->liobn)
-			return -EBUSY;
-	}
-
 	size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
 	npages = kvmppc_tce_pages(size);
 	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
-	if (ret) {
-		stt = NULL;
-		goto fail;
-	}
+	if (ret)
+		return ret;
 
 	ret = -ENOMEM;
 	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
 		      GFP_KERNEL);
 	if (!stt)
-		goto fail;
+		goto fail_acct;
 
 	stt->liobn = args->liobn;
 	stt->page_shift = args->page_shift;
@@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 			goto fail;
 	}
 
-	kvm_get_kvm(kvm);
+	ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				    stt, O_RDWR | O_CLOEXEC);
+	if (ret < 0)
+		goto fail;
 
 	mutex_lock(&kvm->lock);
-	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+
+	/* Check this LIOBN hasn't been previously allocated */
+	ret = 0;
+	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
+		if (siter->liobn == args->liobn) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+
+	if (!ret) {
+		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+		kvm_get_kvm(kvm);
+	}
 
 	mutex_unlock(&kvm->lock);
 
-	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-				stt, O_RDWR | O_CLOEXEC);
+	if (!ret)
+		return fd;
 
-fail:
-	if (stt) {
-		for (i = 0; i < npages; i++)
-			if (stt->pages[i])
-				__free_page(stt->pages[i]);
+	put_unused_fd(fd);
 
-		kfree(stt);
-	}
+ fail:
+	for (i = 0; i < npages; i++)
+		if (stt->pages[i])
+			__free_page(stt->pages[i]);
+
+	kfree(stt);
+ fail_acct:
+	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
 	return ret;
 }
 

From 22d538213ec4fa65b08b1edbf610066d8aab7bbb Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 18 Aug 2017 15:52:54 -0700
Subject: [PATCH 79/98] blk-mq-debugfs: Add names for recently added flags

The symbolic constants QUEUE_FLAG_SCSI_PASSTHROUGH, QUEUE_FLAG_QUIESCED
and REQ_NOWAIT are missing from blk-mq-debugfs.c. Add these to
blk-mq-debugfs.c such that these appear as names in debugfs instead of
as numbers.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 9ebc2945f991..4f927a58dff8 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -75,6 +75,8 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(STATS),
 	QUEUE_FLAG_NAME(POLL_STATS),
 	QUEUE_FLAG_NAME(REGISTERED),
+	QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
+	QUEUE_FLAG_NAME(QUIESCED),
 };
 #undef QUEUE_FLAG_NAME
 
@@ -265,6 +267,7 @@ static const char *const cmd_flag_name[] = {
 	CMD_FLAG_NAME(RAHEAD),
 	CMD_FLAG_NAME(BACKGROUND),
 	CMD_FLAG_NAME(NOUNMAP),
+	CMD_FLAG_NAME(NOWAIT),
 };
 #undef CMD_FLAG_NAME
 

From 1046d304900cf9d4b2c730c6860b8e03cc704377 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Wed, 26 Jul 2017 15:32:23 +0100
Subject: [PATCH 80/98] virtio_blk: fix incorrect message when disk is resized

The message printed on disk resize is incorrect.  The following is
printed when resizing to 2 GiB:

  $ truncate -s 1G test.img
  $ qemu -device virtio-blk-pci,logical_block_size=4096,...
  (qemu) block_resize drive1 2G

  virtio_blk virtio0: new size: 4194304 4096-byte logical blocks (17.2 GB/16.0 GiB)

The virtio_blk capacity config field is in 512-byte sector units
regardless of logical_block_size as per the VIRTIO specification.
Therefore the message should read:

  virtio_blk virtio0: new size: 524288 4096-byte logical blocks (2.15 GB/2.0 GiB)

Note that this only affects the printed message.  Thankfully the actual
block device has the correct size because the block layer expects
capacity in sectors.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/block/virtio_blk.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1498b899a593..d3d5523862c2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -381,6 +381,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
 	struct request_queue *q = vblk->disk->queue;
 	char cap_str_2[10], cap_str_10[10];
 	char *envp[] = { "RESIZE=1", NULL };
+	unsigned long long nblocks;
 	u64 capacity;
 
 	/* Host must always specify the capacity. */
@@ -393,16 +394,19 @@ static void virtblk_config_changed_work(struct work_struct *work)
 		capacity = (sector_t)-1;
 	}
 
-	string_get_size(capacity, queue_logical_block_size(q),
+	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);
+
+	string_get_size(nblocks, queue_logical_block_size(q),
 			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
-	string_get_size(capacity, queue_logical_block_size(q),
+	string_get_size(nblocks, queue_logical_block_size(q),
 			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
 
 	dev_notice(&vdev->dev,
-		  "new size: %llu %d-byte logical blocks (%s/%s)\n",
-		  (unsigned long long)capacity,
-		  queue_logical_block_size(q),
-		  cap_str_10, cap_str_2);
+		   "new size: %llu %d-byte logical blocks (%s/%s)\n",
+		   nblocks,
+		   queue_logical_block_size(q),
+		   cap_str_10,
+		   cap_str_2);
 
 	set_capacity(vblk->disk, capacity);
 	revalidate_disk(vblk->disk);

From ba74b6f7fcc07355d087af6939712eed4a454821 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 24 Aug 2017 18:07:02 +0200
Subject: [PATCH 81/98] virtio_pci: fix cpu affinity support

Commit 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for
virtqueues"") removed the adjustment of the pre_vectors for the virtio
MSI-X vector allocation which was added in commit fb5e31d9 ("virtio:
allow drivers to request IRQ affinity when creating VQs"). This will
lead to an incorrect assignment of MSI-X vectors, and potential
deadlocks when offlining cpus.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Fixes: 0b0f9dc5 ("Revert "virtio_pci: use shared interrupts for virtqueues"")
Reported-by: YASUAKI ISHIMATSU <yasu.isimatu@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 007a4f366086..1c4797e53f68 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -107,6 +107,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	const char *name = dev_name(&vp_dev->vdev.dev);
+	unsigned flags = PCI_IRQ_MSIX;
 	unsigned i, v;
 	int err = -ENOMEM;
 
@@ -126,10 +127,13 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 					GFP_KERNEL))
 			goto error;
 
+	if (desc) {
+		flags |= PCI_IRQ_AFFINITY;
+		desc->pre_vectors++; /* virtio config vector */
+	}
+
 	err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
-					     nvectors, PCI_IRQ_MSIX |
-					     (desc ? PCI_IRQ_AFFINITY : 0),
-					     desc);
+					     nvectors, flags, desc);
 	if (err < 0)
 		goto error;
 	vp_dev->msix_enabled = 1;

From 556b969a1cfe2686aae149137fa1dfcac0eefe54 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 25 Aug 2017 15:55:30 -0700
Subject: [PATCH 82/98] PM/hibernate: touch NMI watchdog when creating snapshot

Counting the pages for creating the hibernation snapshot can take a
significant amount of time, especially on systems with large memory.
Since the counting job is performed with irqs disabled, this might lead
to an NMI lockup.  The following warning was found on a system with
1.5TB DRAM:

  Freezing user space processes ... (elapsed 0.002 seconds) done.
  OOM killer disabled.
  PM: Preallocating image memory...
  NMI watchdog: Watchdog detected hard LOCKUP on cpu 27
  CPU: 27 PID: 3128 Comm: systemd-sleep Not tainted 4.13.0-0.rc2.git0.1.fc27.x86_64 #1
  task: ffff9f01971ac000 task.stack: ffffb1a3f325c000
  RIP: 0010:memory_bm_find_bit+0xf4/0x100
  Call Trace:
   swsusp_set_page_free+0x2b/0x30
   mark_free_pages+0x147/0x1c0
   count_data_pages+0x41/0xa0
   hibernate_preallocate_memory+0x80/0x450
   hibernation_snapshot+0x58/0x410
   hibernate+0x17c/0x310
   state_store+0xdf/0xf0
   kobj_attr_store+0xf/0x20
   sysfs_kf_write+0x37/0x40
   kernfs_fop_write+0x11c/0x1a0
   __vfs_write+0x37/0x170
   vfs_write+0xb1/0x1a0
   SyS_write+0x55/0xc0
   entry_SYSCALL_64_fastpath+0x1a/0xa5
  ...
  done (allocated 6590003 pages)
  PM: Allocated 26360012 kbytes in 19.89 seconds (1325.28 MB/s)

It took nearly 20 seconds (on a 2.10GHz CPU), thus the NMI lockup was
triggered.  If the timeout of the NMI watchdog has been set to 1
second, a safe interval should be 6590003/20 = 320k pages in theory.
However, there might also be some platforms running at a lower
frequency, so feed the watchdog every 100k pages.

[yu.c.chen@intel.com: simplification]
  Link: http://lkml.kernel.org/r/1503460079-29721-1-git-send-email-yu.c.chen@intel.com
[yu.c.chen@intel.com: use interval of 128k instead of 100k to avoid modulus]
Link: http://lkml.kernel.org/r/1503328098-5120-1-git-send-email-yu.c.chen@intel.com
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Reported-by: Jan Filipcewicz <jan.filipcewicz@intel.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Len Brown <lenb@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1bad301820c7..7a58eb5757e3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
 #include <linux/kthread.h>
 #include <linux/memcontrol.h>
 #include <linux/ftrace.h>
+#include <linux/nmi.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -2535,9 +2536,14 @@ void drain_all_pages(struct zone *zone)
 
 #ifdef CONFIG_HIBERNATION
 
+/*
+ * Touch the watchdog for every WD_PAGE_COUNT pages.
+ */
+#define WD_PAGE_COUNT	(128*1024)
+
 void mark_free_pages(struct zone *zone)
 {
-	unsigned long pfn, max_zone_pfn;
+	unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT;
 	unsigned long flags;
 	unsigned int order, t;
 	struct page *page;
@@ -2552,6 +2558,11 @@ void mark_free_pages(struct zone *zone)
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
 
+			if (!--page_count) {
+				touch_nmi_watchdog();
+				page_count = WD_PAGE_COUNT;
+			}
+
 			if (page_zone(page) != zone)
 				continue;
 
@@ -2565,8 +2576,13 @@ void mark_free_pages(struct zone *zone)
 			unsigned long i;
 
 			pfn = page_to_pfn(page);
-			for (i = 0; i < (1UL << order); i++)
+			for (i = 0; i < (1UL << order); i++) {
+				if (!--page_count) {
+					touch_nmi_watchdog();
+					page_count = WD_PAGE_COUNT;
+				}
 				swsusp_set_page_free(pfn_to_page(pfn + i));
+			}
 		}
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);

From 435c0b87d661da83771c30ed775f7c37eed193fb Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 25 Aug 2017 15:55:33 -0700
Subject: [PATCH 83/98] mm, shmem: fix handling
 /sys/kernel/mm/transparent_hugepage/shmem_enabled

/sys/kernel/mm/transparent_hugepage/shmem_enabled controls whether we
allocate huge pages when allocating pages for the private in-kernel
shmem mount.

Unfortunately, as Dan noticed, I've screwed it up and the only way to
make kernel allocate huge page for the mount is to use "force" there.
All other values will be effectively ignored.

Link: http://lkml.kernel.org/r/20170822144254.66431-1-kirill.shutemov@linux.intel.com
Fixes: 5a6e75f8110c ("shmem: prepare huge= mount option and sysfs knob")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org> [4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 6540e5982444..fbcb3c96a186 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3967,7 +3967,7 @@ int __init shmem_init(void)
 	}
 
 #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
-	if (has_transparent_hugepage() && shmem_huge < SHMEM_HUGE_DENY)
+	if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
 		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
 	else
 		shmem_huge = 0; /* just in case it was patched */
@@ -4028,7 +4028,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
 		return -EINVAL;
 
 	shmem_huge = huge;
-	if (shmem_huge < SHMEM_HUGE_DENY)
+	if (shmem_huge > SHMEM_HUGE_DENY)
 		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
 	return count;
 }

From fffa281b48a91ad6dac1a18c5907ece58fa3879b Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Fri, 25 Aug 2017 15:55:36 -0700
Subject: [PATCH 84/98] dax: fix deadlock due to misaligned PMD faults

In DAX there are two separate places where the 2MiB range of a PMD is
defined.

The first is in the page tables, where a PMD mapping inserted for a
given address spans from (vmf->address & PMD_MASK) to ((vmf->address &
PMD_MASK) + PMD_SIZE - 1).  That is, from the 2MiB boundary below the
address to the 2MiB boundary above the address.

So, for example, a fault at address 3MiB (0x30 0000) falls within the
PMD that ranges from 2MiB (0x20 0000) to 4MiB (0x40 0000).

The second PMD range is in the mapping->page_tree, where a given file
offset is covered by a radix tree entry that spans from one 2MiB aligned
file offset to another 2MiB aligned file offset.

So, for example, the file offset for 3MiB (pgoff 768) falls within the
PMD range for the order 9 radix tree entry that ranges from 2MiB (pgoff
512) to 4MiB (pgoff 1024).

This system works so long as the addresses and file offsets for a given
mapping both have the same offsets relative to the start of each PMD.

Consider the case where the starting address for a given file isn't 2MiB
aligned - say our faulting address is 3 MiB (0x30 0000), but that
corresponds to the beginning of our file (pgoff 0).  Now all the PMDs in
the mapping are misaligned so that the 2MiB range defined in the page
tables never matches up with the 2MiB range defined in the radix tree.

The current code notices this case for DAX faults to storage with the
following test in dax_pmd_insert_mapping():

	if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR)
		goto unlock_fallback;

This test makes sure that the pfn we get from the driver is 2MiB
aligned, and relies on the assumption that the 2MiB alignment of the pfn
we get back from the driver matches the 2MiB alignment of the faulting
address.

However, faults to holes were not checked and we could hit the problem
described above.

This was reported in response to the NVML nvml/src/test/pmempool_sync
TEST5:

	$ cd nvml/src/test/pmempool_sync
	$ make TEST5

You can grab NVML here:

	https://github.com/pmem/nvml/

The dmesg warning you see when you hit this error is:

  WARNING: CPU: 13 PID: 2900 at fs/dax.c:641 dax_insert_mapping_entry+0x2df/0x310

Where we notice in dax_insert_mapping_entry() that the radix tree entry
we are about to replace doesn't match the locked entry that we had
previously inserted into the tree.  This happens because the initial
insertion was done in grab_mapping_entry() using a pgoff calculated from
the faulting address (vmf->address), and the replacement in
dax_pmd_load_hole() => dax_insert_mapping_entry() is done using
vmf->pgoff.

In our failure case those two page offsets (one calculated from
vmf->address, one using vmf->pgoff) point to different order 9 radix
tree entries.

This failure case can result in a deadlock because the radix tree unlock
also happens on the pgoff calculated from vmf->address.  This means that
the locked radix tree entry that we swapped in to the tree in
dax_insert_mapping_entry() using vmf->pgoff is never unlocked, so all
future faults to that 2MiB range will block forever.

Fix this by validating that the faulting address's PMD offset matches
the PMD offset from the start of the file.  This check is done at the
very beginning of the fault and covers faults that would have mapped to
storage as well as faults to holes.  I left the COLOUR check in
dax_pmd_insert_mapping() in place in case we ever hit the insanity
condition where the alignment of the pfn we get from the driver doesn't
match the alignment of the userspace address.

Link: http://lkml.kernel.org/r/20170822222436.18926-1-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reported-by: "Slusarz, Marcin" <marcin.slusarz@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index 306c2b603fb8..865d42c63e23 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1383,6 +1383,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
 
 	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
 
+	/*
+	 * Make sure that the faulting address's PMD offset (color) matches
+	 * the PMD offset from the start of the file.  This is necessary so
+	 * that a PMD range in the page table overlaps exactly with a PMD
+	 * range in the radix tree.
+	 */
+	if ((vmf->pgoff & PG_PMD_COLOUR) !=
+	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
+		goto fallback;
+
 	/* Fall back to PTEs if we're going to COW */
 	if (write && !(vma->vm_flags & VM_SHARED))
 		goto fallback;

From 263630e8d176d87308481ebdcd78ef9426739c6b Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 25 Aug 2017 15:55:39 -0700
Subject: [PATCH 85/98] mm/madvise.c: fix freeing of locked page with MADV_FREE

If madvise(..., MADV_FREE) split a transparent hugepage, it called
put_page() before unlock_page().

This was wrong because put_page() can free the page, e.g. if a
concurrent madvise(..., MADV_DONTNEED) has removed it from the memory
mapping. put_page() then rightfully complained about freeing a locked
page.

Fix this by moving the unlock_page() before put_page().

This bug was found by syzkaller, which encountered the following splat:

    BUG: Bad page state in process syzkaller412798  pfn:1bd800
    page:ffffea0006f60000 count:0 mapcount:0 mapping:          (null) index:0x20a00
    flags: 0x200000000040019(locked|uptodate|dirty|swapbacked)
    raw: 0200000000040019 0000000000000000 0000000000020a00 00000000ffffffff
    raw: ffffea0006f60020 ffffea0006f60020 0000000000000000 0000000000000000
    page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
    bad because of flags: 0x1(locked)
    Modules linked in:
    CPU: 1 PID: 3037 Comm: syzkaller412798 Not tainted 4.13.0-rc5+ #35
    Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
    Call Trace:
     __dump_stack lib/dump_stack.c:16 [inline]
     dump_stack+0x194/0x257 lib/dump_stack.c:52
     bad_page+0x230/0x2b0 mm/page_alloc.c:565
     free_pages_check_bad+0x1f0/0x2e0 mm/page_alloc.c:943
     free_pages_check mm/page_alloc.c:952 [inline]
     free_pages_prepare mm/page_alloc.c:1043 [inline]
     free_pcp_prepare mm/page_alloc.c:1068 [inline]
     free_hot_cold_page+0x8cf/0x12b0 mm/page_alloc.c:2584
     __put_single_page mm/swap.c:79 [inline]
     __put_page+0xfb/0x160 mm/swap.c:113
     put_page include/linux/mm.h:814 [inline]
     madvise_free_pte_range+0x137a/0x1ec0 mm/madvise.c:371
     walk_pmd_range mm/pagewalk.c:50 [inline]
     walk_pud_range mm/pagewalk.c:108 [inline]
     walk_p4d_range mm/pagewalk.c:134 [inline]
     walk_pgd_range mm/pagewalk.c:160 [inline]
     __walk_page_range+0xc3a/0x1450 mm/pagewalk.c:249
     walk_page_range+0x200/0x470 mm/pagewalk.c:326
     madvise_free_page_range.isra.9+0x17d/0x230 mm/madvise.c:444
     madvise_free_single_vma+0x353/0x580 mm/madvise.c:471
     madvise_dontneed_free mm/madvise.c:555 [inline]
     madvise_vma mm/madvise.c:664 [inline]
     SYSC_madvise mm/madvise.c:832 [inline]
     SyS_madvise+0x7d3/0x13c0 mm/madvise.c:760
     entry_SYSCALL_64_fastpath+0x1f/0xbe

Here is a C reproducer:

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #define MADV_FREE	8
    #define PAGE_SIZE	4096

    static void *mapping;
    static const size_t mapping_size = 0x1000000;

    static void *madvise_thrproc(void *arg)
    {
        madvise(mapping, mapping_size, (long)arg);
    }

    int main(void)
    {
        pthread_t t[2];

        for (;;) {
            mapping = mmap(NULL, mapping_size, PROT_WRITE,
                           MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);

            munmap(mapping + mapping_size / 2, PAGE_SIZE);

            pthread_create(&t[0], 0, madvise_thrproc, (void*)MADV_DONTNEED);
            pthread_create(&t[1], 0, madvise_thrproc, (void*)MADV_FREE);
            pthread_join(t[0], NULL);
            pthread_join(t[1], NULL);
            munmap(mapping, mapping_size);
        }
    }

Note: to see the splat, CONFIG_TRANSPARENT_HUGEPAGE=y and
CONFIG_DEBUG_VM=y are needed.

Google Bug Id: 64696096

Link: http://lkml.kernel.org/r/20170823205235.132061-1-ebiggers3@gmail.com
Fixes: 854e9ed09ded ("mm: support madvise(MADV_FREE)")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>	[v4.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/madvise.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 47d8d8a25eae..23ed525bc2bc 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -368,8 +368,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 				pte_offset_map_lock(mm, pmd, addr, &ptl);
 				goto out;
 			}
-			put_page(page);
 			unlock_page(page);
+			put_page(page);
 			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 			pte--;
 			addr -= PAGE_SIZE;

From 2b7e8665b4ff51c034c55df3cff76518d1a9ee3a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 25 Aug 2017 15:55:43 -0700
Subject: [PATCH 86/98] fork: fix incorrect fput of ->exe_file causing
 use-after-free

Commit 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for
write killable") made it possible to kill a forking task while it is
waiting to acquire its ->mmap_sem for write, in dup_mmap().

However, it was overlooked that this introduced a new error path before
a reference is taken on the mm_struct's ->exe_file.  Since the
->exe_file of the new mm_struct was already set to the old ->exe_file by
the memcpy() in dup_mm(), it was possible for the mmput() in the error
path of dup_mm() to drop a reference to ->exe_file which was never
taken.

This caused the struct file to later be freed prematurely.

Fix it by updating mm_init() to NULL out the ->exe_file, in the same
place it clears other things like the list of mmaps.

This bug was found by syzkaller.  It can be reproduced using the
following C program:

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <stdlib.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <sys/wait.h>
    #include <unistd.h>

    static void *mmap_thread(void *_arg)
    {
        for (;;) {
            mmap(NULL, 0x1000000, PROT_READ,
                 MAP_POPULATE|MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
        }
    }

    static void *fork_thread(void *_arg)
    {
        usleep(rand() % 10000);
        fork();
    }

    int main(void)
    {
        fork();
        fork();
        fork();
        for (;;) {
            if (fork() == 0) {
                pthread_t t;

                pthread_create(&t, NULL, mmap_thread, NULL);
                pthread_create(&t, NULL, fork_thread, NULL);
                usleep(rand() % 10000);
                syscall(__NR_exit_group, 0);
            }
            wait(NULL);
        }
    }

No special kernel config options are needed.  It usually causes a NULL
pointer dereference in __remove_shared_vm_struct() during exit, or in
dup_mmap() (which is usually inlined into copy_process()) during fork.
Both are due to a vm_area_struct's ->vm_file being used after it's
already been freed.

Google Bug Id: 64772007

Link: http://lkml.kernel.org/r/20170823211408.31198-1-ebiggers3@gmail.com
Fixes: 7c051267931a ("mm, fork: make dup_mmap wait for mmap_sem for write killable")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>	[v4.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/fork.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/fork.c b/kernel/fork.c
index e075b7780421..cbbea277b3fb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -806,6 +806,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_cpumask(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
+	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_mm_init(mm);
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS

From 91b540f98872a206ea1c49e4aa6ea8eed0886644 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Fri, 25 Aug 2017 15:55:46 -0700
Subject: [PATCH 87/98] mm/memblock.c: reversed logic in memblock_discard()

In the recently introduced memblock_discard() there is a reversed-logic bug.
Memory of the static array is freed instead of the dynamically allocated one.

Link: http://lkml.kernel.org/r/1503511441-95478-2-git-send-email-pasha.tatashin@oracle.com
Fixes: 3010f876500f ("mm: discard memblock data later")
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reported-by: Woody Suwalski <terraluna977@gmail.com>
Tested-by: Woody Suwalski <terraluna977@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memblock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index bf14aea6ab70..91205780e6b1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -299,7 +299,7 @@ void __init memblock_discard(void)
 		__memblock_free_late(addr, size);
 	}
 
-	if (memblock.memory.regions == memblock_memory_init_regions) {
+	if (memblock.memory.regions != memblock_memory_init_regions) {
 		addr = __pa(memblock.memory.regions);
 		size = PAGE_ALIGN(sizeof(struct memblock_region) *
 				  memblock.memory.max);

From 8e1101d251647802d0a4ae19eb3d0e1453eaeff4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Aug 2017 18:58:42 -0500
Subject: [PATCH 88/98] PCI/MSI: Don't warn when irq_create_affinity_masks()
 returns NULL

irq_create_affinity_masks() can return NULL on non-SMP systems, when there
are not enough "free" vectors available to spread, or if memory allocation
for the CPU masks fails.  Only the allocation failure is of interest, and
even then the system will work just fine except for non-optimally spread
vectors.  Thus remove the warnings.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 drivers/pci/msi.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 253d92409bb3..2225afc1cbbb 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -538,12 +538,9 @@ msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
 	struct msi_desc *entry;
 	u16 control;
 
-	if (affd) {
+	if (affd)
 		masks = irq_create_affinity_masks(nvec, affd);
-		if (!masks)
-			dev_err(&dev->dev, "can't allocate MSI affinity masks for %d vectors\n",
-				nvec);
-	}
+
 
 	/* MSI Entry Initialization */
 	entry = alloc_msi_entry(&dev->dev, nvec, masks);
@@ -679,12 +676,8 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 	struct msi_desc *entry;
 	int ret, i;
 
-	if (affd) {
+	if (affd)
 		masks = irq_create_affinity_masks(nvec, affd);
-		if (!masks)
-			dev_err(&dev->dev, "can't allocate MSI-X affinity masks for %d vectors\n",
-				nvec);
-	}
 
 	for (i = 0, curmsk = masks; i < nvec; i++) {
 		entry = alloc_msi_entry(&dev->dev, 1, curmsk);

From 0cc3b0ec23ce4c69e1e890ed2b8d2fa932b14aad Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Aug 2017 12:12:25 -0700
Subject: [PATCH 89/98] Clarify (and fix) MAX_LFS_FILESIZE macros

We have a MAX_LFS_FILESIZE macro that is meant to be filled in by
filesystems (and other IO targets) that know they are 64-bit clean and
don't have any 32-bit limits in their IO path.

It turns out that our 32-bit value for that limit was bogus.  On 32-bit,
the VM layer is limited by the page cache to only 32-bit index values,
but our logic for that was confusing and actually wrong.  We used to
define that value to

	(((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1)

which is actually odd in several ways: it limits the index to 31 bits,
and then it limits files so that they can't have data in that last byte
of a page that has the highest 31-bit index (ie page index 0x7fffffff).

Neither of those limitations make sense.  The index is actually the full
32 bit unsigned value, and we can use that whole full page.  So the
maximum size of the file would logically be "PAGE_SIZE << BITS_PER_LONG".

However, we do want to avoid the maximum index, because we have code
that iterates over the page indexes, and we don't want that code to
overflow.  So the maximum size of a file on a 32-bit host should
actually be one page less than the full 32-bit index.

So the actual limit is ULONG_MAX << PAGE_SHIFT.  That means that we will
not actually be using the page of that last index (ULONG_MAX), but we
can grow a file up to that limit.

The wrong value of MAX_LFS_FILESIZE actually caused problems for Doug
Nazar, who was still using a 32-bit host, but with a 9.7TB 2 x RAID5
volume.  It turns out that our old MAX_LFS_FILESIZE was 8TiB (well, one
byte less), but the actual true VM limit is one page less than 16TiB.

This was invisible until commit c2a9737f45e2 ("vfs,mm: fix a dead loop
in truncate_inode_pages_range()"), which started applying that
MAX_LFS_FILESIZE limit to block devices too.

NOTE! On 64-bit, the page index isn't a limiter at all, and the limit is
actually just the offset type itself (loff_t), which is signed.  But for
clarity, on 64-bit, just use the maximum signed value, and don't make
people have to count the number of 'f' characters in the hex constant.

So just use LLONG_MAX for the 64-bit case.  That was what the value had
been before too, just written out as a hex constant.

Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()")
Reported-and-tested-by: Doug Nazar <nazard@nazar.ca>
Cc: Andreas Dilger <adilger@dilger.ca>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Dave Kleikamp <shaggy@kernel.org>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6e1fd5d21248..cbfe127bccf8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -907,9 +907,9 @@ static inline struct file *get_file(struct file *f)
 /* Page cache limit. The filesystems should put that into their s_maxbytes 
    limits, otherwise bad things can happen in VM. */ 
 #if BITS_PER_LONG==32
-#define MAX_LFS_FILESIZE	(((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1)
+#define MAX_LFS_FILESIZE	((loff_t)ULONG_MAX << PAGE_SHIFT)
 #elif BITS_PER_LONG==64
-#define MAX_LFS_FILESIZE 	((loff_t)0x7fffffffffffffffLL)
+#define MAX_LFS_FILESIZE 	((loff_t)LLONG_MAX)
 #endif
 
 #define FL_POSIX	1

From 3510ca20ece0150af6b10c77a74ff1b5c198e3e2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Aug 2017 13:55:12 -0700
Subject: [PATCH 90/98] Minor page waitqueue cleanups

Tim Chen and Kan Liang have been battling a customer load that shows
extremely long page wakeup lists.  The cause seems to be constant NUMA
migration of a hot page that is shared across a lot of threads, but the
actual root cause for the exact behavior has not been found.

Tim has a patch that batches the wait list traversal at wakeup time, so
that we at least don't get long uninterruptible cases where we traverse
and wake up thousands of processes and get nasty latency spikes.  That
is likely 4.14 material, but we're still discussing the page waitqueue
specific parts of it.

In the meantime, I've tried to look at making the page wait queues less
expensive, and failing miserably.  If you have thousands of threads
waiting for the same page, it will be painful.  We'll need to try to
figure out the NUMA balancing issue some day, in addition to avoiding
the excessive spinlock hold times.

That said, having tried to rewrite the page wait queues, I can at least
fix up some of the braindamage in the current situation. In particular:

 (a) we don't want to continue walking the page wait list if the bit
     we're waiting for already got set again (which seems to be one of
     the patterns of the bad load).  That makes no progress and just
     causes pointless cache pollution chasing the pointers.

 (b) we don't want to put the non-locking waiters always on the front of
     the queue, and the locking waiters always on the back.  Not only is
     that unfair, it means that we wake up thousands of reading threads
     that will just end up being blocked by the writer later anyway.

Also add a comment about the layout of 'struct wait_page_key' - there is
an external user of it in the cachefiles code that means that it has to
match the layout of 'struct wait_bit_key' in the two first members.  It
so happens to match, because 'struct page *' and 'unsigned long *' end
up having the same values simply because the page flags are the first
member in struct page.

Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Christopher Lameter <cl@linux.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sched/wait.c |  7 ++++---
 mm/filemap.c        | 11 ++++++-----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 17f11c6b0a9f..d6afed6d0752 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -70,9 +70,10 @@ static void __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
 
 	list_for_each_entry_safe(curr, next, &wq_head->head, entry) {
 		unsigned flags = curr->flags;
-
-		if (curr->func(curr, mode, wake_flags, key) &&
-				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
+		int ret = curr->func(curr, mode, wake_flags, key);
+		if (ret < 0)
+			break;
+		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 			break;
 	}
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index a49702445ce0..baba290c276b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -885,6 +885,7 @@ void __init pagecache_init(void)
 	page_writeback_init();
 }
 
+/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
 struct wait_page_key {
 	struct page *page;
 	int bit_nr;
@@ -909,8 +910,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 
 	if (wait_page->bit_nr != key->bit_nr)
 		return 0;
+
+	/* Stop walking if it's locked */
 	if (test_bit(key->bit_nr, &key->page->flags))
-		return 0;
+		return -1;
 
 	return autoremove_wake_function(wait, mode, sync, key);
 }
@@ -964,6 +967,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	int ret = 0;
 
 	init_wait(wait);
+	wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
 	wait->func = wake_page_function;
 	wait_page.page = page;
 	wait_page.bit_nr = bit_nr;
@@ -972,10 +976,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 		spin_lock_irq(&q->lock);
 
 		if (likely(list_empty(&wait->entry))) {
-			if (lock)
-				__add_wait_queue_entry_tail_exclusive(q, wait);
-			else
-				__add_wait_queue(q, wait);
+			__add_wait_queue_entry_tail(q, wait);
 			SetPageWaiters(page);
 		}
 

From a8b169afbf06a678437632709caac98e16f99263 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Aug 2017 16:25:09 -0700
Subject: [PATCH 91/98] Avoid page waitqueue race leaving possible page locker
 waiting

The "lock_page_killable()" function waits for exclusive access to the
page lock bit using the WQ_FLAG_EXCLUSIVE bit in the waitqueue entry
set.

That means that if it gets woken up, other waiters may have been
skipped.

That, in turn, means that if it sees the page being unlocked, it *must*
take that lock and return success, even if a lethal signal is also
pending.

So instead of checking for lethal signals first, we need to check for
them after we've checked the actual bit that we were waiting for.  Even
if that might then delay the killing of the process.

This matches the order of the old "wait_on_bit_lock()" infrastructure
that the page locking used to use (and is still used in a few other
areas).

Note that if we still return an error after having unsuccessfully tried
to acquire the page lock, that is ok: that means that some other thread
was able to get ahead of us and lock the page, and when that other
thread then unlocks the page, the wakeup event will be repeated.  So any
other pending waiters will now get properly woken up.

Fixes: 62906027091f ("mm: add PageWaiters indicating tasks are waiting for a page bit")
Cc: Nick Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Jan Kara <jack@suse.cz>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index baba290c276b..0b41c8cbeabc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -986,10 +986,6 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 
 		if (likely(test_bit(bit_nr, &page->flags))) {
 			io_schedule();
-			if (unlikely(signal_pending_state(state, current))) {
-				ret = -EINTR;
-				break;
-			}
 		}
 
 		if (lock) {
@@ -999,6 +995,11 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 			if (!test_bit(bit_nr, &page->flags))
 				break;
 		}
+
+		if (unlikely(signal_pending_state(state, current))) {
+			ret = -EINTR;
+			break;
+		}
 	}
 
 	finish_wait(q, wait);

From cc4a41fe5541a73019a864883297bd5043aa6d98 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Aug 2017 17:20:40 -0700
Subject: [PATCH 92/98] Linux 4.13-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index dda88e744d5f..8db6be7dca73 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*

From 79de3cbe9a974e03a02b71da80da9ee0eb15a2d0 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 23 Aug 2017 22:37:00 +0200
Subject: [PATCH 93/98] fs/select: Fix memory corruption in compat_get_fd_set()

Commit 464d62421cb8 ("select: switch compat_{get,put}_fd_set() to
compat_{get,put}_bitmap()") changed the calculation on how many bytes
need to be zeroed when userspace handed over a NULL pointer for a fdset
array in the select syscall.

The calculation was changed in compat_get_fd_set() wrongly from
	memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t));
to
	memset(fdset, 0, ALIGN(nr, BITS_PER_LONG));

The ALIGN(nr, BITS_PER_LONG) calculates the number of _bits_ which need
to be zeroed in the target fdset array (rounded up to the next full bits
for an unsigned long).

But the memset() call expects the number of _bytes_ to be zeroed.

This leads to clearing more memory than wanted (on the stack area or
even at kmalloc()ed memory areas) and to random kernel crashes as we
have seen them on the parisc platform.

The correct change should have been

	memset(fdset, 0, (ALIGN(nr, BITS_PER_LONG) / BITS_PER_LONG) * BYTES_PER_LONG);

which is the same as can be achieved with a call to

	zero_fd_set(nr, fdset).

Fixes: 464d62421cb8 ("select: switch compat_{get,put}_fd_set() to compat_{get,put}_bitmap()")
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/select.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/select.c b/fs/select.c
index 9d5f15ed87fe..c6362e38ae92 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1164,11 +1164,7 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
 	if (ufdset) {
 		return compat_get_bitmap(fdset, ufdset, nr);
 	} else {
-		/* Tricky, must clear full unsigned long in the
-		 * kernel fdset at the end, ALIGN makes sure that
-		 * actually happens.
-		 */
-		memset(fdset, 0, ALIGN(nr, BITS_PER_LONG));
+		zero_fd_set(nr, fdset);
 		return 0;
 	}
 }

From e8206d2baa41a4c7cf4590929f8819f8309b564d Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Mon, 28 Aug 2017 15:03:58 -0700
Subject: [PATCH 94/98] ARCv2: SMP: Mask only private-per-core IRQ lines on
 boot at core intc

Recent commit a8ec3ee861b6 "arc: Mask individual IRQ lines during core
INTC init" breaks interrupt handling on ARCv2 SMP systems.

That commit masked all interrupts at onset, as some controllers on some
boards (customer as well as internal), would assert interrupts early
before any handlers were installed.  For SMP systems, the masking was
done at each cpu's core-intc.  Later, when the IRQ was actually
requested, it was unmasked, but only on the requesting cpu.

For "common" interrupts, which were wired up from the 2nd level IDU
intc, this was an issue as they needed to be enabled on ALL the cpus
(given that IDU IRQs are by default served Round Robin across cpus)

So fix that by NOT masking "common" interrupts at core-intc, but instead
at the 2nd level IDU intc (latter already being done in idu_of_init())

Fixes: a8ec3ee861b6 ("arc: Mask individual IRQ lines during core INTC init")
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
[vgupta: reworked changelog, removed the extraneous idu_irq_mask_raw()]
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/kernel/intc-arcv2.c   | 11 +++++++++--
 arch/arc/kernel/intc-compact.c |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index cf90714a676d..067ea362fb3e 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -75,13 +75,20 @@ void arc_init_IRQ(void)
 	 * Set a default priority for all available interrupts to prevent
 	 * switching of register banks if Fast IRQ and multiple register banks
 	 * are supported by CPU.
-	 * Also disable all IRQ lines so faulty external hardware won't
+	 * Also disable private-per-core IRQ lines so faulty external HW won't
 	 * trigger interrupt that kernel is not ready to handle.
 	 */
 	for (i = NR_EXCEPTIONS; i < irq_bcr.irqs + NR_EXCEPTIONS; i++) {
 		write_aux_reg(AUX_IRQ_SELECT, i);
 		write_aux_reg(AUX_IRQ_PRIORITY, ARCV2_IRQ_DEF_PRIO);
-		write_aux_reg(AUX_IRQ_ENABLE, 0);
+
+		/*
+		 * Only mask cpu private IRQs here.
+		 * "common" interrupts are masked at IDU, otherwise it would
+		 * need to be unmasked at each cpu, with IPIs
+		 */
+		if (i < FIRST_EXT_IRQ)
+			write_aux_reg(AUX_IRQ_ENABLE, 0);
 	}
 
 	/* setup status32, don't enable intr yet as kernel doesn't want */
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index cef388025adf..47b421fa0147 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -27,7 +27,7 @@
  */
 void arc_init_IRQ(void)
 {
-	int level_mask = 0, i;
+	unsigned int level_mask = 0, i;
 
        /* Is timer high priority Interrupt (Level2 in ARCompact jargon) */
 	level_mask |= IS_ENABLED(CONFIG_ARC_COMPACT_IRQ_LEVELS) << TIMER0_IRQ;

From b339752d054fb32863418452dff350a1086885b1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 28 Aug 2017 14:51:27 -0700
Subject: [PATCH 95/98] cpumask: fix spurious cpumask_of_node() on non-NUMA
 multi-node configs

When !NUMA, cpumask_of_node(@node) equals cpu_online_mask regardless of
@node.  The assumption seems that if !NUMA, there shouldn't be more than
one node and thus reporting cpu_online_mask regardless of @node is
correct.  However, that assumption was broken years ago to support
DISCONTIGMEM and whether a system has multiple nodes or not is
separately controlled by NEED_MULTIPLE_NODES.

This means that, on a system with !NUMA && NEED_MULTIPLE_NODES,
cpumask_of_node() will report cpu_online_mask for all possible nodes,
indicating that the CPUs are associated with multiple nodes which is an
impossible configuration.

This bug has been around forever but doesn't look like it has caused any
noticeable symptoms.  However, it triggers a WARN recently added to
workqueue to verify NUMA affinity configuration.

Fix it by reporting empty cpumask on non-zero nodes if !NUMA.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/topology.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index fc824e2828f3..5d2add1a6c96 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -48,7 +48,11 @@
 #define parent_node(node)	((void)(node),0)
 #endif
 #ifndef cpumask_of_node
-#define cpumask_of_node(node)	((void)node, cpu_online_mask)
+  #ifdef CONFIG_NEED_MULTIPLE_NODES
+    #define cpumask_of_node(node)	((node) == 0 ? cpu_online_mask : cpu_none_mask)
+  #else
+    #define cpumask_of_node(node)	((void)node, cpu_online_mask)
+  #endif
 #endif
 #ifndef pcibus_to_node
 #define pcibus_to_node(bus)	((void)(bus), -1)

From 9c3a815f471a84811cf8021cf64aae3b8081dfde Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 28 Aug 2017 16:45:40 -0700
Subject: [PATCH 96/98] page waitqueue: always add new entries at the end

Commit 3510ca20ece0 ("Minor page waitqueue cleanups") made the page
queue code always add new waiters to the back of the queue, which helps
upcoming patches to batch the wakeups for some horrid loads where the
wait queues grow to thousands of entries.

However, I forgot about the nasty add_page_wait_queue() special case
code that is only used by the cachefiles code.  That one still continued
to add the new wait queue entries at the beginning of the list.

Fix it, because any sane batched wakeup will require that we don't
suddenly start getting new entries at the beginning of the list that we
already handled in a previous batch.

[ The current code always does the whole list while holding the lock, so
  wait queue ordering doesn't matter for correctness, but even then it's
  better to add later entries at the end from a fairness standpoint ]

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 0b41c8cbeabc..65b4b6e7f7bd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1041,7 +1041,7 @@ void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
 	unsigned long flags;
 
 	spin_lock_irqsave(&q->lock, flags);
-	__add_wait_queue(q, waiter);
+	__add_wait_queue_entry_tail(q, waiter);
 	SetPageWaiters(page);
 	spin_unlock_irqrestore(&q->lock, flags);
 }

From f12f42acdbb577a12eecfcebbbec41c81505c4dc Mon Sep 17 00:00:00 2001
From: Meng Xu <mengxu.gatech@gmail.com>
Date: Wed, 23 Aug 2017 17:07:50 -0400
Subject: [PATCH 97/98] perf/core: Fix potential double-fetch bug

While examining the kernel source code, I found a dangerous operation that
could turn into a double-fetch situation (a race condition bug) where the same
userspace memory region is fetched twice into kernel with sanity checks after
the first fetch while missing checks after the second fetch.

  1. The first fetch happens in line 9573 get_user(size, &uattr->size).

  2. Subsequently the 'size' variable undergoes a few sanity checks and
     transformations (line 9577 to 9584).

  3. The second fetch happens in line 9610 copy_from_user(attr, uattr, size)

  4. Given that 'uattr' can be fully controlled in userspace, an attacker can
     race to overwrite 'uattr->size' with an arbitrary value (say, 0xFFFFFFFF)
     after the first fetch but before the second fetch. The changed value will be
     copied to 'attr->size'.

  5. There are no further checks on 'attr->size' until the end of this function,
     and once the function returns, we lose the context to verify that 'attr->size'
     conforms to the sanity checks performed in step 2 (line 9577 to 9584).

  6. My manual analysis shows that 'attr->size' is not used elsewhere later,
     so, there is no working exploit against it right now. However, this could
     easily turn into an exploitable one if careless developers start to use
     'attr->size' later.

To fix this, override 'attr->size' from the second fetch to the one from the
first fetch, regardless of what is actually copied in.

In this way, it is assured that 'attr->size' is consistent with the checks
performed after the first fetch.

Signed-off-by: Meng Xu <mengxu.gatech@gmail.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: alexander.shishkin@linux.intel.com
Cc: meng.xu@gatech.edu
Cc: sanidhya@gatech.edu
Cc: taesoo@gatech.edu
Link: http://lkml.kernel.org/r/1503522470-35531-1-git-send-email-meng.xu@gatech.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3504125871d2..ce131d25622a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9611,6 +9611,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 	if (ret)
 		return -EFAULT;
 
+	attr->size = size;
+
 	if (attr->__reserved_1)
 		return -EINVAL;
 

From 75e8387685f6c65feb195a4556110b58f852b848 Mon Sep 17 00:00:00 2001
From: Zhou Chengming <zhouchengming1@huawei.com>
Date: Fri, 25 Aug 2017 21:49:37 +0800
Subject: [PATCH 98/98] perf/ftrace: Fix double traces of perf on
 ftrace:function

When running perf on the ftrace:function tracepoint, there is a bug
which can be reproduced by:

  perf record -e ftrace:function -a sleep 20 &
  perf record -e ftrace:function ls
  perf script

              ls 10304 [005]   171.853235: ftrace:function:
  perf_output_begin
              ls 10304 [005]   171.853237: ftrace:function:
  perf_output_begin
              ls 10304 [005]   171.853239: ftrace:function:
  task_tgid_nr_ns
              ls 10304 [005]   171.853240: ftrace:function:
  task_tgid_nr_ns
              ls 10304 [005]   171.853242: ftrace:function:
  __task_pid_nr_ns
              ls 10304 [005]   171.853244: ftrace:function:
  __task_pid_nr_ns

We can see that all the function traces are doubled.

The problem is caused by the inconsistency of the register
function perf_ftrace_event_register() with the probe function
perf_ftrace_function_call(). The former registers one probe
for every perf_event. And the latter handles all perf_events
on the current cpu. So when there are two perf_events on the current cpu,
the traces of them will be doubled.

So this patch adds an extra parameter "event" for perf_tp_event,
only send sample data to this event when it's not NULL.

Signed-off-by: Zhou Chengming <zhouchengming1@huawei.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: alexander.shishkin@linux.intel.com
Cc: huawei.libin@huawei.com
Link: http://lkml.kernel.org/r/1503668977-12526-1-git-send-email-zhouchengming1@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h      |  2 +-
 include/linux/trace_events.h    |  4 ++--
 kernel/events/core.c            | 13 +++++++++----
 kernel/trace/trace_event_perf.c |  4 +++-
 kernel/trace/trace_kprobe.c     |  4 ++--
 kernel/trace/trace_syscalls.c   |  4 ++--
 kernel/trace/trace_uprobe.c     |  2 +-
 7 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b14095bcf4bb..c00cd4b02f32 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1201,7 +1201,7 @@ extern void perf_event_init(void);
 extern void perf_tp_event(u16 event_type, u64 count, void *record,
 			  int entry_size, struct pt_regs *regs,
 			  struct hlist_head *head, int rctx,
-			  struct task_struct *task);
+			  struct task_struct *task, struct perf_event *event);
 extern void perf_bp_event(struct perf_event *event, void *data);
 
 #ifndef perf_misc_flags
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 536c80ff7ad9..5012b524283d 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -508,9 +508,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
 static inline void
 perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
 		       u64 count, struct pt_regs *regs, void *head,
-		       struct task_struct *task)
+		       struct task_struct *task, struct perf_event *event)
 {
-	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task);
+	perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event);
 }
 #endif
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ce131d25622a..03ac9c8b02fb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7906,16 +7906,15 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
 		}
 	}
 	perf_tp_event(call->event.type, count, raw_data, size, regs, head,
-		      rctx, task);
+		      rctx, task, NULL);
 }
 EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit);
 
 void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 		   struct pt_regs *regs, struct hlist_head *head, int rctx,
-		   struct task_struct *task)
+		   struct task_struct *task, struct perf_event *event)
 {
 	struct perf_sample_data data;
-	struct perf_event *event;
 
 	struct perf_raw_record raw = {
 		.frag = {
@@ -7929,9 +7928,15 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 
 	perf_trace_buf_update(record, event_type);
 
-	hlist_for_each_entry_rcu(event, head, hlist_entry) {
+	/* Use the given event instead of the hlist */
+	if (event) {
 		if (perf_tp_event_match(event, &data, regs))
 			perf_swevent_event(event, count, &data, regs);
+	} else {
+		hlist_for_each_entry_rcu(event, head, hlist_entry) {
+			if (perf_tp_event_match(event, &data, regs))
+				perf_swevent_event(event, count, &data, regs);
+		}
 	}
 
 	/*
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 562fa69df5d3..13ba2d3f6a91 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -306,6 +306,7 @@ static void
 perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *ops, struct pt_regs *pt_regs)
 {
+	struct perf_event *event;
 	struct ftrace_entry *entry;
 	struct hlist_head *head;
 	struct pt_regs regs;
@@ -329,8 +330,9 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
 
 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
+	event = container_of(ops, struct perf_event, ftrace_ops);
 	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
-			      1, &regs, head, NULL);
+			      1, &regs, head, NULL, event);
 
 #undef ENTRY_SIZE
 }
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c9b5aa10fbf9..8a907e12b6b9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1200,7 +1200,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 	memset(&entry[1], 0, dsize);
 	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
-			      head, NULL);
+			      head, NULL, NULL);
 }
 NOKPROBE_SYMBOL(kprobe_perf_func);
 
@@ -1236,7 +1236,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 	entry->ret_ip = (unsigned long)ri->ret_addr;
 	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
-			      head, NULL);
+			      head, NULL, NULL);
 }
 NOKPROBE_SYMBOL(kretprobe_perf_func);
 #endif	/* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 5e10395da88e..74d9a86eccc0 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -596,7 +596,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 			       (unsigned long *)&rec->args);
 	perf_trace_buf_submit(rec, size, rctx,
 			      sys_data->enter_event->event.type, 1, regs,
-			      head, NULL);
+			      head, NULL, NULL);
 }
 
 static int perf_sysenter_enable(struct trace_event_call *call)
@@ -667,7 +667,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
 	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
-			      1, regs, head, NULL);
+			      1, regs, head, NULL, NULL);
 }
 
 static int perf_sysexit_enable(struct trace_event_call *call)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index a7581fec9681..4525e0271a53 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1156,7 +1156,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
 	}
 
 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
-			      head, NULL);
+			      head, NULL, NULL);
  out:
 	preempt_enable();
 }