From 24e0e61db3cb86a66824531989f1df80e0939f26 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 4 Sep 2023 22:42:56 +0200 Subject: [PATCH 1/7] ata: libata: disallow dev-initiated LPM transitions to unsupported states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In AHCI 1.3.1, the register description for CAP.SSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Slumber state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Slumber requests." In AHCI 1.3.1, the register description for CAP.PSC: "When cleared to ‘0’, software must not allow the HBA to initiate transitions to the Partial state via agressive link power management nor the PxCMD.ICC field in each port, and the PxSCTL.IPM field in each port must be programmed to disallow device initiated Partial requests." Ensure that we always set the corresponding bits in PxSCTL.IPM, such that a device is not allowed to initiate transitions to power states which are unsupported by the HBA. DevSleep is always initiated by the HBA, however, for completeness, set the corresponding bit in PxSCTL.IPM such that agressive link power management cannot transition to DevSleep if DevSleep is not supported. sata_link_scr_lpm() is used by libahci, ata_piix and libata-pmp. However, only libahci has the ability to read the CAP/CAP2 register to see if these features are supported. Therefore, in order to not introduce any regressions on ata_piix or libata-pmp, create flags that indicate that the respective feature is NOT supported. This way, the behavior for ata_piix and libata-pmp should remain unchanged. This change is based on a patch originally submitted by Runa Guo-oc. Signed-off-by: Niklas Cassel Fixes: 1152b2617a6e ("libata: implement sata_link_scr_lpm() and make ata_dev_set_feature() global") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 9 +++++++++ drivers/ata/libata-sata.c | 19 ++++++++++++++++--- include/linux/libata.h | 4 ++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index abb5911c9d09..08745e7db820 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1883,6 +1883,15 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) else dev_info(&pdev->dev, "SSS flag set, parallel bus scan disabled\n"); + if (!(hpriv->cap & HOST_CAP_PART)) + host->flags |= ATA_HOST_NO_PART; + + if (!(hpriv->cap & HOST_CAP_SSC)) + host->flags |= ATA_HOST_NO_SSC; + + if (!(hpriv->cap2 & HOST_CAP2_SDS)) + host->flags |= ATA_HOST_NO_DEVSLP; + if (pi.flags & ATA_FLAG_EM) ahci_reset_em(host); diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 5d31c08be013..a701e1538482 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -396,10 +396,23 @@ int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, case ATA_LPM_MED_POWER_WITH_DIPM: case ATA_LPM_MIN_POWER_WITH_PARTIAL: case ATA_LPM_MIN_POWER: - if (ata_link_nr_enabled(link) > 0) - /* no restrictions on LPM transitions */ + if (ata_link_nr_enabled(link) > 0) { + /* assume no restrictions on LPM transitions */ scontrol &= ~(0x7 << 8); - else { + + /* + * If the controller does not support partial, slumber, + * or devsleep, then disallow these transitions. + */ + if (link->ap->host->flags & ATA_HOST_NO_PART) + scontrol |= (0x1 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_SSC) + scontrol |= (0x2 << 8); + + if (link->ap->host->flags & ATA_HOST_NO_DEVSLP) + scontrol |= (0x4 << 8); + } else { /* empty port, power off */ scontrol &= ~0xf; scontrol |= (0x1 << 2); diff --git a/include/linux/libata.h b/include/linux/libata.h index 52d58b13e5ee..bf4913f4d7ac 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -222,6 +222,10 @@ enum { ATA_HOST_PARALLEL_SCAN = (1 << 2), /* Ports on this host can be scanned in parallel */ ATA_HOST_IGNORE_ATA = (1 << 3), /* Ignore ATA devices on this host. */ + ATA_HOST_NO_PART = (1 << 4), /* Host does not support partial */ + ATA_HOST_NO_SSC = (1 << 5), /* Host does not support slumber */ + ATA_HOST_NO_DEVSLP = (1 << 6), /* Host does not support devslp */ + /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ From e97eb65dd464e7f118a16a26337322d07eb653e2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 4 Sep 2023 21:54:36 +0200 Subject: [PATCH 2/7] ata: sata_mv: Fix incorrect string length computation in mv_dump_mem() snprintf() returns the "number of characters which *would* be generated for the given input", not the size *really* generated. In order to avoid too large values for 'o' (and potential negative values for "sizeof(linebuf) o") use scnprintf() instead of snprintf(). Note that given the "w < 4" in the for loop, the buffer can NOT overflow, but using the *right* function is always better. Signed-off-by: Christophe JAILLET Signed-off-by: Damien Le Moal --- drivers/ata/sata_mv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index d105db5c7d81..45e48d653c60 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1255,8 +1255,8 @@ static void mv_dump_mem(struct device *dev, void __iomem *start, unsigned bytes) for (b = 0; b < bytes; ) { for (w = 0, o = 0; b < bytes && w < 4; w++) { - o += snprintf(linebuf + o, sizeof(linebuf) - o, - "%08x ", readl(start + b)); + o += scnprintf(linebuf + o, sizeof(linebuf) - o, + "%08x ", readl(start + b)); b += sizeof(u32); } dev_dbg(dev, "%s: %p: %s\n", From 737dd811a3dbfd7edd4ad2ba5152e93d99074f83 Mon Sep 17 00:00:00 2001 From: Szuying Chen Date: Thu, 7 Sep 2023 16:17:10 +0800 Subject: [PATCH 3/7] ata: libahci: clear pending interrupt status When a CRC error occurs, the HBA asserts an interrupt to indicate an interface fatal error (PxIS.IFS). The ISR clears PxIE and PxIS, then does error recovery. If the adapter receives another SDB FIS with an error (PxIS.TFES) from the device before the start of the EH recovery process, the interrupt signaling the new SDB cannot be serviced as PxIE was cleared already. This in turn results in the HBA inability to issue any command during the error recovery process after setting PxCMD.ST to 1 because PxIS.TFES is still set. According to AHCI 1.3.1 specifications section 6.2.2, fatal errors notified by setting PxIS.HBFS, PxIS.HBDS, PxIS.IFS or PxIS.TFES will cause the HBA to enter the ERR:Fatal state. In this state, the HBA shall not issue any new commands. To avoid this situation, introduce the function ahci_port_clear_pending_irq() to clear pending interrupts before executing a COMRESET. This follows the AHCI 1.3.1 - section 6.2.2.2 specification. Signed-off-by: Szuying Chen Fixes: e0bfd149973d ("[PATCH] ahci: stop engine during hard reset") Cc: stable@vger.kernel.org Reviewed-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libahci.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index e2bacedf28ef..f1263364fa97 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1256,6 +1256,26 @@ static ssize_t ahci_activity_show(struct ata_device *dev, char *buf) return sprintf(buf, "%d\n", emp->blink_policy); } +static void ahci_port_clear_pending_irq(struct ata_port *ap) +{ + struct ahci_host_priv *hpriv = ap->host->private_data; + void __iomem *port_mmio = ahci_port_base(ap); + u32 tmp; + + /* clear SError */ + tmp = readl(port_mmio + PORT_SCR_ERR); + dev_dbg(ap->host->dev, "PORT_SCR_ERR 0x%x\n", tmp); + writel(tmp, port_mmio + PORT_SCR_ERR); + + /* clear port IRQ */ + tmp = readl(port_mmio + PORT_IRQ_STAT); + dev_dbg(ap->host->dev, "PORT_IRQ_STAT 0x%x\n", tmp); + if (tmp) + writel(tmp, port_mmio + PORT_IRQ_STAT); + + writel(1 << ap->port_no, hpriv->mmio + HOST_IRQ_STAT); +} + static void ahci_port_init(struct device *dev, struct ata_port *ap, int port_no, void __iomem *mmio, void __iomem *port_mmio) @@ -1270,18 +1290,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, if (rc) dev_warn(dev, "%s (%d)\n", emsg, rc); - /* clear SError */ - tmp = readl(port_mmio + PORT_SCR_ERR); - dev_dbg(dev, "PORT_SCR_ERR 0x%x\n", tmp); - writel(tmp, port_mmio + PORT_SCR_ERR); - - /* clear port IRQ */ - tmp = readl(port_mmio + PORT_IRQ_STAT); - dev_dbg(dev, "PORT_IRQ_STAT 0x%x\n", tmp); - if (tmp) - writel(tmp, port_mmio + PORT_IRQ_STAT); - - writel(1 << port_no, mmio + HOST_IRQ_STAT); + ahci_port_clear_pending_irq(ap); /* mark esata ports */ tmp = readl(port_mmio + PORT_CMD); @@ -1603,6 +1612,8 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, tf.status = ATA_BUSY; ata_tf_to_fis(&tf, 0, 0, d2h_fis); + ahci_port_clear_pending_irq(ap); + rc = sata_link_hardreset(link, timing, deadline, online, ahci_check_ready); From e3da4c401f2d088cf049769eb1e39c299867ee9d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 15 Sep 2023 11:33:12 +0900 Subject: [PATCH 4/7] ata: pata_parport: Fix code style issues Fix indentation and other code style issues in the comm.c file. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202309150646.n3iBvbPj-lkp@intel.com/ Signed-off-by: Damien Le Moal --- drivers/ata/pata_parport/comm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/ata/pata_parport/comm.c b/drivers/ata/pata_parport/comm.c index 4839becbbd56..94b8d352102e 100644 --- a/drivers/ata/pata_parport/comm.c +++ b/drivers/ata/pata_parport/comm.c @@ -37,7 +37,7 @@ static int comm_read_regr(struct pi_adapter *pi, int cont, int regr) { int l, h, r; - r = regr + cont_map[cont]; + r = regr + cont_map[cont]; switch (pi->mode) { case 0: @@ -90,7 +90,6 @@ static void comm_connect(struct pi_adapter *pi) } static void comm_disconnect(struct pi_adapter *pi) - { w2(0); w2(0); w2(0); w2(4); w0(pi->saved_r0); @@ -172,12 +171,12 @@ static void comm_write_block(struct pi_adapter *pi, char *buf, int count) w4l(swab16(((u16 *)buf)[2 * k]) | swab16(((u16 *)buf)[2 * k + 1]) << 16); break; - } + } } static void comm_log_adapter(struct pi_adapter *pi) - -{ char *mode_string[5] = { "4-bit", "8-bit", "EPP-8", "EPP-16", "EPP-32" }; +{ + char *mode_string[5] = { "4-bit", "8-bit", "EPP-8", "EPP-16", "EPP-32" }; dev_info(&pi->dev, "DataStor Commuter at 0x%x, mode %d (%s), delay %d\n", From 80cc944eca4f0baa9c381d0706f3160e491437f2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Sep 2023 00:19:16 +0200 Subject: [PATCH 5/7] ata: libata-eh: do not clear ATA_PFLAG_EH_PENDING in ata_eh_reset() ata_scsi_port_error_handler() starts off by clearing ATA_PFLAG_EH_PENDING, before calling ap->ops->error_handler() (without holding the ap->lock). If an error IRQ is received while ap->ops->error_handler() is running, the irq handler will set ATA_PFLAG_EH_PENDING. Once ap->ops->error_handler() returns, ata_scsi_port_error_handler() checks if ATA_PFLAG_EH_PENDING is set, and if it is, another iteration of ATA EH is performed. The problem is that ATA_PFLAG_EH_PENDING is not only cleared by ata_scsi_port_error_handler(), it is also cleared by ata_eh_reset(). ata_eh_reset() is called by ap->ops->error_handler(). This additional clearing done by ata_eh_reset() breaks the whole retry logic in ata_scsi_port_error_handler(). Thus, if an error IRQ is received while ap->ops->error_handler() is running, the port will currently remain frozen and will never get re-enabled. The additional clearing in ata_eh_reset() was introduced in commit 1e641060c4b5 ("libata: clear eh_info on reset completion"). Looking at the original error report: https://marc.info/?l=linux-ide&m=124765325828495&w=2 We can see the following happening: [ 1.074659] ata3: XXX port freeze [ 1.074700] ata3: XXX hardresetting link, stopping engine [ 1.074746] ata3: XXX flipping SControl [ 1.411471] ata3: XXX irq_stat=400040 CONN|PHY [ 1.411475] ata3: XXX port freeze [ 1.420049] ata3: XXX starting engine [ 1.420096] ata3: XXX rc=0, class=1 [ 1.420142] ata3: XXX clearing IRQs for thawing [ 1.420188] ata3: XXX port thawed [ 1.420234] ata3: SATA link up 3.0 Gbps (SStatus 123 SControl 300) We are not supposed to be able to receive an error IRQ while the port is frozen (PxIE is set to 0, i.e. all IRQs for the port are disabled). AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) states: "Each bit location can be thought of as reporting a '1' if the virtual "interrupt line" for that port is indicating it wishes to generate an interrupt. That is, if a port has one or more interrupt status bit set, and the enables for those status bits are set, then this bit shall be set." Additionally, AHCI state P:ComInit clearly shows that the state machine will only jump to P:ComInitSetIS (which sets IS.IPS(x) to '1'), if PxIE.PCE is set to '1'. In our case, PxIE is set to 0, so IS.IPS(x) won't get set. So IS.IPS(x) only gets set if PxIS and PxIE is set. AHCI 1.3.1 section 10.7.1.1 First Tier (IS Register) also states: "The bits in this register are read/write clear. It is set by the level of the virtual interrupt line being a set, and cleared by a write of '1' from the software." So if IS.IPS(x) is set, you need to explicitly clear it by writing a 1 to IS.IPS(x) for that port. Since PxIE is cleared, the only way to get an interrupt while the port is frozen, is if IS.IPS(x) is set, and the only way IS.IPS(x) can be set when the port is frozen, is if it was set before the port was frozen. However, since commit 737dd811a3db ("ata: libahci: clear pending interrupt status"), we clear both PxIS and IS.IPS(x) after freezing the port, but before the COMRESET, so the problem that commit 1e641060c4b5 ("libata: clear eh_info on reset completion") fixed can no longer happen. Thus, revert commit 1e641060c4b5 ("libata: clear eh_info on reset completion"), so that the retry logic in ata_scsi_port_error_handler() works once again. (The retry logic is still needed, since we can still get an error IRQ _after_ the port has been thawed, but before ata_scsi_port_error_handler() takes the ap->lock in order to check if ATA_PFLAG_EH_PENDING is set.) Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-eh.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 159ba6ba19eb..5c493b6316eb 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2796,18 +2796,11 @@ int ata_eh_reset(struct ata_link *link, int classify, } } - /* - * Some controllers can't be frozen very well and may set spurious - * error conditions during reset. Clear accumulated error - * information and re-thaw the port if frozen. As reset is the - * final recovery action and we cross check link onlineness against - * device classification later, no hotplug event is lost by this. - */ + /* clear cached SError */ spin_lock_irqsave(link->ap->lock, flags); - memset(&link->eh_info, 0, sizeof(link->eh_info)); + link->eh_info.serror = 0; if (slave) - memset(&slave->eh_info, 0, sizeof(link->eh_info)); - ap->pflags &= ~ATA_PFLAG_EH_PENDING; + slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); if (ata_port_is_frozen(ap)) From 7a3bc2b3989e05bbaa904a63279049a401491c84 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 14 Sep 2023 00:19:17 +0200 Subject: [PATCH 6/7] ata: libata-eh: do not thaw the port twice in ata_eh_reset() commit 1e641060c4b5 ("libata: clear eh_info on reset completion") added a workaround that broke the retry mechanism in ATA EH. Tejun himself suggested to remove this workaround when it was identified to cause additional problems: https://lore.kernel.org/linux-ide/20110426135027.GI878@htj.dyndns.org/ He even said: "Hmm... it seems I wasn't thinking straight when I added that work around." https://lore.kernel.org/linux-ide/20110426155229.GM878@htj.dyndns.org/ While removing the workaround solved the issue, however, the workaround was kept to avoid "spurious hotplug events during reset", and instead another workaround was added on top of the existing workaround in commit 8c56cacc724c ("libata: fix unexpectedly frozen port after ata_eh_reset()"). Because these IRQs happened when the port was frozen, we know that they were actually a side effect of PxIS and IS.IPS(x) not being cleared before the COMRESET. This is now done in commit 94152042eaa9 ("ata: libahci: clear pending interrupt status"), so these workarounds can now be removed. Since commit 1e641060c4b5 ("libata: clear eh_info on reset completion") has now been reverted, the ATA EH retry mechanism is functional again, so there is once again no need to thaw the port more than once in ata_eh_reset(). This reverts "the workaround on top of the workaround" introduced in commit 8c56cacc724c ("libata: fix unexpectedly frozen port after ata_eh_reset()"). Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-eh.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 5c493b6316eb..4cf4f57e57b8 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -2803,9 +2803,6 @@ int ata_eh_reset(struct ata_link *link, int classify, slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); - if (ata_port_is_frozen(ap)) - ata_eh_thaw_port(ap); - /* * Make sure onlineness and classification result correspond. * Hotplug could have happened during reset and some From 5e35a9ac3fe3a0d571b899a16ca84253e53dc70c Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 13 Sep 2023 17:04:43 +0200 Subject: [PATCH 7/7] ata: libata-core: fetch sense data for successful commands iff CDL enabled Currently, we fetch sense data for a _successful_ command if either: 1) Command was NCQ and ATA_DFLAG_CDL_ENABLED flag set (flag ATA_DFLAG_CDL_ENABLED will only be set if the Successful NCQ command sense data supported bit is set); or 2) Command was non-NCQ and regular sense data reporting is enabled. This means that case 2) will trigger for a non-NCQ command which has ATA_SENSE bit set, regardless if CDL is enabled or not. This decision was by design. If the device reports that it has sense data available, it makes sense to fetch that sense data, since the sk/asc/ascq could be important information regardless if CDL is enabled or not. However, the fetching of sense data for a successful command is done via ATA EH. Considering how intricate the ATA EH is, we really do not want to invoke ATA EH unless absolutely needed. Before commit 18bd7718b5c4 ("scsi: ata: libata: Handle completion of CDL commands using policy 0xD") we never fetched sense data for successful commands. In order to not invoke the ATA EH unless absolutely necessary, even if the device claims support for sense data reporting, only fetch sense data for successful (NCQ and non-NCQ commands) commands that are using CDL. [Damien] Modified the check to test the qc flag ATA_QCFLAG_HAS_CDL instead of the device support for CDL, which is implied for commands using CDL. Fixes: 3ac873c76d79 ("ata: libata-core: fix when to fetch sense data for successful commands") Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 74314311295f..0072e0f9ad39 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4783,11 +4783,8 @@ void ata_qc_complete(struct ata_queued_cmd *qc) * been aborted by the device due to a limit timeout using the policy * 0xD. For these commands, invoke EH to get the command sense data. */ - if (qc->result_tf.status & ATA_SENSE && - ((ata_is_ncq(qc->tf.protocol) && - dev->flags & ATA_DFLAG_CDL_ENABLED) || - (!ata_is_ncq(qc->tf.protocol) && - ata_id_sense_reporting_enabled(dev->id)))) { + if (qc->flags & ATA_QCFLAG_HAS_CDL && + qc->result_tf.status & ATA_SENSE) { /* * Tell SCSI EH to not overwrite scmd->result even if this * command is finished with result SAM_STAT_GOOD.