mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2024-10-28 15:20:41 +00:00
cxgb4: Move SGE Ingress DMA state monitor code to a new routine
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
982b81eb24
commit
a3bfb6179c
3 changed files with 156 additions and 79 deletions
|
@ -328,6 +328,17 @@ struct adapter_params {
|
||||||
unsigned int max_ird_adapter; /* Max read depth per adapter */
|
unsigned int max_ird_adapter; /* Max read depth per adapter */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* State needed to monitor the forward progress of SGE Ingress DMA activities
|
||||||
|
* and possible hangs.
|
||||||
|
*/
|
||||||
|
struct sge_idma_monitor_state {
|
||||||
|
unsigned int idma_1s_thresh; /* Core Clock ticks in 1s; HW Same State Counts below this mean "not stalled" */
|
||||||
|
unsigned int idma_stalled[2]; /* per-channel synthesized stall timers in HZ; 0 while not stalled */
|
||||||
|
unsigned int idma_state[2]; /* per-channel IDMA state sampled when a hang warning is issued */
|
||||||
|
unsigned int idma_qid[2]; /* per-channel hung Ingress Queue ID sampled when a hang warning is issued */
|
||||||
|
unsigned int idma_warn[2]; /* per-channel countdown in HZ until the next repeated hang warning */
|
||||||
|
};
|
||||||
|
|
||||||
#include "t4fw_api.h"
|
#include "t4fw_api.h"
|
||||||
|
|
||||||
#define FW_VERSION(chip) ( \
|
#define FW_VERSION(chip) ( \
|
||||||
|
@ -630,12 +641,7 @@ struct sge {
|
||||||
u32 fl_align; /* response queue message alignment */
|
u32 fl_align; /* response queue message alignment */
|
||||||
u32 fl_starve_thres; /* Free List starvation threshold */
|
u32 fl_starve_thres; /* Free List starvation threshold */
|
||||||
|
|
||||||
/* State variables for detecting an SGE Ingress DMA hang */
|
struct sge_idma_monitor_state idma_monitor;
|
||||||
unsigned int idma_1s_thresh;/* SGE same State Counter 1s threshold */
|
|
||||||
unsigned int idma_stalled[2];/* SGE synthesized stalled timers in HZ */
|
|
||||||
unsigned int idma_state[2]; /* SGE IDMA Hang detect state */
|
|
||||||
unsigned int idma_qid[2]; /* SGE IDMA Hung Ingress Queue ID */
|
|
||||||
|
|
||||||
unsigned int egr_start;
|
unsigned int egr_start;
|
||||||
unsigned int egr_sz;
|
unsigned int egr_sz;
|
||||||
unsigned int ingr_start;
|
unsigned int ingr_start;
|
||||||
|
@ -1311,4 +1317,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
|
||||||
u32 addr, u32 val);
|
u32 addr, u32 val);
|
||||||
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
|
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
|
||||||
void t4_free_mem(void *addr);
|
void t4_free_mem(void *addr);
|
||||||
|
void t4_idma_monitor_init(struct adapter *adapter,
|
||||||
|
struct sge_idma_monitor_state *idma);
|
||||||
|
void t4_idma_monitor(struct adapter *adapter,
|
||||||
|
struct sge_idma_monitor_state *idma,
|
||||||
|
int hz, int ticks);
|
||||||
#endif /* __CXGB4_H__ */
|
#endif /* __CXGB4_H__ */
|
||||||
|
|
|
@ -100,16 +100,6 @@
|
||||||
*/
|
*/
|
||||||
#define TX_QCHECK_PERIOD (HZ / 2)
|
#define TX_QCHECK_PERIOD (HZ / 2)
|
||||||
|
|
||||||
/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate
|
|
||||||
* (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA
|
|
||||||
* State Machines in the same state for this amount of time (in HZ) then we'll
|
|
||||||
* issue a warning about a potential hang. We'll repeat the warning as the
|
|
||||||
* SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till
|
|
||||||
* the situation clears. If the situation clears, we'll note that as well.
|
|
||||||
*/
|
|
||||||
#define SGE_IDMA_WARN_THRESH (1 * HZ)
|
|
||||||
#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Max number of Tx descriptors to be reclaimed by the Tx timer.
|
* Max number of Tx descriptors to be reclaimed by the Tx timer.
|
||||||
*/
|
*/
|
||||||
|
@ -2279,7 +2269,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap)
|
||||||
static void sge_rx_timer_cb(unsigned long data)
|
static void sge_rx_timer_cb(unsigned long data)
|
||||||
{
|
{
|
||||||
unsigned long m;
|
unsigned long m;
|
||||||
unsigned int i, idma_same_state_cnt[2];
|
unsigned int i;
|
||||||
struct adapter *adap = (struct adapter *)data;
|
struct adapter *adap = (struct adapter *)data;
|
||||||
struct sge *s = &adap->sge;
|
struct sge *s = &adap->sge;
|
||||||
|
|
||||||
|
@ -2300,67 +2290,16 @@ static void sge_rx_timer_cb(unsigned long data)
|
||||||
set_bit(id, s->starving_fl);
|
set_bit(id, s->starving_fl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* The remainder of the SGE RX Timer Callback routine is dedicated to
|
||||||
|
* global Master PF activities like checking for chip ingress stalls,
|
||||||
|
* etc.
|
||||||
|
*/
|
||||||
|
if (!(adap->flags & MASTER_PF))
|
||||||
|
goto done;
|
||||||
|
|
||||||
t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13);
|
t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD);
|
||||||
idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A);
|
|
||||||
idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
|
|
||||||
|
|
||||||
for (i = 0; i < 2; i++) {
|
|
||||||
u32 debug0, debug11;
|
|
||||||
|
|
||||||
/* If the Ingress DMA Same State Counter ("timer") is less
|
|
||||||
* than 1s, then we can reset our synthesized Stall Timer and
|
|
||||||
* continue. If we have previously emitted warnings about a
|
|
||||||
* potential stalled Ingress Queue, issue a note indicating
|
|
||||||
* that the Ingress Queue has resumed forward progress.
|
|
||||||
*/
|
|
||||||
if (idma_same_state_cnt[i] < s->idma_1s_thresh) {
|
|
||||||
if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH)
|
|
||||||
CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n",
|
|
||||||
i, s->idma_qid[i],
|
|
||||||
s->idma_stalled[i]/HZ);
|
|
||||||
s->idma_stalled[i] = 0;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Synthesize an SGE Ingress DMA Same State Timer in the Hz
|
|
||||||
* domain. The first time we get here it'll be because we
|
|
||||||
* passed the 1s Threshold; each additional time it'll be
|
|
||||||
* because the RX Timer Callback is being fired on its regular
|
|
||||||
* schedule.
|
|
||||||
*
|
|
||||||
* If the stall is below our Potential Hung Ingress Queue
|
|
||||||
* Warning Threshold, continue.
|
|
||||||
*/
|
|
||||||
if (s->idma_stalled[i] == 0)
|
|
||||||
s->idma_stalled[i] = HZ;
|
|
||||||
else
|
|
||||||
s->idma_stalled[i] += RX_QCHECK_PERIOD;
|
|
||||||
|
|
||||||
if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */
|
|
||||||
if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* Read and save the SGE IDMA State and Queue ID information.
|
|
||||||
* We do this every time in case it changes across time ...
|
|
||||||
*/
|
|
||||||
t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0);
|
|
||||||
debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
|
|
||||||
s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
|
|
||||||
|
|
||||||
t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11);
|
|
||||||
debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
|
|
||||||
s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
|
|
||||||
|
|
||||||
CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n",
|
|
||||||
i, s->idma_qid[i], s->idma_state[i],
|
|
||||||
s->idma_stalled[i]/HZ, debug0, debug11);
|
|
||||||
t4_sge_decode_idma_state(adap, s->idma_state[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
done:
|
||||||
mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
|
mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3121,11 +3060,11 @@ int t4_sge_init(struct adapter *adap)
|
||||||
egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
|
egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
|
||||||
s->fl_starve_thres = 2*egress_threshold + 1;
|
s->fl_starve_thres = 2*egress_threshold + 1;
|
||||||
|
|
||||||
|
t4_idma_monitor_init(adap, &s->idma_monitor);
|
||||||
|
|
||||||
setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
|
setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
|
||||||
setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
|
setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
|
||||||
s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */
|
|
||||||
s->idma_stalled[0] = 0;
|
|
||||||
s->idma_stalled[1] = 0;
|
|
||||||
spin_lock_init(&s->intrq_lock);
|
spin_lock_init(&s->intrq_lock);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -5717,3 +5717,130 @@ void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr)
|
||||||
t4_write_reg(adap, TP_DBG_LA_CONFIG_A,
|
t4_write_reg(adap, TP_DBG_LA_CONFIG_A,
|
||||||
cfg | adap->params.tp.la_mask);
|
cfg | adap->params.tp.la_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* SGE Hung Ingress DMA Warning Threshold time and Warning Repeat Rate (in
|
||||||
|
* seconds). If we find one of the SGE Ingress DMA State Machines in the same
|
||||||
|
* state for more than the Warning Threshold then we'll issue a warning about
|
||||||
|
* a potential hang. We'll repeat the warning as the SGE Ingress DMA Channel
|
||||||
|
* appears to be hung every Warning Repeat seconds till the situation clears.
|
||||||
|
* If the situation clears, we'll note that as well.
|
||||||
|
*/
|
||||||
|
#define SGE_IDMA_WARN_THRESH 1
|
||||||
|
#define SGE_IDMA_WARN_REPEAT 300
|
||||||
|
|
||||||
|
/**
|
||||||
|
* t4_idma_monitor_init - initialize SGE Ingress DMA Monitor
|
||||||
|
* @adapter: the adapter
|
||||||
|
* @idma: the adapter IDMA Monitor state
|
||||||
|
*
|
||||||
|
* Initialize the state of an SGE Ingress DMA Monitor.
|
||||||
|
*/
|
||||||
|
void t4_idma_monitor_init(struct adapter *adapter,
|
||||||
|
struct sge_idma_monitor_state *idma)
|
||||||
|
{
|
||||||
|
/* Initialize the state variables for detecting an SGE Ingress DMA
|
||||||
|
* hang. The SGE has internal counters which count up on each clock
|
||||||
|
* tick whenever the SGE finds its Ingress DMA State Engines in the
|
||||||
|
* same state they were on the previous clock tick. The clock used is
|
||||||
|
* the Core Clock so we have a limit on the maximum "time" they can
|
||||||
|
* record; typically a very small number of seconds. For instance,
|
||||||
|
* with a 600MHz Core Clock, we can only count up to a bit more than
|
||||||
|
* 7s. So we'll synthesize a larger counter in order to not run the
|
||||||
|
* risk of having the "timers" overflow and give us the flexibility to
|
||||||
|
* maintain a Hung SGE State Machine of our own which operates across
|
||||||
|
* a longer time frame.
|
||||||
|
*/
|
||||||
|
idma->idma_1s_thresh = core_ticks_per_usec(adapter) * 1000000; /* 1s */
|
||||||
|
idma->idma_stalled[0] = 0;
|
||||||
|
idma->idma_stalled[1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* t4_idma_monitor - monitor SGE Ingress DMA state
|
||||||
|
* @adapter: the adapter
|
||||||
|
* @idma: the adapter IDMA Monitor state
|
||||||
|
* @hz: number of ticks/second
|
||||||
|
* @ticks: number of ticks since the last IDMA Monitor call
|
||||||
|
*/
|
||||||
|
void t4_idma_monitor(struct adapter *adapter,
|
||||||
|
struct sge_idma_monitor_state *idma,
|
||||||
|
int hz, int ticks)
|
||||||
|
{
|
||||||
|
int i, idma_same_state_cnt[2];
|
||||||
|
|
||||||
|
/* Read the SGE Debug Ingress DMA Same State Count registers. These
|
||||||
|
* are counters inside the SGE which count up on each clock when the
|
||||||
|
* SGE finds its Ingress DMA State Engines in the same states they
|
||||||
|
* were in the previous clock. The counters will peg out at
|
||||||
|
* 0xffffffff without wrapping around so once they pass the 1s
|
||||||
|
* threshold they'll stay above that till the IDMA state changes.
|
||||||
|
*/
|
||||||
|
t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 13);
|
||||||
|
idma_same_state_cnt[0] = t4_read_reg(adapter, SGE_DEBUG_DATA_HIGH_A);
|
||||||
|
idma_same_state_cnt[1] = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
|
||||||
|
|
||||||
|
for (i = 0; i < 2; i++) {
|
||||||
|
u32 debug0, debug11;
|
||||||
|
|
||||||
|
/* If the Ingress DMA Same State Counter ("timer") is less
|
||||||
|
* than 1s, then we can reset our synthesized Stall Timer and
|
||||||
|
* continue. If we have previously emitted warnings about a
|
||||||
|
* potential stalled Ingress Queue, issue a note indicating
|
||||||
|
* that the Ingress Queue has resumed forward progress.
|
||||||
|
*/
|
||||||
|
if (idma_same_state_cnt[i] < idma->idma_1s_thresh) {
|
||||||
|
if (idma->idma_stalled[i] >= SGE_IDMA_WARN_THRESH * hz)
|
||||||
|
dev_warn(adapter->pdev_dev, "SGE idma%d, queue %u, "
|
||||||
|
"resumed after %d seconds\n",
|
||||||
|
i, idma->idma_qid[i],
|
||||||
|
idma->idma_stalled[i] / hz);
|
||||||
|
idma->idma_stalled[i] = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Synthesize an SGE Ingress DMA Same State Timer in the Hz
|
||||||
|
* domain. The first time we get here it'll be because we
|
||||||
|
* passed the 1s Threshold; each additional time it'll be
|
||||||
|
* because the RX Timer Callback is being fired on its regular
|
||||||
|
* schedule.
|
||||||
|
*
|
||||||
|
* If the stall is below our Potential Hung Ingress Queue
|
||||||
|
* Warning Threshold, continue.
|
||||||
|
*/
|
||||||
|
if (idma->idma_stalled[i] == 0) {
|
||||||
|
idma->idma_stalled[i] = hz;
|
||||||
|
idma->idma_warn[i] = 0;
|
||||||
|
} else {
|
||||||
|
idma->idma_stalled[i] += ticks;
|
||||||
|
idma->idma_warn[i] -= ticks;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (idma->idma_stalled[i] < SGE_IDMA_WARN_THRESH * hz)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* We'll issue a warning every SGE_IDMA_WARN_REPEAT seconds.
|
||||||
|
*/
|
||||||
|
if (idma->idma_warn[i] > 0)
|
||||||
|
continue;
|
||||||
|
idma->idma_warn[i] = SGE_IDMA_WARN_REPEAT * hz;
|
||||||
|
|
||||||
|
/* Read and save the SGE IDMA State and Queue ID information.
|
||||||
|
* We do this every time in case it changes across time ...
|
||||||
|
* can't be too careful ...
|
||||||
|
*/
|
||||||
|
t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 0);
|
||||||
|
debug0 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
|
||||||
|
idma->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
|
||||||
|
|
||||||
|
t4_write_reg(adapter, SGE_DEBUG_INDEX_A, 11);
|
||||||
|
debug11 = t4_read_reg(adapter, SGE_DEBUG_DATA_LOW_A);
|
||||||
|
idma->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
|
||||||
|
|
||||||
|
dev_warn(adapter->pdev_dev, "SGE idma%u, queue %u, potentially stuck in "
|
||||||
|
"state %u for %d seconds (debug0=%#x, debug11=%#x)\n",
|
||||||
|
i, idma->idma_qid[i], idma->idma_state[i],
|
||||||
|
idma->idma_stalled[i] / hz,
|
||||||
|
debug0, debug11);
|
||||||
|
t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue