scsi: smartpqi: add heartbeat check

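Check for controller lockups by periodically reading the heartbeat counter that the controller exposes in its PQI configuration table.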

Reviewed-by: Scott Benesh <scott.benesh@microsemi.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microsemi.com>
Signed-off-by: Don Brace <don.brace@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Kevin Barnett 2017-05-03 18:53:11 -05:00 committed by Martin K. Petersen
parent 061ef06a2d
commit 98f876674a
3 changed files with 143 additions and 47 deletions

View file

@ -490,7 +490,6 @@ struct pqi_raid_error_info {
#define PQI_EVENT_TYPE_LOGICAL_DEVICE 0x5
#define PQI_EVENT_TYPE_AIO_STATE_CHANGE 0xfd
#define PQI_EVENT_TYPE_AIO_CONFIG_CHANGE 0xfe
#define PQI_EVENT_TYPE_HEARTBEAT 0xff
#pragma pack()
@ -635,6 +634,58 @@ struct pqi_encryption_info {
u32 encrypt_tweak_upper;
};
#pragma pack(1)
/* expected contents of the 8-byte signature field of struct pqi_config_table */
#define PQI_CONFIG_TABLE_SIGNATURE "CFGTABLE"
/*
 * all-ones u16 value (0xffff); presumably the upper bound on the table
 * length - its use is not visible here, confirm against the caller
 */
#define PQI_CONFIG_TABLE_MAX_LENGTH ((u16)~0)
/*
 * configuration table section IDs, as stored in the section_id field of
 * struct pqi_config_table_section_header
 */
#define PQI_CONFIG_TABLE_SECTION_GENERAL_INFO 0
#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_FEATURES 1
#define PQI_CONFIG_TABLE_SECTION_FIRMWARE_ERRATA 2
#define PQI_CONFIG_TABLE_SECTION_DEBUG 3
#define PQI_CONFIG_TABLE_SECTION_HEARTBEAT 4
/*
 * Header located at the base address of the PQI configuration table.
 */
struct pqi_config_table {
	/* table signature: "CFGTABLE" */
	u8	signature[8];
	/*
	 * offset in bytes from the base address of this table to the
	 * first section
	 */
	__le32	first_section_offset;
};
/*
 * Header that starts every section of the PQI configuration table and links
 * it to the next section.
 */
struct pqi_config_table_section_header {
	/*
	 * section type, as defined by the PQI_CONFIG_TABLE_SECTION_*
	 * manifest constants
	 */
	__le16	section_id;
	/*
	 * offset in bytes from the base address of the table to the next
	 * section, or 0 if this is the last entry
	 */
	__le16	next_section_offset;
};
/*
 * General information section of the PQI configuration table.
 */
struct pqi_config_table_general_info {
	struct pqi_config_table_section_header header;
	/* size of this section in bytes, including the section header */
	__le32	section_length;
	/* max. outstanding commands supported by the controller */
	__le32	max_outstanding_requests;
	/* max. transfer size of a single command */
	__le32	max_sg_size;
	/*
	 * max. number of scatter-gather entries supported in a single
	 * command
	 */
	__le32	max_sg_per_request;
};
/*
 * Debug section of the PQI configuration table: the common section header
 * followed by a single little-endian 32-bit scratchpad field.
 */
struct pqi_config_table_debug {
	struct pqi_config_table_section_header header;
	__le32	scratchpad;
};
/*
 * Heartbeat section of the PQI configuration table: the common section
 * header followed by a little-endian 32-bit heartbeat counter.
 */
struct pqi_config_table_heartbeat {
	struct pqi_config_table_section_header header;
	__le32	heartbeat_counter;
};
#define PQI_MAX_OUTSTANDING_REQUESTS ((u32)~0)
#define PQI_MAX_TRANSFER_SIZE (4 * 1024U * 1024U)
@ -645,8 +696,6 @@ struct pqi_encryption_info {
#define PQI_HBA_BUS 2
#define PQI_MAX_BUS PQI_HBA_BUS
#pragma pack(1)
struct report_lun_header {
__be32 list_length;
u8 extended_response;
@ -870,7 +919,6 @@ struct pqi_io_request {
struct list_head request_list_entry;
};
#define PQI_EVENT_HEARTBEAT 0
#define PQI_NUM_SUPPORTED_EVENTS 6
struct pqi_event {
@ -943,7 +991,6 @@ struct pqi_ctrl_info {
u8 inbound_spanning_supported : 1;
u8 outbound_spanning_supported : 1;
u8 pqi_mode_enabled : 1;
u8 heartbeat_timer_started : 1;
u8 update_time_worker_scheduled : 1;
struct list_head scsi_device_list;
@ -963,7 +1010,8 @@ struct pqi_ctrl_info {
atomic_t num_interrupts;
int previous_num_interrupts;
unsigned int num_heartbeats_requested;
u32 previous_heartbeat_count;
__le32 __iomem *heartbeat_counter;
struct timer_list heartbeat_timer;
struct semaphore sync_request_sem;

View file

@ -267,6 +267,14 @@ static inline void pqi_cancel_rescan_worker(struct pqi_ctrl_info *ctrl_info)
cancel_delayed_work_sync(&ctrl_info->rescan_work);
}
/*
 * Return the current value of the controller's heartbeat counter, read from
 * I/O memory with readl().  Returns 0 when ctrl_info->heartbeat_counter has
 * not been set.
 */
static inline u32 pqi_read_heartbeat_counter(struct pqi_ctrl_info *ctrl_info)
{
	return ctrl_info->heartbeat_counter ?
		readl(ctrl_info->heartbeat_counter) : 0;
}
static int pqi_map_single(struct pci_dev *pci_dev,
struct pqi_sg_descriptor *sg_descriptor, void *buffer,
size_t buffer_length, int data_direction)
@ -2708,23 +2716,18 @@ static inline unsigned int pqi_num_elements_free(unsigned int pi,
return elements_in_queue - num_elements_used - 1;
}
#define PQI_EVENT_ACK_TIMEOUT 30
static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
static void pqi_send_event_ack(struct pqi_ctrl_info *ctrl_info,
struct pqi_event_acknowledge_request *iu, size_t iu_length)
{
pqi_index_t iq_pi;
pqi_index_t iq_ci;
unsigned long flags;
void *next_element;
unsigned long timeout;
struct pqi_queue_group *queue_group;
queue_group = &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP];
put_unaligned_le16(queue_group->oq_id, &iu->header.response_queue_id);
timeout = (PQI_EVENT_ACK_TIMEOUT * HZ) + jiffies;
while (1) {
spin_lock_irqsave(&queue_group->submit_lock[RAID_PATH], flags);
@ -2738,11 +2741,8 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
spin_unlock_irqrestore(
&queue_group->submit_lock[RAID_PATH], flags);
if (time_after(jiffies, timeout)) {
dev_err(&ctrl_info->pci_dev->dev,
"sending event acknowledge timed out\n");
if (pqi_ctrl_offline(ctrl_info))
return;
}
}
next_element = queue_group->iq_element_array[RAID_PATH] +
@ -2751,7 +2751,6 @@ static void pqi_start_event_ack(struct pqi_ctrl_info *ctrl_info,
memcpy(next_element, iu, iu_length);
iq_pi = (iq_pi + 1) % ctrl_info->num_elements_per_iq;
queue_group->iq_pi_copy[RAID_PATH] = iq_pi;
/*
@ -2777,7 +2776,7 @@ static void pqi_acknowledge_event(struct pqi_ctrl_info *ctrl_info,
request.event_id = event->event_id;
request.additional_event_id = event->additional_event_id;
pqi_start_event_ack(ctrl_info, &request, sizeof(request));
pqi_send_event_ack(ctrl_info, &request, sizeof(request));
}
static void pqi_event_worker(struct work_struct *work)
@ -2785,7 +2784,6 @@ static void pqi_event_worker(struct work_struct *work)
unsigned int i;
struct pqi_ctrl_info *ctrl_info;
struct pqi_event *event;
bool got_non_heartbeat_event = false;
ctrl_info = container_of(work, struct pqi_ctrl_info, event_work);
@ -2797,8 +2795,6 @@ static void pqi_event_worker(struct work_struct *work)
if (event->pending) {
event->pending = false;
pqi_acknowledge_event(ctrl_info, event);
if (i != PQI_EVENT_TYPE_HEARTBEAT)
got_non_heartbeat_event = true;
}
event++;
}
@ -2848,57 +2844,58 @@ static void pqi_take_ctrl_offline(struct pqi_ctrl_info *ctrl_info)
}
}
#define PQI_HEARTBEAT_TIMER_INTERVAL (5 * HZ)
#define PQI_MAX_HEARTBEAT_REQUESTS 5
#define PQI_HEARTBEAT_TIMER_INTERVAL (10 * HZ)
static void pqi_heartbeat_timer_handler(unsigned long data)
{
int num_interrupts;
u32 heartbeat_count;
struct pqi_ctrl_info *ctrl_info = (struct pqi_ctrl_info *)data;
if (!ctrl_info->heartbeat_timer_started)
pqi_check_ctrl_health(ctrl_info);
if (pqi_ctrl_offline(ctrl_info))
return;
num_interrupts = atomic_read(&ctrl_info->num_interrupts);
heartbeat_count = pqi_read_heartbeat_counter(ctrl_info);
if (num_interrupts == ctrl_info->previous_num_interrupts) {
ctrl_info->num_heartbeats_requested++;
if (ctrl_info->num_heartbeats_requested >
PQI_MAX_HEARTBEAT_REQUESTS) {
if (heartbeat_count == ctrl_info->previous_heartbeat_count) {
dev_err(&ctrl_info->pci_dev->dev,
"no heartbeat detected - last heartbeat count: %u\n",
heartbeat_count);
pqi_take_ctrl_offline(ctrl_info);
return;
}
ctrl_info->events[PQI_EVENT_HEARTBEAT].pending = true;
schedule_work(&ctrl_info->event_work);
} else {
ctrl_info->num_heartbeats_requested = 0;
ctrl_info->previous_num_interrupts = num_interrupts;
}
ctrl_info->previous_num_interrupts = num_interrupts;
ctrl_info->previous_heartbeat_count = heartbeat_count;
mod_timer(&ctrl_info->heartbeat_timer,
jiffies + PQI_HEARTBEAT_TIMER_INTERVAL);
}
static void pqi_start_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
{
if (!ctrl_info->heartbeat_counter)
return;
ctrl_info->previous_num_interrupts =
atomic_read(&ctrl_info->num_interrupts);
ctrl_info->previous_heartbeat_count =
pqi_read_heartbeat_counter(ctrl_info);
init_timer(&ctrl_info->heartbeat_timer);
ctrl_info->heartbeat_timer.expires =
jiffies + PQI_HEARTBEAT_TIMER_INTERVAL;
ctrl_info->heartbeat_timer.data = (unsigned long)ctrl_info;
ctrl_info->heartbeat_timer.function = pqi_heartbeat_timer_handler;
ctrl_info->heartbeat_timer_started = true;
add_timer(&ctrl_info->heartbeat_timer);
}
static inline void pqi_stop_heartbeat_timer(struct pqi_ctrl_info *ctrl_info)
{
if (ctrl_info->heartbeat_timer_started) {
ctrl_info->heartbeat_timer_started = false;
del_timer_sync(&ctrl_info->heartbeat_timer);
}
del_timer_sync(&ctrl_info->heartbeat_timer);
}
static inline int pqi_event_type_to_event_index(unsigned int event_type)
@ -2925,12 +2922,10 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
struct pqi_event_queue *event_queue;
struct pqi_event_response *response;
struct pqi_event *event;
bool need_delayed_work;
int event_index;
event_queue = &ctrl_info->event_queue;
num_events = 0;
need_delayed_work = false;
oq_ci = event_queue->oq_ci_copy;
while (1) {
@ -2953,10 +2948,6 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
event->event_id = response->event_id;
event->additional_event_id =
response->additional_event_id;
if (event_index != PQI_EVENT_TYPE_HEARTBEAT) {
event->pending = true;
need_delayed_work = true;
}
}
}
@ -2966,9 +2957,7 @@ static unsigned int pqi_process_event_intr(struct pqi_ctrl_info *ctrl_info)
if (num_events) {
event_queue->oq_ci_copy = oq_ci;
writel(oq_ci, event_queue->oq_ci);
if (need_delayed_work)
schedule_work(&ctrl_info->event_work);
schedule_work(&ctrl_info->event_work);
}
return num_events;
@ -3220,7 +3209,7 @@ static int pqi_alloc_operational_queues(struct pqi_ctrl_info *ctrl_info)
if (!ctrl_info->queue_memory_base) {
dev_err(&ctrl_info->pci_dev->dev,
"failed to allocate memory for PQI admin queues\n");
"unable to allocate memory for PQI admin queues\n");
return -ENOMEM;
}
@ -5672,6 +5661,55 @@ static int pqi_get_ctrl_firmware_version(struct pqi_ctrl_info *ctrl_info)
return rc;
}
static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
{
u32 table_length;
u32 section_offset;
void __iomem *table_iomem_addr;
struct pqi_config_table *config_table;
struct pqi_config_table_section_header *section;
table_length = ctrl_info->config_table_length;
config_table = kmalloc(table_length, GFP_KERNEL);
if (!config_table) {
dev_err(&ctrl_info->pci_dev->dev,
"unable to allocate memory for PQI configuration table\n");
return -ENOMEM;
}
/*
* Copy the config table contents from I/O memory space into the
* temporary buffer.
*/
table_iomem_addr = ctrl_info->iomem_base +
ctrl_info->config_table_offset;
memcpy_fromio(config_table, table_iomem_addr, table_length);
section_offset =
get_unaligned_le32(&config_table->first_section_offset);
while (section_offset) {
section = (void *)config_table + section_offset;
switch (get_unaligned_le16(&section->section_id)) {
case PQI_CONFIG_TABLE_SECTION_HEARTBEAT:
ctrl_info->heartbeat_counter = table_iomem_addr +
section_offset +
offsetof(struct pqi_config_table_heartbeat,
heartbeat_counter);
break;
}
section_offset =
get_unaligned_le16(&section->next_section_offset);
}
kfree(config_table);
return 0;
}
/* Switches the controller from PQI mode back into SIS mode. */
static int pqi_revert_to_sis_mode(struct pqi_ctrl_info *ctrl_info)
@ -5783,6 +5821,10 @@ static int pqi_ctrl_init(struct pqi_ctrl_info *ctrl_info)
ctrl_info->pqi_mode_enabled = true;
pqi_save_ctrl_mode(ctrl_info, PQI_MODE);
rc = pqi_process_config_table(ctrl_info);
if (rc)
return rc;
rc = pqi_alloc_admin_queues(ctrl_info);
if (rc) {
dev_err(&ctrl_info->pci_dev->dev,
@ -6091,6 +6133,8 @@ static struct pqi_ctrl_info *pqi_alloc_ctrl_info(int numa_node)
INIT_DELAYED_WORK(&ctrl_info->rescan_work, pqi_rescan_worker);
INIT_DELAYED_WORK(&ctrl_info->update_time_work, pqi_update_time_worker);
init_timer(&ctrl_info->heartbeat_timer);
sema_init(&ctrl_info->sync_request_sem,
PQI_RESERVED_IO_SLOTS_SYNCHRONOUS_REQUESTS);
init_waitqueue_head(&ctrl_info->block_requests_wait);

View file

@ -422,6 +422,10 @@ void sis_soft_reset(struct pqi_ctrl_info *ctrl_info)
void sis_shutdown_ctrl(struct pqi_ctrl_info *ctrl_info)
{
if (readl(&ctrl_info->registers->sis_firmware_status) &
SIS_CTRL_KERNEL_PANIC)
return;
writel(SIS_TRIGGER_SHUTDOWN,
&ctrl_info->registers->sis_host_to_ctrl_doorbell);
}