Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull more s390 updates from Martin Schwidefsky:
 "The second patch set for the 4.14 merge window:

   - Convert the dasd device driver to the blk-mq interface.

   - Provide three zcrypt interfaces for vfio_ap. These will be required
     for KVM guest access to the crypto cards attached via the AP bus.

   - A couple of memory management bug fixes."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux:
  s390/dasd: blk-mq conversion
  s390/mm: use a single lock for the fields in mm_context_t
  s390/mm: fix race on mm->context.flush_mm
  s390/mm: fix local TLB flushing vs. detach of an mm address space
  s390/zcrypt: externalize AP queue interrupt control
  s390/zcrypt: externalize AP config info query
  s390/zcrypt: externalize test AP queue
  s390/mm: use VM_BUG_ON in crst_table_[upgrade|downgrade]
This commit is contained in:
Linus Torvalds 2017-09-12 06:01:59 -07:00
commit 260d16580d
13 changed files with 416 additions and 274 deletions

126
arch/s390/include/asm/ap.h Normal file
View File

@ -0,0 +1,126 @@
/*
* Adjunct processor (AP) interfaces
*
* Copyright IBM Corp. 2017
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
*
* Author(s): Tony Krowiak <akrowia@linux.vnet.ibm.com>
* Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
*/
#ifndef _ASM_S390_AP_H_
#define _ASM_S390_AP_H_
/**
 * ap_qid_t - identifier of an AP queue.
 *
 * A qid combines a card index and a queue index in a single word.
 * If the AP facilities test (APFT) facility is available, card and
 * queue index are 8 bit values, otherwise the card index is 6 bit
 * and the queue index a 4 bit value.
 *
 * AP_MKQID() builds a qid from the low 6 bits of the card index
 * (shifted left by 8) and the low 8 bits of the queue index;
 * AP_QID_CARD() and AP_QID_QUEUE() extract these two parts again.
 */
typedef unsigned int ap_qid_t;

#define AP_MKQID(_card, _queue) \
	((((_card) & 0x3f) << 8) | ((_queue) & 0xff))
#define AP_QID_CARD(_qid)	(((_qid) >> 8) & 0x3f)
#define AP_QID_QUEUE(_qid)	((_qid) & 0xff)
/**
* struct ap_queue_status - Holds the AP queue status.
* @queue_empty: Shows if queue is empty
* @replies_waiting: Waiting replies
* @queue_full: Is 1 if the queue is full
* @_pad1: A 4 bit pad
* @irq_enabled: Shows if interrupts are enabled for the AP
* @response_code: Holds the 8 bit response code
* @_pad2: A 16 bit pad
*
* The ap queue status word is returned by all three AP functions
* (PQAP, NQAP and DQAP). There's a set of flags in the first
* byte, followed by a 1 byte response code. The declared bit-field
* widths add up to 32 bits.
*/
struct ap_queue_status {
unsigned int queue_empty : 1;
unsigned int replies_waiting : 1;
unsigned int queue_full : 1;
unsigned int _pad1 : 4;
unsigned int irq_enabled : 1;
unsigned int response_code : 8;
unsigned int _pad2 : 16;
};
/**
* ap_test_queue(): Test adjunct processor queue.
* @qid: The AP queue number
* @tbit: Test facilities bit; if non-zero, the T bit (1UL << 23)
*        is set in the qid before the queue is tested
* @info: Pointer to queue descriptor
*
* Returns AP queue status structure.
*/
struct ap_queue_status ap_test_queue(ap_qid_t qid,
int tbit,
unsigned long *info);
/*
* struct ap_config_info - AP configuration info as filled in by
* ap_query_configuration() via PQAP(QCI).
*
* Starts with four single-bit flags, followed by the Na/Nd limits
* and three masks of 8 words each (AP IDs, AP queues, AP domains).
* The _reserved* members only keep the other fields at their
* position; the structure is aligned to 8 bytes.
*/
struct ap_config_info {
unsigned int apsc : 1; /* S bit */
unsigned int apxa : 1; /* N bit */
unsigned int qact : 1; /* C bit */
unsigned int rc8a : 1; /* R bit */
unsigned char _reserved1 : 4;
unsigned char _reserved2[3];
unsigned char Na; /* max # of APs - 1 */
unsigned char Nd; /* max # of Domains - 1 */
unsigned char _reserved3[10];
unsigned int apm[8]; /* AP ID mask */
unsigned int aqm[8]; /* AP queue mask */
unsigned int adm[8]; /* AP domain mask */
unsigned char _reserved4[16];
} __aligned(8);
/*
* ap_query_configuration(): Fetch cryptographic config info
* @info: Pointer to the struct ap_config_info to be filled in
*
* Returns the ap configuration info fetched via PQAP(QCI).
* On success 0 is returned, on failure a negative errno
* is returned, e.g. if the PQAP(QCI) instruction is not
* available, the return value will be -EOPNOTSUPP. If @info
* is NULL, -EINVAL is returned.
*/
int ap_query_configuration(struct ap_config_info *info);
/*
* struct ap_qirq_ctrl - convenient struct for easy invocation
* of the ap_queue_irq_ctrl() function. This struct is passed
* as GR1 parameter to the PQAP(AQIC) instruction. For details
* please see the AR documentation.
*
* The declared bit-field widths add up to 64 bits: the members up
* to and including gf fill the first 32 bits, the remaining
* members (_res4, gisa, _res5, isc) fill the second 32 bits.
*/
struct ap_qirq_ctrl {
unsigned int _res1 : 8;
unsigned int zone : 8; /* zone info */
unsigned int ir : 1; /* ir flag: enable (1) or disable (0) irq */
unsigned int _res2 : 4;
unsigned int gisc : 3; /* guest isc field */
unsigned int _res3 : 6;
unsigned int gf : 2; /* gisa format */
unsigned int _res4 : 1;
unsigned int gisa : 27; /* gisa origin */
unsigned int _res5 : 1;
unsigned int isc : 3; /* irq sub class */
};
/**
* ap_queue_irq_ctrl(): Control interruption on an AP queue.
* @qid: The AP queue number
* @qirqctrl: struct ap_qirq_ctrl, see above
* @ind: The notification indicator byte
*
* Returns AP queue status.
*
* Control interruption on the given AP queue.
* Just a simple wrapper function for the low level PQAP(AQIC)
* instruction available for other kernel modules.
*/
struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl,
void *ind);
#endif /* _ASM_S390_AP_H_ */

View File

@ -5,12 +5,11 @@
#include <linux/errno.h>
typedef struct {
spinlock_t lock;
cpumask_t cpu_attach_mask;
atomic_t flush_count;
unsigned int flush_mm;
spinlock_t pgtable_lock;
struct list_head pgtable_list;
spinlock_t gmap_lock;
struct list_head gmap_list;
unsigned long gmap_asce;
unsigned long asce;
@ -27,10 +26,8 @@ typedef struct {
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
.context.pgtable_lock = \
__SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \
.context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
.context.gmap_lock = __SPIN_LOCK_UNLOCKED(name.context.gmap_lock), \
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
static inline int tprot(unsigned long addr)

View File

@ -17,9 +17,8 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
spin_lock_init(&mm->context.pgtable_lock);
spin_lock_init(&mm->context.lock);
INIT_LIST_HEAD(&mm->context.pgtable_list);
spin_lock_init(&mm->context.gmap_lock);
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
@ -103,7 +102,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (prev == next)
return;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(next));
/* Clear old ASCE by loading the kernel ASCE. */
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
@ -121,9 +119,8 @@ static inline void finish_arch_post_lock_switch(void)
preempt_disable();
while (atomic_read(&mm->context.flush_count))
cpu_relax();
if (mm->context.flush_mm)
__tlb_flush_mm(mm);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
__tlb_flush_mm_lazy(mm);
preempt_enable();
}
set_fs(current->thread.mm_segment);
@ -136,6 +133,7 @@ static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
switch_mm(prev, next, current);
cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
set_user_asce(next);
}

View File

@ -48,23 +48,6 @@ static inline void __tlb_flush_global(void)
* Flush TLB entries for a specific mm on all CPUs (in case gmap is used
* this implicates multiple ASCEs!).
*/
static inline void __tlb_flush_full(struct mm_struct *mm)
{
preempt_disable();
atomic_inc(&mm->context.flush_count);
if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
/* Local TLB flush */
__tlb_flush_local();
} else {
/* Global TLB flush */
__tlb_flush_global();
/* Reset TLB flush mask */
cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
}
atomic_dec(&mm->context.flush_count);
preempt_enable();
}
static inline void __tlb_flush_mm(struct mm_struct *mm)
{
unsigned long gmap_asce;
@ -76,16 +59,18 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
*/
preempt_disable();
atomic_inc(&mm->context.flush_count);
/* Reset TLB flush mask */
cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
barrier();
gmap_asce = READ_ONCE(mm->context.gmap_asce);
if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
if (gmap_asce)
__tlb_flush_idte(gmap_asce);
__tlb_flush_idte(mm->context.asce);
} else {
__tlb_flush_full(mm);
/* Global TLB flush */
__tlb_flush_global();
}
/* Reset TLB flush mask */
cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
atomic_dec(&mm->context.flush_count);
preempt_enable();
}
@ -99,7 +84,6 @@ static inline void __tlb_flush_kernel(void)
}
#else
#define __tlb_flush_global() __tlb_flush_local()
#define __tlb_flush_full(mm) __tlb_flush_local()
/*
* Flush TLB entries for a specific ASCE on all CPUs.
@ -117,10 +101,12 @@ static inline void __tlb_flush_kernel(void)
static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
spin_lock(&mm->context.lock);
if (mm->context.flush_mm) {
__tlb_flush_mm(mm);
mm->context.flush_mm = 0;
__tlb_flush_mm(mm);
}
spin_unlock(&mm->context.lock);
}
/*

View File

@ -100,14 +100,14 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
if (!gmap)
return NULL;
gmap->mm = mm;
spin_lock(&mm->context.gmap_lock);
spin_lock(&mm->context.lock);
list_add_rcu(&gmap->list, &mm->context.gmap_list);
if (list_is_singular(&mm->context.gmap_list))
gmap_asce = gmap->asce;
else
gmap_asce = -1UL;
WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
spin_unlock(&mm->context.gmap_lock);
spin_unlock(&mm->context.lock);
return gmap;
}
EXPORT_SYMBOL_GPL(gmap_create);
@ -248,7 +248,7 @@ void gmap_remove(struct gmap *gmap)
spin_unlock(&gmap->shadow_lock);
}
/* Remove gmap from the per-mm list */
spin_lock(&gmap->mm->context.gmap_lock);
spin_lock(&gmap->mm->context.lock);
list_del_rcu(&gmap->list);
if (list_empty(&gmap->mm->context.gmap_list))
gmap_asce = 0;
@ -258,7 +258,7 @@ void gmap_remove(struct gmap *gmap)
else
gmap_asce = -1UL;
WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
spin_unlock(&gmap->mm->context.gmap_lock);
spin_unlock(&gmap->mm->context.lock);
synchronize_rcu();
/* Put reference */
gmap_put(gmap);

View File

@ -83,7 +83,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
int rc, notify;
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
if (end >= TASK_SIZE_MAX)
return -ENOMEM;
rc = 0;
@ -124,7 +124,7 @@ void crst_table_downgrade(struct mm_struct *mm)
pgd_t *pgd;
/* downgrade should only happen from 3 to 2 levels (compat only) */
BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
if (current->active_mm == mm) {
clear_user_asce();
@ -188,7 +188,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
/* Try to get a fragment of a 4K page as a 2K page table */
if (!mm_alloc_pgste(mm)) {
table = NULL;
spin_lock_bh(&mm->context.pgtable_lock);
spin_lock_bh(&mm->context.lock);
if (!list_empty(&mm->context.pgtable_list)) {
page = list_first_entry(&mm->context.pgtable_list,
struct page, lru);
@ -203,7 +203,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
list_del(&page->lru);
}
}
spin_unlock_bh(&mm->context.pgtable_lock);
spin_unlock_bh(&mm->context.lock);
if (table)
return table;
}
@ -227,9 +227,9 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
/* Return the first 2K fragment of the page */
atomic_set(&page->_mapcount, 1);
clear_table(table, _PAGE_INVALID, PAGE_SIZE);
spin_lock_bh(&mm->context.pgtable_lock);
spin_lock_bh(&mm->context.lock);
list_add(&page->lru, &mm->context.pgtable_list);
spin_unlock_bh(&mm->context.pgtable_lock);
spin_unlock_bh(&mm->context.lock);
}
return table;
}
@ -243,13 +243,13 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
if (!mm_alloc_pgste(mm)) {
/* Free 2K page table fragment of a 4K page */
bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.pgtable_lock);
spin_lock_bh(&mm->context.lock);
mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
if (mask & 3)
list_add(&page->lru, &mm->context.pgtable_list);
else
list_del(&page->lru);
spin_unlock_bh(&mm->context.pgtable_lock);
spin_unlock_bh(&mm->context.lock);
if (mask != 0)
return;
}
@ -275,13 +275,13 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
return;
}
bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.pgtable_lock);
spin_lock_bh(&mm->context.lock);
mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
if (mask & 3)
list_add_tail(&page->lru, &mm->context.pgtable_list);
else
list_del(&page->lru);
spin_unlock_bh(&mm->context.pgtable_lock);
spin_unlock_bh(&mm->context.lock);
table = (unsigned long *) (__pa(table) | (1U << bit));
tlb_remove_table(tlb, table);
}

View File

@ -62,7 +62,6 @@ MODULE_LICENSE("GPL");
static int dasd_alloc_queue(struct dasd_block *);
static void dasd_setup_queue(struct dasd_block *);
static void dasd_free_queue(struct dasd_block *);
static void dasd_flush_request_queue(struct dasd_block *);
static int dasd_flush_block_queue(struct dasd_block *);
static void dasd_device_tasklet(struct dasd_device *);
static void dasd_block_tasklet(struct dasd_block *);
@ -158,7 +157,6 @@ struct dasd_block *dasd_alloc_block(void)
/* open_count = 0 means device online but not in use */
atomic_set(&block->open_count, -1);
spin_lock_init(&block->request_queue_lock);
atomic_set(&block->tasklet_scheduled, 0);
tasklet_init(&block->tasklet,
(void (*)(unsigned long)) dasd_block_tasklet,
@ -391,7 +389,6 @@ static int dasd_state_ready_to_basic(struct dasd_device *device)
device->state = DASD_STATE_READY;
return rc;
}
dasd_flush_request_queue(block);
dasd_destroy_partitions(block);
block->blocks = 0;
block->bp_block = 0;
@ -1645,8 +1642,10 @@ void dasd_generic_handle_state_change(struct dasd_device *device)
dasd_device_remove_stop_bits(device, DASD_STOPPED_PENDING);
dasd_schedule_device_bh(device);
if (device->block)
if (device->block) {
dasd_schedule_block_bh(device->block);
blk_mq_run_hw_queues(device->block->request_queue, true);
}
}
EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
@ -2638,6 +2637,7 @@ static void dasd_block_timeout(unsigned long ptr)
dasd_device_remove_stop_bits(block->base, DASD_STOPPED_PENDING);
spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags);
dasd_schedule_block_bh(block);
blk_mq_run_hw_queues(block->request_queue, true);
}
/*
@ -2677,115 +2677,11 @@ static void __dasd_process_erp(struct dasd_device *device,
erp_fn(cqr);
}
/*
* Fetch requests from the block device queue.
*/
static void __dasd_process_request_queue(struct dasd_block *block)
{
struct request_queue *queue;
struct request *req;
struct dasd_ccw_req *cqr;
struct dasd_device *basedev;
unsigned long flags;
queue = block->request_queue;
basedev = block->base;
/* No queue ? Then there is nothing to do. */
if (queue == NULL)
return;
/*
* We requeue request from the block device queue to the ccw
* queue only in two states. In state DASD_STATE_READY the
* partition detection is done and we need to requeue requests
* for that. State DASD_STATE_ONLINE is normal block device
* operation.
*/
if (basedev->state < DASD_STATE_READY) {
while ((req = blk_fetch_request(block->request_queue)))
__blk_end_request_all(req, BLK_STS_IOERR);
return;
}
/*
* if device is stopped do not fetch new requests
* except failfast is active which will let requests fail
* immediately in __dasd_block_start_head()
*/
if (basedev->stopped && !(basedev->features & DASD_FEATURE_FAILFAST))
return;
/* Now we try to fetch requests from the request queue */
while ((req = blk_peek_request(queue))) {
if (basedev->features & DASD_FEATURE_READONLY &&
rq_data_dir(req) == WRITE) {
DBF_DEV_EVENT(DBF_ERR, basedev,
"Rejecting write request %p",
req);
blk_start_request(req);
__blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
(basedev->features & DASD_FEATURE_FAILFAST ||
blk_noretry_request(req))) {
DBF_DEV_EVENT(DBF_ERR, basedev,
"Rejecting failfast request %p",
req);
blk_start_request(req);
__blk_end_request_all(req, BLK_STS_TIMEOUT);
continue;
}
cqr = basedev->discipline->build_cp(basedev, block, req);
if (IS_ERR(cqr)) {
if (PTR_ERR(cqr) == -EBUSY)
break; /* normal end condition */
if (PTR_ERR(cqr) == -ENOMEM)
break; /* terminate request queue loop */
if (PTR_ERR(cqr) == -EAGAIN) {
/*
* The current request cannot be build right
* now, we have to try later. If this request
* is the head-of-queue we stop the device
* for 1/2 second.
*/
if (!list_empty(&block->ccw_queue))
break;
spin_lock_irqsave(
get_ccwdev_lock(basedev->cdev), flags);
dasd_device_set_stop_bits(basedev,
DASD_STOPPED_PENDING);
spin_unlock_irqrestore(
get_ccwdev_lock(basedev->cdev), flags);
dasd_block_set_timer(block, HZ/2);
break;
}
DBF_DEV_EVENT(DBF_ERR, basedev,
"CCW creation failed (rc=%ld) "
"on request %p",
PTR_ERR(cqr), req);
blk_start_request(req);
__blk_end_request_all(req, BLK_STS_IOERR);
continue;
}
/*
* Note: callback is set to dasd_return_cqr_cb in
* __dasd_block_start_head to cover erp requests as well
*/
cqr->callback_data = (void *) req;
cqr->status = DASD_CQR_FILLED;
req->completion_data = cqr;
blk_start_request(req);
list_add_tail(&cqr->blocklist, &block->ccw_queue);
INIT_LIST_HEAD(&cqr->devlist);
dasd_profile_start(block, cqr, req);
}
}
static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
{
struct request *req;
int status;
blk_status_t error = BLK_STS_OK;
int status;
req = (struct request *) cqr->callback_data;
dasd_profile_end(cqr->block, cqr, req);
@ -2809,7 +2705,19 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
break;
}
}
__blk_end_request_all(req, error);
/*
* We need to take care for ETIMEDOUT errors here since the
* complete callback does not get called in this case.
* Take care of all errors here and avoid additional code to
* transfer the error value to the complete callback.
*/
if (error) {
blk_mq_end_request(req, error);
blk_mq_run_hw_queues(req->q, true);
} else {
blk_mq_complete_request(req);
}
}
/*
@ -2938,27 +2846,30 @@ static void dasd_block_tasklet(struct dasd_block *block)
struct list_head final_queue;
struct list_head *l, *n;
struct dasd_ccw_req *cqr;
struct dasd_queue *dq;
atomic_set(&block->tasklet_scheduled, 0);
INIT_LIST_HEAD(&final_queue);
spin_lock(&block->queue_lock);
spin_lock_irq(&block->queue_lock);
/* Finish off requests on ccw queue */
__dasd_process_block_ccw_queue(block, &final_queue);
spin_unlock(&block->queue_lock);
spin_unlock_irq(&block->queue_lock);
/* Now call the callback function of requests with final status */
spin_lock_irq(&block->request_queue_lock);
list_for_each_safe(l, n, &final_queue) {
cqr = list_entry(l, struct dasd_ccw_req, blocklist);
dq = cqr->dq;
spin_lock_irq(&dq->lock);
list_del_init(&cqr->blocklist);
__dasd_cleanup_cqr(cqr);
spin_unlock_irq(&dq->lock);
}
spin_lock(&block->queue_lock);
/* Get new request from the block device request queue */
__dasd_process_request_queue(block);
spin_lock_irq(&block->queue_lock);
/* Now check if the head of the ccw queue needs to be started. */
__dasd_block_start_head(block);
spin_unlock(&block->queue_lock);
spin_unlock_irq(&block->request_queue_lock);
spin_unlock_irq(&block->queue_lock);
if (waitqueue_active(&shutdown_waitq))
wake_up(&shutdown_waitq);
dasd_put_device(block->base);
@ -2977,14 +2888,13 @@ static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
{
struct dasd_block *block = cqr->block;
struct request *req;
unsigned long flags;
if (!block)
return -EINVAL;
spin_lock_irqsave(&block->request_queue_lock, flags);
spin_lock_irq(&cqr->dq->lock);
req = (struct request *) cqr->callback_data;
blk_requeue_request(block->request_queue, req);
spin_unlock_irqrestore(&block->request_queue_lock, flags);
blk_mq_requeue_request(req, false);
spin_unlock_irq(&cqr->dq->lock);
return 0;
}
@ -2999,6 +2909,7 @@ static int dasd_flush_block_queue(struct dasd_block *block)
struct dasd_ccw_req *cqr, *n;
int rc, i;
struct list_head flush_queue;
unsigned long flags;
INIT_LIST_HEAD(&flush_queue);
spin_lock_bh(&block->queue_lock);
@ -3037,11 +2948,11 @@ restart_cb:
goto restart_cb;
}
/* call the callback function */
spin_lock_irq(&block->request_queue_lock);
spin_lock_irqsave(&cqr->dq->lock, flags);
cqr->endclk = get_tod_clock();
list_del_init(&cqr->blocklist);
__dasd_cleanup_cqr(cqr);
spin_unlock_irq(&block->request_queue_lock);
spin_unlock_irqrestore(&cqr->dq->lock, flags);
}
return rc;
}
@ -3069,42 +2980,114 @@ EXPORT_SYMBOL(dasd_schedule_block_bh);
/*
* Dasd request queue function. Called from ll_rw_blk.c
*/
static void do_dasd_request(struct request_queue *queue)
static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *qd)
{
struct dasd_block *block;
struct dasd_block *block = hctx->queue->queuedata;
struct dasd_queue *dq = hctx->driver_data;
struct request *req = qd->rq;
struct dasd_device *basedev;
struct dasd_ccw_req *cqr;
blk_status_t rc = BLK_STS_OK;
block = queue->queuedata;
basedev = block->base;
spin_lock_irq(&dq->lock);
if (basedev->state < DASD_STATE_READY) {
DBF_DEV_EVENT(DBF_ERR, basedev,
"device not ready for request %p", req);
rc = BLK_STS_IOERR;
goto out;
}
/*
* if device is stopped do not fetch new requests
* except failfast is active which will let requests fail
* immediately in __dasd_block_start_head()
*/
if (basedev->stopped && !(basedev->features & DASD_FEATURE_FAILFAST)) {
DBF_DEV_EVENT(DBF_ERR, basedev,
"device stopped request %p", req);
rc = BLK_STS_RESOURCE;
goto out;
}
if (basedev->features & DASD_FEATURE_READONLY &&
rq_data_dir(req) == WRITE) {
DBF_DEV_EVENT(DBF_ERR, basedev,
"Rejecting write request %p", req);
rc = BLK_STS_IOERR;
goto out;
}
if (test_bit(DASD_FLAG_ABORTALL, &basedev->flags) &&
(basedev->features & DASD_FEATURE_FAILFAST ||
blk_noretry_request(req))) {
DBF_DEV_EVENT(DBF_ERR, basedev,
"Rejecting failfast request %p", req);
rc = BLK_STS_IOERR;
goto out;
}
cqr = basedev->discipline->build_cp(basedev, block, req);
if (IS_ERR(cqr)) {
if (PTR_ERR(cqr) == -EBUSY ||
PTR_ERR(cqr) == -ENOMEM ||
PTR_ERR(cqr) == -EAGAIN) {
rc = BLK_STS_RESOURCE;
goto out;
}
DBF_DEV_EVENT(DBF_ERR, basedev,
"CCW creation failed (rc=%ld) on request %p",
PTR_ERR(cqr), req);
rc = BLK_STS_IOERR;
goto out;
}
/*
* Note: callback is set to dasd_return_cqr_cb in
* __dasd_block_start_head to cover erp requests as well
*/
cqr->callback_data = req;
cqr->status = DASD_CQR_FILLED;
cqr->dq = dq;
req->completion_data = cqr;
blk_mq_start_request(req);
spin_lock(&block->queue_lock);
/* Get new request from the block device request queue */
__dasd_process_request_queue(block);
/* Now check if the head of the ccw queue needs to be started. */
__dasd_block_start_head(block);
list_add_tail(&cqr->blocklist, &block->ccw_queue);
INIT_LIST_HEAD(&cqr->devlist);
dasd_profile_start(block, cqr, req);
dasd_schedule_block_bh(block);
spin_unlock(&block->queue_lock);
out:
spin_unlock_irq(&dq->lock);
return rc;
}
/*
* Block timeout callback, called from the block layer
*
* request_queue lock is held on entry.
*
* Return values:
* BLK_EH_RESET_TIMER if the request should be left running
* BLK_EH_NOT_HANDLED if the request is handled or terminated
* by the driver.
*/
enum blk_eh_timer_return dasd_times_out(struct request *req)
enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved)
{
struct dasd_ccw_req *cqr = req->completion_data;
struct dasd_block *block = req->q->queuedata;
struct dasd_device *device;
unsigned long flags;
int rc = 0;
if (!cqr)
return BLK_EH_NOT_HANDLED;
spin_lock_irqsave(&cqr->dq->lock, flags);
device = cqr->startdev ? cqr->startdev : block->base;
if (!device->blk_timeout)
if (!device->blk_timeout) {
spin_unlock_irqrestore(&cqr->dq->lock, flags);
return BLK_EH_RESET_TIMER;
}
DBF_DEV_EVENT(DBF_WARNING, device,
" dasd_times_out cqr %p status %x",
cqr, cqr->status);
@ -3154,19 +3137,64 @@ enum blk_eh_timer_return dasd_times_out(struct request *req)
}
dasd_schedule_block_bh(block);
spin_unlock(&block->queue_lock);
spin_unlock_irqrestore(&cqr->dq->lock, flags);
return rc ? BLK_EH_RESET_TIMER : BLK_EH_NOT_HANDLED;
}
/*
 * blk-mq init_hctx callback: allocate the per hardware queue data,
 * initialize its lock and attach it to the hardware context.
 *
 * Returns 0 on success or -ENOMEM if the allocation failed.
 */
static int dasd_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int idx)
{
	struct dasd_queue *queue_data;

	queue_data = kzalloc(sizeof(*queue_data), GFP_KERNEL);
	if (queue_data) {
		spin_lock_init(&queue_data->lock);
		hctx->driver_data = queue_data;
		return 0;
	}

	return -ENOMEM;
}
/*
 * blk-mq exit_hctx callback: release the per hardware queue data that
 * is attached to the hardware context and clear the reference to it.
 */
static void dasd_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int idx)
{
	struct dasd_queue *queue_data = hctx->driver_data;

	kfree(queue_data);
	hctx->driver_data = NULL;
}
/*
 * blk-mq complete callback: end the request with status 0 and
 * trigger an asynchronous run of the hardware queues of its
 * request queue.
 */
static void dasd_request_done(struct request *req)
{
	/*
	 * Read the queue pointer before ending the request: once
	 * blk_mq_end_request() has returned, the request has been handed
	 * back to the block layer and should not be dereferenced anymore.
	 */
	struct request_queue *q = req->q;

	blk_mq_end_request(req, 0);
	blk_mq_run_hw_queues(q, true);
}
/*
 * blk-mq operations of the DASD driver; dasd_alloc_queue() installs
 * them as the ops of the block's tag set.
 */
static struct blk_mq_ops dasd_mq_ops = {
	/* per hardware queue setup and teardown */
	.init_hctx	= dasd_init_hctx,
	.exit_hctx	= dasd_exit_hctx,
	/* request submission, completion and timeout handling */
	.queue_rq	= do_dasd_request,
	.complete	= dasd_request_done,
	.timeout	= dasd_times_out,
};
/*
* Allocate and initialize request queue and default I/O scheduler.
*/
static int dasd_alloc_queue(struct dasd_block *block)
{
block->request_queue = blk_init_queue(do_dasd_request,
&block->request_queue_lock);
if (block->request_queue == NULL)
return -ENOMEM;
int rc;
block->tag_set.ops = &dasd_mq_ops;
block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES;
block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV;
block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
rc = blk_mq_alloc_tag_set(&block->tag_set);
if (rc)
return rc;
block->request_queue = blk_mq_init_queue(&block->tag_set);
if (IS_ERR(block->request_queue))
return PTR_ERR(block->request_queue);
block->request_queue->queuedata = block;
@ -3229,26 +3257,11 @@ static void dasd_free_queue(struct dasd_block *block)
{
if (block->request_queue) {
blk_cleanup_queue(block->request_queue);
blk_mq_free_tag_set(&block->tag_set);
block->request_queue = NULL;
}
}
/*
* Flush request on the request queue.
*/
static void dasd_flush_request_queue(struct dasd_block *block)
{
struct request *req;
if (!block->request_queue)
return;
spin_lock_irq(&block->request_queue_lock);
while ((req = blk_fetch_request(block->request_queue)))
__blk_end_request_all(req, BLK_STS_IOERR);
spin_unlock_irq(&block->request_queue_lock);
}
static int dasd_open(struct block_device *bdev, fmode_t mode)
{
struct dasd_device *base;
@ -3744,8 +3757,10 @@ int dasd_generic_path_operational(struct dasd_device *device)
return 1;
}
dasd_schedule_device_bh(device);
if (device->block)
if (device->block) {
dasd_schedule_block_bh(device->block);
blk_mq_run_hw_queues(device->block->request_queue, true);
}
if (!device->stopped)
wake_up(&generic_waitq);
@ -4008,8 +4023,10 @@ int dasd_generic_restore_device(struct ccw_device *cdev)
*/
device->stopped |= DASD_UNRESUMED_PM;
if (device->block)
if (device->block) {
dasd_schedule_block_bh(device->block);
blk_mq_run_hw_queues(device->block->request_queue, true);
}
clear_bit(DASD_FLAG_SUSPENDED, &device->flags);
dasd_put_device(device);

View File

@ -1326,7 +1326,7 @@ dasd_timeout_store(struct device *dev, struct device_attribute *attr,
{
struct dasd_device *device;
struct request_queue *q;
unsigned long val, flags;
unsigned long val;
device = dasd_device_from_cdev(to_ccwdev(dev));
if (IS_ERR(device) || !device->block)
@ -1342,16 +1342,10 @@ dasd_timeout_store(struct device *dev, struct device_attribute *attr,
dasd_put_device(device);
return -ENODEV;
}
spin_lock_irqsave(&device->block->request_queue_lock, flags);
if (!val)
blk_queue_rq_timed_out(q, NULL);
else
blk_queue_rq_timed_out(q, dasd_times_out);
device->blk_timeout = val;
blk_queue_rq_timeout(q, device->blk_timeout * HZ);
spin_unlock_irqrestore(&device->block->request_queue_lock, flags);
dasd_put_device(device);
return count;

View File

@ -56,6 +56,7 @@
#include <asm/dasd.h>
#include <asm/idals.h>
#include <linux/bitops.h>
#include <linux/blk-mq.h>
/* DASD discipline magic */
#define DASD_ECKD_MAGIC 0xC5C3D2C4
@ -185,6 +186,7 @@ struct dasd_ccw_req {
char status; /* status of this request */
short retries; /* A retry counter */
unsigned long flags; /* flags of this request */
struct dasd_queue *dq;
/* ... and how */
unsigned long starttime; /* jiffies time of request start */
@ -248,6 +250,16 @@ struct dasd_ccw_req {
#define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */
#define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */
/*
* There is no reliable way to determine the number of available CPUs on
* LPAR but there is no big performance difference between 1 and the
* maximum CPU number.
* 64 is a good trade off performance wise.
*/
#define DASD_NR_HW_QUEUES 64
#define DASD_MAX_LCU_DEV 256
#define DASD_REQ_PER_DEV 4
/* Signature for error recovery functions. */
typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *);
@ -539,6 +551,7 @@ struct dasd_block {
struct gendisk *gdp;
struct request_queue *request_queue;
spinlock_t request_queue_lock;
struct blk_mq_tag_set tag_set;
struct block_device *bdev;
atomic_t open_count;
@ -563,6 +576,10 @@ struct dasd_attention_data {
__u8 lpum;
};
/*
* Per hardware queue data of the DASD blk-mq support. An instance is
* allocated in dasd_init_hctx() and stored as hctx->driver_data;
* do_dasd_request() records it in the ccw request (cqr->dq).
*/
struct dasd_queue {
spinlock_t lock; /* held while a request is queued, cleaned up, requeued or timed out */
};
/* reasons why device (ccw_device_start) was stopped */
#define DASD_STOPPED_NOT_ACC 1 /* not accessible */
#define DASD_STOPPED_QUIESCE 2 /* Quiesced */
@ -731,7 +748,7 @@ void dasd_free_device(struct dasd_device *);
struct dasd_block *dasd_alloc_block(void);
void dasd_free_block(struct dasd_block *);
enum blk_eh_timer_return dasd_times_out(struct request *req);
enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved);
void dasd_enable_device(struct dasd_device *);
void dasd_set_target_state(struct dasd_device *, int);

View File

@ -69,16 +69,19 @@ static inline struct ap_queue_status ap_rapq(ap_qid_t qid)
}
/**
* ap_aqic(): Enable interruption for a specific AP.
* ap_aqic(): Control interruption for a specific AP.
* @qid: The AP queue number
* @qirqctrl: struct ap_qirq_ctrl (64 bit value)
* @ind: The notification indicator byte
*
* Returns AP queue status.
*/
static inline struct ap_queue_status ap_aqic(ap_qid_t qid, void *ind)
static inline struct ap_queue_status ap_aqic(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl,
void *ind)
{
register unsigned long reg0 asm ("0") = qid | (3UL << 24);
register unsigned long reg1_in asm ("1") = (8UL << 44) | AP_ISC;
register struct ap_qirq_ctrl reg1_in asm ("1") = qirqctrl;
register struct ap_queue_status reg1_out asm ("1");
register void *reg2 asm ("2") = ind;

View File

@ -165,27 +165,52 @@ static int ap_configuration_available(void)
return test_facility(12);
}
/**
* ap_apft_available(): Test if AP facilities test (APFT)
* facility is available.
*
* Returns 1 if APFT is available (result of testing facility
* bit 15).
*/
static int ap_apft_available(void)
{
return test_facility(15);
}
/**
* ap_test_queue(): Test adjunct processor queue.
* @qid: The AP queue number
* @tbit: Test facilities bit
* @info: Pointer to queue descriptor
*
* Returns AP queue status structure.
*/
static inline struct ap_queue_status
ap_test_queue(ap_qid_t qid, unsigned long *info)
struct ap_queue_status ap_test_queue(ap_qid_t qid,
int tbit,
unsigned long *info)
{
if (test_facility(15))
qid |= 1UL << 23; /* set APFT T bit*/
if (tbit)
qid |= 1UL << 23; /* set T bit*/
return ap_tapq(qid, info);
}
EXPORT_SYMBOL(ap_test_queue);
static inline int ap_query_configuration(void)
/*
* ap_query_configuration(): Fetch cryptographic config info
*
* Returns the ap configuration info fetched via PQAP(QCI).
* On success 0 is returned, on failure a negative errno
* is returned, e.g. if the PQAP(QCI) instruction is not
* available, the return value will be -EOPNOTSUPP.
*/
int ap_query_configuration(struct ap_config_info *info)
{
if (!ap_configuration)
if (!ap_configuration_available())
return -EOPNOTSUPP;
return ap_qci(ap_configuration);
if (!info)
return -EINVAL;
return ap_qci(info);
}
EXPORT_SYMBOL(ap_query_configuration);
/**
* ap_init_configuration(): Allocate and query configuration array.
@ -198,7 +223,7 @@ static void ap_init_configuration(void)
ap_configuration = kzalloc(sizeof(*ap_configuration), GFP_KERNEL);
if (!ap_configuration)
return;
if (ap_query_configuration() != 0) {
if (ap_query_configuration(ap_configuration) != 0) {
kfree(ap_configuration);
ap_configuration = NULL;
return;
@ -261,7 +286,7 @@ static int ap_query_queue(ap_qid_t qid, int *queue_depth, int *device_type,
if (!ap_test_config_card_id(AP_QID_CARD(qid)))
return -ENODEV;
status = ap_test_queue(qid, &info);
status = ap_test_queue(qid, ap_apft_available(), &info);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
*queue_depth = (int)(info & 0xff);
@ -940,7 +965,9 @@ static int ap_select_domain(void)
for (j = 0; j < AP_DEVICES; j++) {
if (!ap_test_config_card_id(j))
continue;
status = ap_test_queue(AP_MKQID(j, i), NULL);
status = ap_test_queue(AP_MKQID(j, i),
ap_apft_available(),
NULL);
if (status.response_code != AP_RESPONSE_NORMAL)
continue;
count++;
@ -993,7 +1020,7 @@ static void ap_scan_bus(struct work_struct *unused)
AP_DBF(DBF_DEBUG, "ap_scan_bus running\n");
ap_query_configuration();
ap_query_configuration(ap_configuration);
if (ap_select_domain() != 0)
goto out;

View File

@ -28,6 +28,7 @@
#include <linux/device.h>
#include <linux/types.h>
#include <asm/ap.h>
#define AP_DEVICES 64 /* Number of AP devices. */
#define AP_DOMAINS 256 /* Number of AP domains. */
@ -40,41 +41,6 @@ extern int ap_domain_index;
extern spinlock_t ap_list_lock;
extern struct list_head ap_card_list;
/**
* The ap_qid_t identifier of an ap queue. It contains a
* 6 bit card index and a 4 bit queue index (domain).
*/
typedef unsigned int ap_qid_t;
#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255))
#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63)
#define AP_QID_QUEUE(_qid) ((_qid) & 255)
/**
* struct ap_queue_status - Holds the AP queue status.
* @queue_empty: Shows if queue is empty
* @replies_waiting: Waiting replies
* @queue_full: Is 1 if the queue is full
* @pad1: A 4 bit pad
* @int_enabled: Shows if interrupts are enabled for the AP
* @response_code: Holds the 8 bit response code
* @pad2: A 16 bit pad
*
* The ap queue status word is returned by all three AP functions
* (PQAP, NQAP and DQAP). There's a set of flags in the first
* byte, followed by a 1 byte response code.
*/
struct ap_queue_status {
unsigned int queue_empty : 1;
unsigned int replies_waiting : 1;
unsigned int queue_full : 1;
unsigned int pad1 : 4;
unsigned int int_enabled : 1;
unsigned int response_code : 8;
unsigned int pad2 : 16;
} __packed;
static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
{
return (*ptr & (0x80000000u >> nr)) != 0;
@ -238,17 +204,6 @@ struct ap_message {
struct ap_message *);
};
struct ap_config_info {
unsigned int special_command:1;
unsigned int ap_extended:1;
unsigned char reserved1:6;
unsigned char reserved2[15];
unsigned int apm[8]; /* AP ID mask */
unsigned int aqm[8]; /* AP queue mask */
unsigned int adm[8]; /* AP domain mask */
unsigned char reserved4[16];
} __packed;
/**
* ap_init_message() - Initialize ap_message.
* Initialize a message before using. Otherwise this might result in

View File

@ -15,6 +15,25 @@
#include "ap_bus.h"
#include "ap_asm.h"
/**
* ap_queue_irq_ctrl(): Control interruption on an AP queue.
* @qid: The AP queue number
* @qirqctrl: struct ap_qirq_ctrl (64 bit value)
* @ind: Pointer to the notification indicator byte
*
* Returns AP queue status.
*
* Control interruption on the given AP queue.
* Just a simple wrapper function for the low level PQAP(AQIC)
* instruction available for other kernel modules.
*/
struct ap_queue_status ap_queue_irq_ctrl(ap_qid_t qid,
struct ap_qirq_ctrl qirqctrl,
void *ind)
{
/* All arguments are passed through unchanged to ap_aqic(). */
return ap_aqic(qid, qirqctrl, ind);
}
EXPORT_SYMBOL(ap_queue_irq_ctrl);
/**
* ap_queue_enable_interruption(): Enable interruption on an AP queue.
* @qid: The AP queue number
@ -27,8 +46,11 @@
static int ap_queue_enable_interruption(struct ap_queue *aq, void *ind)
{
struct ap_queue_status status;
struct ap_qirq_ctrl qirqctrl = { 0 };
status = ap_aqic(aq->qid, ind);
qirqctrl.ir = 1;
qirqctrl.isc = AP_ISC;
status = ap_aqic(aq->qid, qirqctrl, ind);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_OTHERWISE_CHANGED:
@ -362,7 +384,7 @@ static enum ap_wait ap_sm_setirq_wait(struct ap_queue *aq)
/* Get the status with TAPQ */
status = ap_tapq(aq->qid, NULL);
if (status.int_enabled == 1) {
if (status.irq_enabled == 1) {
/* Irqs are now enabled */
aq->interrupt = AP_INTR_ENABLED;
aq->state = (aq->queue_count > 0) ?