From 238ec4efeee4461d5cff2ed3e5a15a3ab850959b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:07 +0200 Subject: [PATCH 01/48] [S390] zero page cache synonyms If the zero page is mapped to virtual user space addresses that differ only in bit 2^12 or 2^13 we get L1 cache synonyms which can affect performance. Follow the mips model and use multiple zero pages to avoid the synonyms. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 22 +++++++++++++-- arch/s390/mm/init.c | 49 ++++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3157441ee1da..22a294571000 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -46,11 +46,27 @@ extern void vmem_map_init(void); #define update_mmu_cache(vma, address, ptep) do { } while (0) /* - * ZERO_PAGE is a global shared page that is always zero: used + * ZERO_PAGE is a global shared page that is always zero; used * for zero-mapped memory areas etc.. */ -extern char empty_zero_page[PAGE_SIZE]; -#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +extern unsigned long empty_zero_page; +extern unsigned long zero_page_mask; + +#define ZERO_PAGE(vaddr) \ + (virt_to_page((void *)(empty_zero_page + \ + (((unsigned long)(vaddr)) &zero_page_mask)))) + +#define is_zero_pfn is_zero_pfn +static inline int is_zero_pfn(unsigned long pfn) +{ + extern unsigned long zero_pfn; + unsigned long offset_from_zero_pfn = pfn - zero_pfn; + return offset_from_zero_pfn <= (zero_page_mask >> PAGE_SHIFT); +} + +#define my_zero_pfn(addr) page_to_pfn(ZERO_PAGE(addr)) + #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 94b8ba2ec857..0744fb3536b1 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -42,9 +42,52 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); -char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); +static unsigned long setup_zero_pages(void) +{ + struct cpuid cpu_id; + unsigned int order; + unsigned long size; + struct page *page; + int i; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: /* g5 */ + case 0x2064: /* z900 */ + case 0x2066: /* z900 */ + case 0x2084: /* z990 */ + case 0x2086: /* z990 */ + case 0x2094: /* z9-109 */ + case 0x2096: /* z9-109 */ + order = 0; + break; + case 0x2097: /* z10 */ + case 0x2098: /* z10 */ + default: + order = 2; + break; + } + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Out of memory in setup_zero_pages"); + + page = virt_to_page((void *) empty_zero_page); + split_page(page, order); + for (i = 1 << order; i > 0; i--) { + SetPageReserved(page); + page++; + } + + size = PAGE_SIZE << order; + zero_page_mask = (size - 1) & PAGE_MASK; + + return 1UL << order; +} + /* * paging_init() sets up the page tables */ @@ -92,14 +135,12 @@ void __init mem_init(void) max_mapnr = num_physpages = max_low_pfn; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - /* clear the zero-page */ - memset(empty_zero_page, 0, PAGE_SIZE); - /* Setup guest page hinting */ cmma_init(); /* this will put all low memory onto the freelists */ totalram_pages += free_all_bootmem(); + totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */ reservedpages = 0; From 7ea8d32199a3751527bf06b91c03de92d5ad5b3e Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Mon, 25 Oct 2010 16:10:08 +0200 Subject: [PATCH 02/48] [S390] dasd: let recovery cqr inherit flags from failed cqr The usual way to recover a failed DASD ECKD request (cqr) is to create a new request with an appropriate recovery CCW program. Certain features, e.g. failfast, can be enabled per request and are stored in the requests flags. These flags have to be copied from the failed to the recovery request, to let the recovery request use the same features as the original one. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_3990_erp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index e82d427ff5eb..968c76cf7127 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -221,6 +221,7 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier) ccw->cmd_code = CCW_CMD_DCTL; ccw->count = 4; ccw->cda = (__u32)(addr_t) DCTL_data; + dctl_cqr->flags = erp->flags; dctl_cqr->function = dasd_3990_erp_DCTL; dctl_cqr->refers = erp; dctl_cqr->startdev = device; @@ -1710,6 +1711,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense) ccw->cda = cpa; /* fill erp related fields */ + erp->flags = default_erp->flags; erp->function = dasd_3990_erp_action_1B_32; erp->refers = default_erp->refers; erp->startdev = device; @@ -2354,6 +2356,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) ccw->cda = (long)(cqr->cpaddr); } + erp->flags = cqr->flags; erp->function = dasd_3990_erp_add_erp; erp->refers = cqr; erp->startdev = device; From 014859430ba72ffeb363a4acd2200851765fe6de Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:09 +0200 Subject: [PATCH 03/48] [S390] standardize Kbuild rules Introducing this Kbuild file allow us to: make arch/s390/ And thus building all the core part of s390. Same as on other architectures. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kbuild | 6 ++++++ arch/s390/Makefile | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 arch/s390/Kbuild diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild new file mode 100644 index 000000000000..ae4b01060edd --- /dev/null +++ b/arch/s390/Kbuild @@ -0,0 +1,6 @@ +obj-y += kernel/ +obj-y += mm/ +obj-y += crypto/ +obj-y += appldata/ +obj-y += hypfs/ +obj-y += kvm/ diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0c9e6c6d2a64..9d318476f81f 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -94,8 +94,8 @@ head-y := arch/s390/kernel/head.o head-y += arch/s390/kernel/$(if $(CONFIG_64BIT),head64.o,head31.o) head-y += arch/s390/kernel/init_task.o -core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ +# See arch/s390/Kbuild for content of core part of the kernel +core-y += arch/s390/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ From 87799ebab760dd1460f6e4193d4f71ba416d1451 Mon Sep 17 00:00:00 2001 From: Christof Schmitt Date: Mon, 25 Oct 2010 16:10:10 +0200 Subject: [PATCH 04/48] [S390] set ARCH_HAS_SG_CHAIN for s390 Set ARCH_HAS_SG_CHAIN for scatter lists on s390. Without this flag the SCSI code limits the maximum number of segments, so set it to make proper use of the FCP channel hardware. Signed-off-by: Christof Schmitt Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/scatterlist.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h index 35d786fe93ae..6d45ef6c12a7 100644 --- a/arch/s390/include/asm/scatterlist.h +++ b/arch/s390/include/asm/scatterlist.h @@ -1 +1,3 @@ #include + +#define ARCH_HAS_SG_CHAIN From 80217147a3d80c8a4e48f06e2f6e965455f3fe2a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:11 +0200 Subject: [PATCH 05/48] [S390] lockless get_user_pages_fast() Implement get_user_pages_fast without locking in the fastpath on s390. Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/include/asm/pgalloc.h | 4 + arch/s390/include/asm/pgtable.h | 1 + arch/s390/include/asm/tlb.h | 13 +- arch/s390/mm/Makefile | 2 +- arch/s390/mm/gup.c | 225 ++++++++++++++++++++++++++++++++ arch/s390/mm/hugetlbpage.c | 2 +- arch/s390/mm/init.c | 2 - arch/s390/mm/pgtable.c | 173 +++++++++++++++++++++--- 9 files changed, 395 insertions(+), 28 deletions(-) create mode 100644 arch/s390/mm/gup.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 75976a141947..7afc17340500 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -101,6 +101,7 @@ config S390 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO + select HAVE_GET_USER_PAGES_FAST select ARCH_INLINE_SPIN_TRYLOCK select ARCH_INLINE_SPIN_TRYLOCK_BH select ARCH_INLINE_SPIN_LOCK diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 68940d0bad91..082eb4e50e8b 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -21,9 +21,11 @@ unsigned long *crst_table_alloc(struct mm_struct *, int); void crst_table_free(struct mm_struct *, unsigned long *); +void crst_table_free_rcu(struct mm_struct *, unsigned long *); unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); +void page_table_free_rcu(struct mm_struct *, unsigned long *); void disable_noexec(struct mm_struct *, struct task_struct *); static inline void clear_table(unsigned long *s, unsigned long val, size_t n) @@ -176,4 +178,6 @@ static inline void pmd_populate(struct mm_struct *mm, #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte) #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte) +extern void rcu_table_freelist_finish(void); + #endif /* _S390_PGALLOC_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 22a294571000..785229ae39cb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -316,6 +316,7 @@ extern unsigned long VMALLOC_START; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ +#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index fd1c00d08bf5..f1f644f2240a 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -64,10 +64,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb, if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < TLB_NR_PTRS)) __tlb_flush_mm(tlb->mm); while (tlb->nr_ptes > 0) - pte_free(tlb->mm, tlb->array[--tlb->nr_ptes]); + page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]); while (tlb->nr_pxds < TLB_NR_PTRS) - /* pgd_free frees the pointer as region or segment table */ - pgd_free(tlb->mm, tlb->array[tlb->nr_pxds++]); + crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, @@ -75,6 +74,8 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb, { tlb_flush_mmu(tlb, start, end); + rcu_table_freelist_finish(); + /* keep the page table cache within bounds */ check_pgt_cache(); @@ -103,7 +104,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pte_free(tlb->mm, pte); + page_table_free(tlb->mm, (unsigned long *) pte); } /* @@ -124,7 +125,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pmd_free(tlb->mm, pmd); + crst_table_free(tlb->mm, (unsigned long *) pmd); #endif } @@ -146,7 +147,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, if (tlb->nr_ptes >= tlb->nr_pxds) tlb_flush_mmu(tlb, 0, 0); } else - pud_free(tlb->mm, pud); + crst_table_free(tlb->mm, (unsigned long *) pud); #endif } diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index eec054484419..6fbc6f3fbdf2 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -3,6 +3,6 @@ # obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ - page-states.o + page-states.o gup.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c new file mode 100644 index 000000000000..38e641cdd977 --- /dev/null +++ b/arch/s390/mm/gup.c @@ -0,0 +1,225 @@ +/* + * Lockless get_user_pages_fast for s390 + * + * Copyright IBM Corp. 2010 + * Author(s): Martin Schwidefsky + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * The performance critical leaf functions are made noinline otherwise gcc + * inlines everything into a single function which results in too much + * register pressure. + */ +static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + pte_t *ptep, pte; + struct page *page; + + result = write ? 0 : _PAGE_RO; + mask = result | _PAGE_INVALID | _PAGE_SPECIAL; + + ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); + do { + pte = *ptep; + barrier(); + if ((pte_val(pte) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); + page = pte_page(pte); + if (!page_cache_get_speculative(page)) + return 0; + if (unlikely(pte_val(pte) != pte_val(*ptep))) { + put_page(page); + return 0; + } + pages[*nr] = page; + (*nr)++; + + } while (ptep++, addr += PAGE_SIZE, addr != end); + + return 1; +} + +static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long mask, result; + struct page *head, *page; + int refs; + + result = write ? 0 : _SEGMENT_ENTRY_RO; + mask = result | _SEGMENT_ENTRY_INV; + if ((pmd_val(pmd) & mask) != result) + return 0; + VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + } + + return 1; +} + + +static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pmd_t *pmdp, pmd; + + pmdp = (pmd_t *) pudp; +#ifdef CONFIG_64BIT + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + pmdp = (pmd_t *) pud_deref(pud); + pmdp += pmd_index(addr); +#endif + do { + pmd = *pmdp; + barrier(); + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + return 0; + if (unlikely(pmd_huge(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } while (pmdp++, addr = next, addr != end); + + return 1; +} + +static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) +{ + unsigned long next; + pud_t *pudp, pud; + + pudp = (pud_t *) pgdp; +#ifdef CONFIG_64BIT + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2) + pudp = (pud_t *) pgd_deref(pgd); + pudp += pud_index(addr); +#endif + do { + pud = *pudp; + barrier(); + next = pud_addr_end(addr, end); + if (pud_none(pud)) + return 0; + if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr)) + return 0; + } while (pudp++, addr = next, addr != end); + + return 1; +} + +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ +int get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages) +{ + struct mm_struct *mm = current->mm; + unsigned long addr, len, end; + unsigned long next; + pgd_t *pgdp, pgd; + int nr = 0; + + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; + if (end < start) + goto slow_irqon; + + /* + * local_irq_disable() doesn't prevent pagetable teardown, but does + * prevent the pagetables from being freed on s390. + * + * So long as we atomically load page table pointers versus teardown, + * we can follow the address down to the the page and take a ref on it. + */ + local_irq_disable(); + pgdp = pgd_offset(mm, addr); + do { + pgd = *pgdp; + barrier(); + next = pgd_addr_end(addr, end); + if (pgd_none(pgd)) + goto slow; + if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr)) + goto slow; + } while (pgdp++, addr = next, addr != end); + local_irq_enable(); + + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); + return nr; + + { + int ret; +slow: + local_irq_enable(); +slow_irqon: + /* Try to get the remaining pages with get_user_pages */ + start += nr << PAGE_SHIFT; + pages += nr; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); + up_read(&mm->mmap_sem); + + /* Have to be a bit careful with return values */ + if (nr > 0) { + if (ret < 0) + ret = nr; + else + ret += nr; + } + + return ret; + } +} diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f28c43d2f61d..639cd21f2218 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -68,7 +68,7 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; - pte_free(&init_mm, ptep); + page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; } diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 0744fb3536b1..852a3fec1ece 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -38,8 +38,6 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); unsigned long empty_zero_page, zero_page_mask; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 8d999249d357..19338d228c9b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,67 @@ #include #include +struct rcu_table_freelist { + struct rcu_head rcu; + struct mm_struct *mm; + unsigned int pgt_index; + unsigned int crst_index; + unsigned long *table[0]; +}; + +#define RCU_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \ + / sizeof(unsigned long)) + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist); + +static void __page_table_free(struct mm_struct *mm, unsigned long *table); +static void __crst_table_free(struct mm_struct *mm, unsigned long *table); + +static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm) +{ + struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist *batch = *batchp; + + if (batch) + return batch; + batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC); + if (batch) { + batch->mm = mm; + batch->pgt_index = 0; + batch->crst_index = RCU_FREELIST_SIZE; + *batchp = batch; + } + return batch; +} + +static void rcu_table_freelist_callback(struct rcu_head *head) +{ + struct rcu_table_freelist *batch = + container_of(head, struct rcu_table_freelist, rcu); + + while (batch->pgt_index > 0) + __page_table_free(batch->mm, batch->table[--batch->pgt_index]); + while (batch->crst_index < RCU_FREELIST_SIZE) + __crst_table_free(batch->mm, batch->table[batch->crst_index++]); + free_page((unsigned long) batch); +} + +void rcu_table_freelist_finish(void) +{ + struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); + + if (!batch) + return; + call_rcu(&batch->rcu, rcu_table_freelist_callback); + __get_cpu_var(rcu_table_freelist) = NULL; +} + +static void smp_sync(void *arg) +{ +} + #ifndef CONFIG_64BIT #define ALLOC_ORDER 1 #define TABLES_PER_PAGE 4 @@ -78,23 +140,53 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) } page->index = page_to_phys(shadow); } - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.crst_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } +static void __crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + unsigned long *shadow = get_shadow_table(table); + + if (shadow) + free_pages((unsigned long) shadow, ALLOC_ORDER); + free_pages((unsigned long) table, ALLOC_ORDER); +} + void crst_table_free(struct mm_struct *mm, unsigned long *table) { - unsigned long *shadow = get_shadow_table(table); struct page *page = virt_to_page(table); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_del(&page->lru); - spin_unlock(&mm->context.list_lock); - if (shadow) - free_pages((unsigned long) shadow, ALLOC_ORDER); - free_pages((unsigned long) table, ALLOC_ORDER); + spin_unlock_bh(&mm->context.list_lock); + __crst_table_free(mm, table); +} + +void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + struct page *page = virt_to_page(table); + + spin_lock_bh(&mm->context.list_lock); + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + __crst_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + __crst_table_free(mm, table); + return; + } + batch->table[--batch->crst_index] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); } #ifdef CONFIG_64BIT @@ -108,7 +200,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) table = crst_table_alloc(mm, mm->context.noexec); if (!table) return -ENOMEM; - spin_lock(&mm->page_table_lock); + spin_lock_bh(&mm->page_table_lock); if (mm->context.asce_limit < limit) { pgd = (unsigned long *) mm->pgd; if (mm->context.asce_limit <= (1UL << 31)) { @@ -130,7 +222,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) mm->task_size = mm->context.asce_limit; table = NULL; } - spin_unlock(&mm->page_table_lock); + spin_unlock_bh(&mm->page_table_lock); if (table) crst_table_free(mm, table); if (mm->context.asce_limit < limit) @@ -182,7 +274,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long bits; bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, @@ -191,7 +283,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page = NULL; } if (!page) { - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -202,7 +294,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } table = (unsigned long *) page_to_phys(page); @@ -213,10 +305,25 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page->flags |= bits; if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) list_move_tail(&page->lru, &mm->context.pgtable_list); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); return table; } +static void __page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned long bits; + + bits = ((unsigned long) table) & 15; + table = (unsigned long *)(((unsigned long) table) ^ bits); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + page->flags ^= bits; + if (!(page->flags & FRAG_MASK)) { + pgtable_page_dtor(page); + __free_page(page); + } +} + void page_table_free(struct mm_struct *mm, unsigned long *table) { struct page *page; @@ -225,7 +332,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); page->flags ^= bits; if (page->flags & FRAG_MASK) { /* Page now has some free pgtable fragments. */ @@ -234,18 +341,48 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } else /* All fragments of the 4K page have been freed. */ list_del(&page->lru); - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); if (page) { pgtable_page_dtor(page); __free_page(page); } } +void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) +{ + struct rcu_table_freelist *batch; + struct page *page; + unsigned long bits; + + if (atomic_read(&mm->mm_users) < 2 && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { + page_table_free(mm, table); + return; + } + batch = rcu_table_freelist_get(mm); + if (!batch) { + smp_call_function(smp_sync, NULL, 1); + page_table_free(mm, table); + return; + } + bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; + bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + spin_lock_bh(&mm->context.list_lock); + /* Delayed freeing with rcu prevents reuse of pgtable fragments */ + list_del_init(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *)(((unsigned long) table) | bits); + batch->table[batch->pgt_index++] = table; + if (batch->pgt_index >= batch->crst_index) + rcu_table_freelist_finish(); +} + void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) { struct page *page; - spin_lock(&mm->context.list_lock); + spin_lock_bh(&mm->context.list_lock); /* Free shadow region and segment tables. */ list_for_each_entry(page, &mm->context.crst_list, lru) if (page->index) { @@ -255,7 +392,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) /* "Free" second halves of page tables. */ list_for_each_entry(page, &mm->context.pgtable_list, lru) page->flags &= ~SECOND_HALVES; - spin_unlock(&mm->context.list_lock); + spin_unlock_bh(&mm->context.list_lock); mm->context.noexec = 0; update_mm(mm, tsk); } From 6931be0803ddae2791f3c646c8e1e0f82ca26013 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:12 +0200 Subject: [PATCH 06/48] [S390] cpu hotplug/idle: move cpu_die call to enabled context There is no difference if cpu_die is called from enabled or disabled context. Except that the fast_gup code might be called via cpu_die -> idle_task_exit -> __mm_drop -> crst_table_free. Which in turn grabs and releases a spinlock using the _bh ops, which is not allowed in irq disabled context, since spin_unlock_bh will unconditionally enable interrupts again. To get rid of the warning emitted by the softirq code just move the code to enabled context. In this case this doesn't fix a bug, we just get rid of a warning. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index d3a2d1c6438e..ec2e03b22ead 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -76,17 +76,17 @@ unsigned long thread_saved_pc(struct task_struct *tsk) static void default_idle(void) { /* CPU is going idle. */ - local_irq_disable(); - if (need_resched()) { - local_irq_enable(); - return; - } #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) { preempt_enable_no_resched(); cpu_die(); } #endif + local_irq_disable(); + if (need_resched()) { + local_irq_enable(); + return; + } local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); From 92f842eac7ee321c8a0749aba2513541b4ac226f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:13 +0200 Subject: [PATCH 07/48] [S390] store indication fault optimization Use the store indication bit in the translation exception code on page faults to avoid the protection faults that immediatly follow the page fault if the access has been a write. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 1 + arch/s390/mm/fault.c | 21 ++++++++++++++++++--- arch/s390/mm/init.c | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 785229ae39cb..85cd4b039de6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -38,6 +38,7 @@ extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); extern void vmem_map_init(void); +extern void fault_init(void); /* * The S390 doesn't have any external MMU info: the kernel page diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 2505b2ea0ef1..b49d12073f10 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -52,6 +52,19 @@ #define VM_FAULT_BADMAP 0x020000 #define VM_FAULT_BADACCESS 0x040000 +static unsigned long store_indication; + +void fault_init(void) +{ + unsigned long long facility_list[2]; + + if (stfle(facility_list, 2) < 2) + return; + if ((facility_list[0] & (1ULL << 61)) && + (facility_list[1] & (1ULL << 52))) + store_indication = 0xc00; +} + static inline int notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -294,7 +307,7 @@ static inline int do_exception(struct pt_regs *regs, int access, struct mm_struct *mm; struct vm_area_struct *vma; unsigned long address; - int fault; + int fault, write; if (notify_page_fault(regs)) return 0; @@ -348,8 +361,10 @@ static inline int do_exception(struct pt_regs *regs, int access, * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, - (access == VM_WRITE) ? FAULT_FLAG_WRITE : 0); + write = (access == VM_WRITE || + (trans_exc_code & store_indication) == 0x400) ? + FAULT_FLAG_WRITE : 0; + fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) goto out_up; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 852a3fec1ece..bb409332a484 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -124,6 +124,7 @@ void __init paging_init(void) #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); + fault_init(); } void __init mem_init(void) From e2b8d7af0e3a9234de06606f9151f28cf847a8d6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:14 +0200 Subject: [PATCH 08/48] [S390] add support for nonquiescing sske Improve performance of the sske operation by using the nonquiescing variant if the affected page has no mappings established. On machines with no support for the new sske variant the mask bit will be ignored. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 8 ++++++-- arch/s390/include/asm/pgtable.h | 8 ++++---- arch/s390/kernel/early.c | 3 ++- arch/s390/kernel/setup.c | 3 ++- include/asm-generic/pgtable.h | 2 +- include/linux/page-flags.h | 2 +- mm/rmap.c | 4 ++-- 7 files changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index af650fb47206..a8729ea7e9ac 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -108,9 +108,13 @@ typedef pte_t *pgtable_t; #define __pgprot(x) ((pgprot_t) { (x) } ) static inline void -page_set_storage_key(unsigned long addr, unsigned int skey) +page_set_storage_key(unsigned long addr, unsigned int skey, int mapped) { - asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); + if (!mapped) + asm volatile(".insn rrf,0xb22b0000,%0,%1,8,0" + : : "d" (skey), "a" (addr)); + else + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); } static inline unsigned int diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 85cd4b039de6..f79e7bb9ae1e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -590,7 +590,7 @@ static inline void rcp_unlock(pte_t *ptep) } /* forward declaration for SetPageUptodate in page-flags.h*/ -static inline void page_clear_dirty(struct page *page); +static inline void page_clear_dirty(struct page *page, int mapped); #include static inline void ptep_rcp_copy(pte_t *ptep) @@ -800,7 +800,7 @@ static inline int kvm_s390_test_and_clear_page_dirty(struct mm_struct *mm, } dirty = test_and_clear_bit_simple(KVM_UD_BIT, pgste); if (skey & _PAGE_CHANGED) - page_clear_dirty(page); + page_clear_dirty(page, 1); rcp_unlock(ptep); return dirty; } @@ -975,9 +975,9 @@ static inline int page_test_dirty(struct page *page) } #define __HAVE_ARCH_PAGE_CLEAR_DIRTY -static inline void page_clear_dirty(struct page *page) +static inline void page_clear_dirty(struct page *page, int mapped) { - page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY); + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, mapped); } /* diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index c00856ad4e5a..0badc6344eb4 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -208,7 +208,8 @@ static noinline __init void init_kernel_storage_key(void) end_pfn = PFN_UP(__pa(&_end)); for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) - page_set_storage_key(init_pfn << PAGE_SHIFT, PAGE_DEFAULT_KEY); + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY, 0); } static __initdata struct sysinfo_3_2_2 vmms __aligned(PAGE_SIZE); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c8e8e1354e1d..9071e984dcf1 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -627,7 +627,8 @@ setup_memory(void) add_active_range(0, start_chunk, end_chunk); pfn = max(start_chunk, start_pfn); for (; pfn < end_chunk; pfn++) - page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY); + page_set_storage_key(PFN_PHYS(pfn), + PAGE_DEFAULT_KEY, 0); } psw_set_key(PAGE_DEFAULT_KEY); diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f4d4120e5128..6f3c6ae4fe03 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -108,7 +108,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #endif #ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY -#define page_clear_dirty(page) do { } while (0) +#define page_clear_dirty(page, mapped) do { } while (0) #endif #ifndef __HAVE_ARCH_PAGE_TEST_DIRTY diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6fa317801e1c..5f38c460367e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -310,7 +310,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_clear_dirty(page); + page_clear_dirty(page, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, diff --git a/mm/rmap.c b/mm/rmap.c index 92e6757f196e..5f17fad1bee8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -745,7 +745,7 @@ int page_mkclean(struct page *page) if (mapping) { ret = page_mkclean_file(mapping, page); if (page_test_dirty(page)) { - page_clear_dirty(page); + page_clear_dirty(page, 1); ret = 1; } } @@ -942,7 +942,7 @@ void page_remove_rmap(struct page *page) * containing the swap entry, but page not yet written to swap. */ if ((!PageAnon(page) || PageSwapCache(page)) && page_test_dirty(page)) { - page_clear_dirty(page); + page_clear_dirty(page, 1); set_page_dirty(page); } /* From 8b8c12b120071d1db2212a439ccfebcb0ab0cf2a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:15 +0200 Subject: [PATCH 09/48] [S390] add z196 instructions to kernel disassembler Add the new instructions introduced with z196 to the kernel disassembler. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 145 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 132 insertions(+), 13 deletions(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b39b27d68b45..c83726c9fe03 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -113,7 +113,7 @@ enum { INSTR_INVALID, INSTR_E, INSTR_RIE_R0IU, INSTR_RIE_R0UU, INSTR_RIE_RRP, INSTR_RIE_RRPU, - INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, + INSTR_RIE_RRUUU, INSTR_RIE_RUPI, INSTR_RIE_RUPU, INSTR_RIE_RRI0, INSTR_RIL_RI, INSTR_RIL_RP, INSTR_RIL_RU, INSTR_RIL_UP, INSTR_RIS_R0RDU, INSTR_RIS_R0UU, INSTR_RIS_RURDI, INSTR_RIS_RURDU, INSTR_RI_RI, INSTR_RI_RP, INSTR_RI_RU, INSTR_RI_UP, @@ -122,13 +122,14 @@ enum { INSTR_RRE_RR, INSTR_RRE_RR_OPT, INSTR_RRF_0UFF, INSTR_RRF_F0FF, INSTR_RRF_F0FF2, INSTR_RRF_F0FR, INSTR_RRF_FFRU, INSTR_RRF_FUFF, INSTR_RRF_M0RR, INSTR_RRF_R0RR, - INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, INSTR_RRF_U0RR, - INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, + INSTR_RRF_R0RR2, INSTR_RRF_RURR, INSTR_RRF_U0FF, INSTR_RRF_U0RF, + INSTR_RRF_U0RR, INSTR_RRF_UUFF, INSTR_RRR_F0FF, INSTR_RRS_RRRDU, INSTR_RR_FF, INSTR_RR_R0, INSTR_RR_RR, INSTR_RR_U0, INSTR_RR_UR, INSTR_RSE_CCRD, INSTR_RSE_RRRD, INSTR_RSE_RURD, INSTR_RSI_RRP, INSTR_RSL_R0RD, INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, + INSTR_RSY_RDRM, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, INSTR_RS_RURD, INSTR_RXE_FRRD, INSTR_RXE_RRRD, @@ -139,7 +140,7 @@ enum { INSTR_SIY_IRD, INSTR_SIY_URD, INSTR_SI_URD, INSTR_SSE_RDRD, - INSTR_SSF_RRDRD, + INSTR_SSF_RRDRD, INSTR_SSF_RRDRD2, INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD, INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3, INSTR_S_00, INSTR_S_RD, @@ -152,7 +153,7 @@ struct operand { }; struct insn { - const char name[6]; + const char name[5]; unsigned char opfrag; unsigned char format; }; @@ -217,6 +218,7 @@ static const unsigned char formats[][7] = { [INSTR_RIE_RRP] = { 0xff, R_8,R_12,J16_16,0,0,0 }, [INSTR_RIE_RRUUU] = { 0xff, R_8,R_12,U8_16,U8_24,U8_32,0 }, [INSTR_RIE_RUPI] = { 0xff, R_8,I8_32,U4_12,J16_16,0,0 }, + [INSTR_RIE_RRI0] = { 0xff, R_8,R_12,I16_16,0,0,0 }, [INSTR_RIL_RI] = { 0x0f, R_8,I32_16,0,0,0,0 }, [INSTR_RIL_RP] = { 0x0f, R_8,J32_16,0,0,0,0 }, [INSTR_RIL_RU] = { 0x0f, R_8,U32_16,0,0,0,0 }, @@ -248,6 +250,7 @@ static const unsigned char formats[][7] = { [INSTR_RRF_FUFF] = { 0xff, F_24,F_16,F_28,U4_20,0,0 }, [INSTR_RRF_M0RR] = { 0xff, R_24,R_28,M_16,0,0,0 }, [INSTR_RRF_R0RR] = { 0xff, R_24,R_16,R_28,0,0,0 }, + [INSTR_RRF_R0RR2] = { 0xff, R_24,R_28,R_16,0,0,0 }, [INSTR_RRF_RURR] = { 0xff, R_24,R_28,R_16,U4_20,0,0 }, [INSTR_RRF_U0FF] = { 0xff, F_24,U4_16,F_28,0,0,0 }, [INSTR_RRF_U0RF] = { 0xff, R_24,U4_16,F_28,0,0,0 }, @@ -269,6 +272,7 @@ static const unsigned char formats[][7] = { [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, + [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, [INSTR_RS_CCRD] = { 0xff, C_8,C_12,D_20,B_16,0,0 }, [INSTR_RS_R0RD] = { 0xff, R_8,D_20,B_16,0,0,0 }, @@ -290,6 +294,7 @@ static const unsigned char formats[][7] = { [INSTR_SI_URD] = { 0xff, D_20,B_16,U8_8,0,0,0 }, [INSTR_SSE_RDRD] = { 0xff, D_20,B_16,D_36,B_32,0,0 }, [INSTR_SSF_RRDRD] = { 0x00, D_20,B_16,D_36,B_32,R_8,0 }, + [INSTR_SSF_RRDRD2]= { 0x00, R_8,D_20,B_16,D_36,B_32,0 }, [INSTR_SS_L0RDRD] = { 0xff, D_20,L8_8,B_16,D_36,B_32,0 }, [INSTR_SS_LIRDRD] = { 0xff, D_20,L4_8,B_16,D_36,B_32,U4_12 }, [INSTR_SS_LLRDRD] = { 0xff, D_20,L4_8,B_16,D_36,L4_12,B_32 }, @@ -300,6 +305,36 @@ static const unsigned char formats[][7] = { [INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 }, }; +enum { + LONG_INSN_ALGHSIK, + LONG_INSN_ALHSIK, + LONG_INSN_CLFHSI, + LONG_INSN_CLGFRL, + LONG_INSN_CLGHRL, + LONG_INSN_CLGHSI, + LONG_INSN_CLHHSI, + LONG_INSN_LLGFRL, + LONG_INSN_LLGHRL, + LONG_INSN_POPCNT, + LONG_INSN_RISBHG, + LONG_INSN_RISBLG, +}; + +static char *long_insn_name[] = { + [LONG_INSN_ALGHSIK] = "alghsik", + [LONG_INSN_ALHSIK] = "alhsik", + [LONG_INSN_CLFHSI] = "clfhsi", + [LONG_INSN_CLGFRL] = "clgfrl", + [LONG_INSN_CLGHRL] = "clghrl", + [LONG_INSN_CLGHSI] = "clghsi", + [LONG_INSN_CLHHSI] = "clhhsi", + [LONG_INSN_LLGFRL] = "llgfrl", + [LONG_INSN_LLGHRL] = "llghrl", + [LONG_INSN_POPCNT] = "popcnt", + [LONG_INSN_RISBHG] = "risbhg", + [LONG_INSN_RISBLG] = "risblk", +}; + static struct insn opcode[] = { #ifdef CONFIG_64BIT { "lmd", 0xef, INSTR_SS_RRRDRD3 }, @@ -881,6 +916,35 @@ static struct insn opcode_b9[] = { { "pfmf", 0xaf, INSTR_RRE_RR }, { "trte", 0xbf, INSTR_RRF_M0RR }, { "trtre", 0xbd, INSTR_RRF_M0RR }, + { "ahhhr", 0xc8, INSTR_RRF_R0RR2 }, + { "shhhr", 0xc9, INSTR_RRF_R0RR2 }, + { "alhhh", 0xca, INSTR_RRF_R0RR2 }, + { "alhhl", 0xca, INSTR_RRF_R0RR2 }, + { "slhhh", 0xcb, INSTR_RRF_R0RR2 }, + { "chhr ", 0xcd, INSTR_RRE_RR }, + { "clhhr", 0xcf, INSTR_RRE_RR }, + { "ahhlr", 0xd8, INSTR_RRF_R0RR2 }, + { "shhlr", 0xd9, INSTR_RRF_R0RR2 }, + { "slhhl", 0xdb, INSTR_RRF_R0RR2 }, + { "chlr", 0xdd, INSTR_RRE_RR }, + { "clhlr", 0xdf, INSTR_RRE_RR }, + { { 0, LONG_INSN_POPCNT }, 0xe1, INSTR_RRE_RR }, + { "locgr", 0xe2, INSTR_RRF_M0RR }, + { "ngrk", 0xe4, INSTR_RRF_R0RR2 }, + { "ogrk", 0xe6, INSTR_RRF_R0RR2 }, + { "xgrk", 0xe7, INSTR_RRF_R0RR2 }, + { "agrk", 0xe8, INSTR_RRF_R0RR2 }, + { "sgrk", 0xe9, INSTR_RRF_R0RR2 }, + { "algrk", 0xea, INSTR_RRF_R0RR2 }, + { "slgrk", 0xeb, INSTR_RRF_R0RR2 }, + { "locr", 0xf2, INSTR_RRF_M0RR }, + { "nrk", 0xf4, INSTR_RRF_R0RR2 }, + { "ork", 0xf6, INSTR_RRF_R0RR2 }, + { "xrk", 0xf7, INSTR_RRF_R0RR2 }, + { "ark", 0xf8, INSTR_RRF_R0RR2 }, + { "srk", 0xf9, INSTR_RRF_R0RR2 }, + { "alrk", 0xfa, INSTR_RRF_R0RR2 }, + { "slrk", 0xfb, INSTR_RRF_R0RR2 }, #endif { "kmac", 0x1e, INSTR_RRE_RR }, { "lrvr", 0x1f, INSTR_RRE_RR }, @@ -949,9 +1013,9 @@ static struct insn opcode_c4[] = { { "lgfrl", 0x0c, INSTR_RIL_RP }, { "lhrl", 0x05, INSTR_RIL_RP }, { "lghrl", 0x04, INSTR_RIL_RP }, - { "llgfrl", 0x0e, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGFRL }, 0x0e, INSTR_RIL_RP }, { "llhrl", 0x02, INSTR_RIL_RP }, - { "llghrl", 0x06, INSTR_RIL_RP }, + { { 0, LONG_INSN_LLGHRL }, 0x06, INSTR_RIL_RP }, { "strl", 0x0f, INSTR_RIL_RP }, { "stgrl", 0x0b, INSTR_RIL_RP }, { "sthrl", 0x07, INSTR_RIL_RP }, @@ -968,9 +1032,9 @@ static struct insn opcode_c6[] = { { "cghrl", 0x04, INSTR_RIL_RP }, { "clrl", 0x0f, INSTR_RIL_RP }, { "clgrl", 0x0a, INSTR_RIL_RP }, - { "clgfrl", 0x0e, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGFRL }, 0x0e, INSTR_RIL_RP }, { "clhrl", 0x07, INSTR_RIL_RP }, - { "clghrl", 0x06, INSTR_RIL_RP }, + { { 0, LONG_INSN_CLGHRL }, 0x06, INSTR_RIL_RP }, { "pfdrl", 0x02, INSTR_RIL_UP }, { "exrl", 0x00, INSTR_RIL_RP }, #endif @@ -982,6 +1046,20 @@ static struct insn opcode_c8[] = { { "mvcos", 0x00, INSTR_SSF_RRDRD }, { "ectg", 0x01, INSTR_SSF_RRDRD }, { "csst", 0x02, INSTR_SSF_RRDRD }, + { "lpd", 0x04, INSTR_SSF_RRDRD2 }, + { "lpdg ", 0x05, INSTR_SSF_RRDRD2 }, +#endif + { "", 0, INSTR_INVALID } +}; + +static struct insn opcode_cc[] = { +#ifdef CONFIG_64BIT + { "brcth", 0x06, INSTR_RIL_RP }, + { "aih", 0x08, INSTR_RIL_RI }, + { "alsih", 0x0a, INSTR_RIL_RI }, + { "alsih", 0x0b, INSTR_RIL_RI }, + { "cih", 0x0d, INSTR_RIL_RI }, + { "clih ", 0x0f, INSTR_RIL_RI }, #endif { "", 0, INSTR_INVALID } }; @@ -1063,6 +1141,16 @@ static struct insn opcode_e3[] = { { "mfy", 0x5c, INSTR_RXY_RRRD }, { "mhy", 0x7c, INSTR_RXY_RRRD }, { "pfd", 0x36, INSTR_RXY_URRD }, + { "lbh", 0xc0, INSTR_RXY_RRRD }, + { "llch", 0xc2, INSTR_RXY_RRRD }, + { "stch", 0xc3, INSTR_RXY_RRRD }, + { "lhh", 0xc4, INSTR_RXY_RRRD }, + { "llhh", 0xc6, INSTR_RXY_RRRD }, + { "sthh", 0xc7, INSTR_RXY_RRRD }, + { "lfh", 0xca, INSTR_RXY_RRRD }, + { "stfh", 0xcb, INSTR_RXY_RRRD }, + { "chf", 0xcd, INSTR_RXY_RRRD }, + { "clhf", 0xcf, INSTR_RXY_RRRD }, #endif { "lrv", 0x1e, INSTR_RXY_RRRD }, { "lrvh", 0x1f, INSTR_RXY_RRRD }, @@ -1080,9 +1168,9 @@ static struct insn opcode_e5[] = { { "chhsi", 0x54, INSTR_SIL_RDI }, { "chsi", 0x5c, INSTR_SIL_RDI }, { "cghsi", 0x58, INSTR_SIL_RDI }, - { "clhhsi", 0x55, INSTR_SIL_RDU }, - { "clfhsi", 0x5d, INSTR_SIL_RDU }, - { "clghsi", 0x59, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLHHSI }, 0x55, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLFHSI }, 0x5d, INSTR_SIL_RDU }, + { { 0, LONG_INSN_CLGHSI }, 0x59, INSTR_SIL_RDU }, { "mvhhi", 0x44, INSTR_SIL_RDI }, { "mvhi", 0x4c, INSTR_SIL_RDI }, { "mvghi", 0x48, INSTR_SIL_RDI }, @@ -1137,6 +1225,24 @@ static struct insn opcode_eb[] = { { "alsi", 0x6e, INSTR_SIY_IRD }, { "algsi", 0x7e, INSTR_SIY_IRD }, { "ecag", 0x4c, INSTR_RSY_RRRD }, + { "srak", 0xdc, INSTR_RSY_RRRD }, + { "slak", 0xdd, INSTR_RSY_RRRD }, + { "srlk", 0xde, INSTR_RSY_RRRD }, + { "sllk", 0xdf, INSTR_RSY_RRRD }, + { "locg", 0xe2, INSTR_RSY_RDRM }, + { "stocg", 0xe3, INSTR_RSY_RDRM }, + { "lang", 0xe4, INSTR_RSY_RRRD }, + { "laog", 0xe6, INSTR_RSY_RRRD }, + { "laxg", 0xe7, INSTR_RSY_RRRD }, + { "laag", 0xe8, INSTR_RSY_RRRD }, + { "laalg", 0xea, INSTR_RSY_RRRD }, + { "loc", 0xf2, INSTR_RSY_RDRM }, + { "stoc", 0xf3, INSTR_RSY_RDRM }, + { "lan", 0xf4, INSTR_RSY_RRRD }, + { "lao", 0xf6, INSTR_RSY_RRRD }, + { "lax", 0xf7, INSTR_RSY_RRRD }, + { "laa", 0xf8, INSTR_RSY_RRRD }, + { "laal", 0xfa, INSTR_RSY_RRRD }, #endif { "rll", 0x1d, INSTR_RSY_RRRD }, { "mvclu", 0x8e, INSTR_RSY_RRRD }, @@ -1172,6 +1278,12 @@ static struct insn opcode_ec[] = { { "rxsbg", 0x57, INSTR_RIE_RRUUU }, { "rosbg", 0x56, INSTR_RIE_RRUUU }, { "risbg", 0x55, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBLG }, 0x51, INSTR_RIE_RRUUU }, + { { 0, LONG_INSN_RISBHG }, 0x5D, INSTR_RIE_RRUUU }, + { "ahik", 0xd8, INSTR_RIE_RRI0 }, + { "aghik", 0xd9, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALHSIK }, 0xda, INSTR_RIE_RRI0 }, + { { 0, LONG_INSN_ALGHSIK }, 0xdb, INSTR_RIE_RRI0 }, #endif { "", 0, INSTR_INVALID } }; @@ -1321,6 +1433,9 @@ static struct insn *find_insn(unsigned char *code) case 0xc8: table = opcode_c8; break; + case 0xcc: + table = opcode_cc; + break; case 0xe3: table = opcode_e3; opfrag = code[5]; @@ -1367,7 +1482,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr = buffer; insn = find_insn(code); if (insn) { - ptr += sprintf(ptr, "%.5s\t", insn->name); + if (insn->name[0] == '\0') + ptr += sprintf(ptr, "%s\t", + long_insn_name[(int) insn->name[1]]); + else + ptr += sprintf(ptr, "%.5s\t", insn->name); /* Extract the operands. */ separator = 0; for (ops = formats[insn->format] + 1, i = 0; From 7aca2eda5c2a45884ff0ce0bb1ebfa8f83c1e0f9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:16 +0200 Subject: [PATCH 10/48] [S390] sysinfo: display capacity adjustment indicator Display machine capacity adjustment indicator and capacity change reason if available in /proc/sysinfo. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sysinfo.h | 5 ++++- arch/s390/kernel/sysinfo.c | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index 22bdb2a0ee5f..c2ece4dd6c0a 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -15,7 +15,10 @@ #define __ASM_S390_SYSINFO_H struct sysinfo_1_1_1 { - char reserved_0[32]; + unsigned short :16; + unsigned char ccr; + unsigned char cai; + char reserved_0[28]; char manufacturer[16]; char type[4]; char reserved_1[12]; diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index a0ffc7717ed6..a91274b4eb8f 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -74,6 +74,13 @@ static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) "Model Temp. Capacity: %-16.16s %08u\n", info->model_temp_cap, *(u32 *) info->model_temp_cap_rating); + if (info->cai) { + len += sprintf(page + len, + "Capacity Adj. Ind.: %d\n", + info->cai); + len += sprintf(page + len, "Capacity Ch. Reason: %d\n", + info->ccr); + } return len; } From f861e4057263033ad9134bfd6745f91b2165b351 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:17 +0200 Subject: [PATCH 11/48] [S390] Add config option for z196 code generation. Add a kernel config option for the IBM zEnterprise 196. This will produce faster code on newer compilers using the -march=z196 option. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 8 ++++++++ arch/s390/Makefile | 1 + arch/s390/kernel/head.S | 8 ++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 7afc17340500..21a7030abbb4 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -287,6 +287,14 @@ config MARCH_Z10 machines such as the z990, z890, z900, z800, z9-109, z9-ec and z9-bc. +config MARCH_Z196 + bool "IBM zEnterprise 196" + help + Select this to enable optimizations for IBM zEnterprise 196. + The kernel will be slightly faster but will not work on older + machines such as the z990, z890, z900, z800, z9-109, z9-ec, + z9-bc, z10-ec and z10-bc. + endchoice config PACK_STACK diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 9d318476f81f..d5b8a6ade525 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -40,6 +40,7 @@ cflags-$(CONFIG_MARCH_Z900) += $(call cc-option,-march=z900) cflags-$(CONFIG_MARCH_Z990) += $(call cc-option,-march=z990) cflags-$(CONFIG_MARCH_Z9_109) += $(call cc-option,-march=z9-109) cflags-$(CONFIG_MARCH_Z10) += $(call cc-option,-march=z10) +cflags-$(CONFIG_MARCH_Z196) += $(call cc-option,-march=z196) #KBUILD_IMAGE is necessary for make rpm KBUILD_IMAGE :=arch/s390/boot/image diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index db1696e210af..7061398341d5 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -488,7 +488,9 @@ startup: .align 16 2: .long 0x000a0000,0x8badcccc #if defined(CONFIG_64BIT) -#if defined(CONFIG_MARCH_Z10) +#if defined(CONFIG_MARCH_Z196) + .long 0xc100efe3, 0xf46c0000 +#elif defined(CONFIG_MARCH_Z10) .long 0xc100efe3, 0xf0680000 #elif defined(CONFIG_MARCH_Z9_109) .long 0xc100efc3, 0x00000000 @@ -498,7 +500,9 @@ startup: .long 0xc0000000, 0x00000000 #endif #else -#if defined(CONFIG_MARCH_Z10) +#if defined(CONFIG_MARCH_Z196) + .long 0x8100c880, 0x00000000 +#elif defined(CONFIG_MARCH_Z10) .long 0x8100c880, 0x00000000 #elif defined(CONFIG_MARCH_Z9_109) .long 0x8100c880, 0x00000000 From fdb6d070effba1871f0bb980cf3c3b8738803414 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:18 +0200 Subject: [PATCH 12/48] [S390] switch_to: dont restore/save access & fpu regs for kernel threads If the previous task was a kernel thread there is no need to save the contents of the fpu and access registers since they aren't used in kernel mode. For the same reason it is not necessary to restore these registers if the next task is a kernel thread. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/system.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 1f2ebc4afd82..c35d0a383f00 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -85,14 +85,18 @@ static inline void restore_access_regs(unsigned int *acrs) asm volatile("lam 0,15,%0" : : "Q" (*acrs)); } -#define switch_to(prev,next,last) do { \ - if (prev == next) \ - break; \ - save_fp_regs(&prev->thread.fp_regs); \ - restore_fp_regs(&next->thread.fp_regs); \ - save_access_regs(&prev->thread.acrs[0]); \ - restore_access_regs(&next->thread.acrs[0]); \ - prev = __switch_to(prev,next); \ +#define switch_to(prev,next,last) do { \ + if (prev == next) \ + break; \ + if (prev->mm) { \ + save_fp_regs(&prev->thread.fp_regs); \ + save_access_regs(&prev->thread.acrs[0]); \ + } \ + if (next->mm) { \ + restore_fp_regs(&next->thread.fp_regs); \ + restore_access_regs(&next->thread.acrs[0]); \ + } \ + prev = __switch_to(prev,next); \ } while (0) extern void account_vtime(struct task_struct *, struct task_struct *); From 189b93d00fd6f71b013ad472fd72c0f043b160a9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:19 +0200 Subject: [PATCH 13/48] [S390] switch_to: get rid of prev == next check schedule() makes sure that prev != next before calling switch_to(). Therefore remove the redundant check. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/system.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index c35d0a383f00..3c079dd5ee79 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -86,8 +86,6 @@ static inline void restore_access_regs(unsigned int *acrs) } #define switch_to(prev,next,last) do { \ - if (prev == next) \ - break; \ if (prev->mm) { \ save_fp_regs(&prev->thread.fp_regs); \ save_access_regs(&prev->thread.acrs[0]); \ From bf2106ae114807772506e62cbf8a7d8e2c5403a1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 25 Oct 2010 16:10:20 +0200 Subject: [PATCH 14/48] [S390] drivers/s390/char: Use static const char arrays Signed-off-by: Joe Perches Signed-off-by: Martin Schwidefsky --- drivers/s390/char/vmlogrdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 0d6dc4b92cc2..9f661426e4a1 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -215,7 +215,7 @@ static void vmlogrdr_iucv_message_pending(struct iucv_path *path, static int vmlogrdr_get_recording_class_AB(void) { - char cp_command[]="QUERY COMMAND RECORDING "; + static const char cp_command[] = "QUERY COMMAND RECORDING "; char cp_response[80]; char *tail; int len,i; @@ -638,7 +638,7 @@ static ssize_t vmlogrdr_recording_status_show(struct device_driver *driver, char *buf) { - char cp_command[] = "QUERY RECORDING "; + static const char cp_command[] = "QUERY RECORDING "; int len; cpcmd(cp_command, buf, 4096, NULL); From a20852d2b7ca3c6e7b232eecf09631b66dde2a46 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:21 +0200 Subject: [PATCH 15/48] [S390] cmm: fix crash on case conversion When the cmm module is compiled into the kernel it will crash when writing to the R/O data section. Reason is the lower to upper case conversion of the "sender" module parameter which ignored the fact that the pointer is preinitialized. Introduced with 41b42876 "cmm, smsgiucv_app: convert sender to uppercase" Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/cmm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index a9550dca3e4b..c66ffd8dbbb7 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -23,7 +23,10 @@ #include #include -static char *sender = "VMRMSVM"; +#ifdef CONFIG_CMM_IUCV +static char *cmm_default_sender = "VMRMSVM"; +#endif +static char *sender; module_param(sender, charp, 0400); MODULE_PARM_DESC(sender, "Guest name that may send SMSG messages (default VMRMSVM)"); @@ -440,6 +443,8 @@ static int __init cmm_init(void) int len = strlen(sender); while (len--) sender[len] = toupper(sender[len]); + } else { + sender = cmm_default_sender; } rc = smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); From c77f7cf74b9bc0e56c8698097228c1784e9f03c9 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 25 Oct 2010 16:10:22 +0200 Subject: [PATCH 16/48] [S390] hvc_iucv: do not call iucv_unregister if iucv_register failed If the iucv_register() functions fails, the error recovery calls iucv_unregister() which might cause the following stack backtrace: (<0000000000100ab2> show_trace+0xee/0x144) <00000000004f1842> panic+0xb6/0x248 <00000000001010a6> die+0x15a/0x16c <000000000011d936> do_no_context+0xa6/0xe4 <00000000004f84dc> do_protection_exception+0x2e8/0x3a4 <0000000000113afc> pgm_exit+0x0/0x14 <00000000004e786e> iucv_unregister+0x5a/0x17c (<00000000004e785e> iucv_unregister+0x4a/0x17c) <000000000076de74> hvc_iucv_init+0x228/0x5dc <00000000001000c2> do_one_initcall+0x3e/0x19c <00000000007524a2> kernel_init+0x28e/0x404 <0000000000105dd6> kernel_thread_starter+0x6/0xc <0000000000105dd0> kernel_thread_starter+0x0/0xc Remove the call to iucv_unregister() and remove the goto label as unregistering is the last step in the hvc_iucv initialization. If iucv_register() fails, simply clean up hvc terminals and free resources. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- drivers/char/hvc_iucv.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/hvc_iucv.c b/drivers/char/hvc_iucv.c index 7b01bc609de3..c3425bb3a1f6 100644 --- a/drivers/char/hvc_iucv.c +++ b/drivers/char/hvc_iucv.c @@ -1303,13 +1303,11 @@ static int __init hvc_iucv_init(void) if (rc) { pr_err("Registering IUCV handlers failed with error code=%d\n", rc); - goto out_error_iucv; + goto out_error_hvc; } return 0; -out_error_iucv: - iucv_unregister(&hvc_iucv_handler, 0); out_error_hvc: for (i = 0; i < hvc_iucv_devices; i++) if (hvc_iucv_table[i]) From a8481c2afeed297426f67ce3fd4ad7eea6b7ddf5 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:23 +0200 Subject: [PATCH 17/48] [S390] css: fix sparse warning fix this sparse warning: drivers/s390/cio/css.c:580:6: warning: symbol 'css_schedule_eval_all_unreg' was not declared. Should it be static? Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/css.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index ca8e1c240c3c..064d3003ed20 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -577,7 +577,7 @@ static int __unset_registered(struct device *dev, void *data) return 0; } -void css_schedule_eval_all_unreg(void) +static void css_schedule_eval_all_unreg(void) { unsigned long flags; struct idset *unreg_set; From ed3640b285d831065eb0507cdca7d125f6f78e8d Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 25 Oct 2010 16:10:24 +0200 Subject: [PATCH 18/48] [S390] dasd fix dump_sense_dbf The dasd_eckd_dump_sense_dbf function uses a macro for s390 debug feature that can handle up to 8 parameters (for the DASD device driver). Fix the function to use only the maximum number of parameters. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 59b4ecfb967b..ea0e565ebc9d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -3093,16 +3093,20 @@ dasd_eckd_dump_sense_dbf(struct dasd_device *device, struct irb *irb, char *reason) { u64 *sense; + u32 stat; sense = (u64 *) dasd_get_sense(irb); + stat = scsw_cstat(&irb->scsw); + stat <<= 8; + stat |= scsw_dstat(&irb->scsw); + stat <<= 8; + stat |= scsw_cc(&irb->scsw); + if (sense) { DBF_DEV_EVENT(DBF_EMERG, device, - "%s: %s %02x%02x%02x %016llx %016llx %016llx " - "%016llx", reason, - scsw_is_tm(&irb->scsw) ? "t" : "c", - scsw_cc(&irb->scsw), scsw_cstat(&irb->scsw), - scsw_dstat(&irb->scsw), sense[0], sense[1], - sense[2], sense[3]); + "%s: %s %06x %016llx %016llx %016llx %016llx", + reason, scsw_is_tm(&irb->scsw) ? "t" : "c", stat, + sense[0], sense[1], sense[2], sense[3]); } else { DBF_DEV_EVENT(DBF_EMERG, device, "%s", "SORRY - NO VALID SENSE AVAILABLE\n"); From f2777077aa1f6f8a7c76b83f240975289a9fb894 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 25 Oct 2010 16:10:25 +0200 Subject: [PATCH 19/48] [S390] cio: remove custom implementation of hex_to_bin() Signed-off-by: Andy Shevchenko Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/blacklist.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 13cb60162e42..76058a5166ed 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -79,17 +79,15 @@ static int pure_hex(char **cp, unsigned int *val, int min_digit, int max_digit, int max_val) { int diff; - unsigned int value; diff = 0; *val = 0; - while (isxdigit(**cp) && (diff <= max_digit)) { + while (diff <= max_digit) { + int value = hex_to_bin(**cp); - if (isdigit(**cp)) - value = **cp - '0'; - else - value = tolower(**cp) - 'a' + 10; + if (value < 0) + break; *val = *val * 16 + value; (*cp)++; diff++; From 74b6127e6c35abf06364468636dd261850639f8b Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:26 +0200 Subject: [PATCH 20/48] [S390] cio: fix memleak in resume path If a ccwdevice is lost during hibernation and a different ccwdevice is attached to the same subchannel, we will deregister the old ccw device and register the new one. Since deregistration is not allowed in this context, we handle this action later. However, some parts of the registration process for the new device were started anyway, so that the old device structure is no longer accessible. Fix this by deferring both actions to the afterwards scheduled subchannel event. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 51bd3687d163..07b1a074beaf 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1468,9 +1468,13 @@ static int io_subchannel_sch_event(struct subchannel *sch, int process) goto out; break; case IO_SCH_UNREG_ATTACH: + if (cdev->private->flags.resuming) { + /* Device will be handled later. */ + rc = 0; + goto out; + } /* Unregister ccw device. */ - if (!cdev->private->flags.resuming) - ccw_device_unregister(cdev); + ccw_device_unregister(cdev); break; default: break; From b730f3a933958362ee1080c257f2cc158149310a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:27 +0200 Subject: [PATCH 21/48] [S390] cio: add lock to struct channel_path Serialize access to members of struct channel_path with a mutex. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chp.c | 35 +++++++++++++++++++++++------------ drivers/s390/cio/chp.h | 6 ++++-- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 6c9fa15aac7b..b1addd78689d 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -1,7 +1,7 @@ /* * drivers/s390/cio/chp.c * - * Copyright IBM Corp. 1999,2007 + * Copyright IBM Corp. 1999,2010 * Author(s): Cornelia Huck (cornelia.huck@de.ibm.com) * Arnd Bergmann (arndb@de.ibm.com) * Peter Oberparleiter @@ -241,11 +241,13 @@ static ssize_t chp_status_show(struct device *dev, struct device_attribute *attr, char *buf) { struct channel_path *chp = to_channelpath(dev); + int status; - if (!chp) - return 0; - return (chp_get_status(chp->chpid) ? sprintf(buf, "online\n") : - sprintf(buf, "offline\n")); + mutex_lock(&chp->lock); + status = chp->state; + mutex_unlock(&chp->lock); + + return status ? sprintf(buf, "online\n") : sprintf(buf, "offline\n"); } static ssize_t chp_status_write(struct device *dev, @@ -261,15 +263,18 @@ static ssize_t chp_status_write(struct device *dev, if (!num_args) return count; - if (!strnicmp(cmd, "on", 2) || !strcmp(cmd, "1")) + if (!strnicmp(cmd, "on", 2) || !strcmp(cmd, "1")) { + mutex_lock(&cp->lock); error = s390_vary_chpid(cp->chpid, 1); - else if (!strnicmp(cmd, "off", 3) || !strcmp(cmd, "0")) + mutex_unlock(&cp->lock); + } else if (!strnicmp(cmd, "off", 3) || !strcmp(cmd, "0")) { + mutex_lock(&cp->lock); error = s390_vary_chpid(cp->chpid, 0); - else + mutex_unlock(&cp->lock); + } else error = -EINVAL; return error < 0 ? error : count; - } static DEVICE_ATTR(status, 0644, chp_status_show, chp_status_write); @@ -315,10 +320,12 @@ static ssize_t chp_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct channel_path *chp = to_channelpath(dev); + u8 type; - if (!chp) - return 0; - return sprintf(buf, "%x\n", chp->desc.desc); + mutex_lock(&chp->lock); + type = chp->desc.desc; + mutex_unlock(&chp->lock); + return sprintf(buf, "%x\n", type); } static DEVICE_ATTR(type, 0444, chp_type_show, NULL); @@ -395,6 +402,7 @@ int chp_new(struct chp_id chpid) chp->state = 1; chp->dev.parent = &channel_subsystems[chpid.cssid]->device; chp->dev.release = chp_release; + mutex_init(&chp->lock); /* Obtain channel path description and fill it in. */ ret = chsc_determine_base_channel_path_desc(chpid, &chp->desc); @@ -464,7 +472,10 @@ void *chp_get_chp_desc(struct chp_id chpid) desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL); if (!desc) return NULL; + + mutex_lock(&chp->lock); memcpy(desc, &chp->desc, sizeof(struct channel_path_desc)); + mutex_unlock(&chp->lock); return desc; } diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h index 26c3d2246176..695ec20f1c59 100644 --- a/drivers/s390/cio/chp.h +++ b/drivers/s390/cio/chp.h @@ -1,7 +1,7 @@ /* * drivers/s390/cio/chp.h * - * Copyright IBM Corp. 2007 + * Copyright IBM Corp. 2007,2010 * Author(s): Peter Oberparleiter */ @@ -10,6 +10,7 @@ #include #include +#include #include #include "chsc.h" #include "css.h" @@ -40,14 +41,15 @@ static inline int chp_test_bit(u8 *bitmap, int num) struct channel_path { + struct device dev; struct chp_id chpid; + struct mutex lock; /* Serialize access to below members. */ int state; struct channel_path_desc desc; /* Channel-measurement related stuff: */ int cmg; int shared; void *cmg_chars; - struct device dev; }; int chp_get_status(struct chp_id chpid); From 34aec07c170b972a29c954b37047184bd0f9f294 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:28 +0200 Subject: [PATCH 22/48] [S390] chsc: initialization fixes This patch fixes: * kfree vs. free_page usage * structure definition for determine_css_characteristics * naming convention for the chsc init function * deregistration of crw handlers in the cleanup path Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 12 ++++++------ drivers/s390/cio/chsc.h | 4 ++-- drivers/s390/cio/css.c | 18 +++++++++--------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 4cbb1a6ca33c..f26cc3e16181 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -2,7 +2,7 @@ * drivers/s390/cio/chsc.c * S/390 common I/O routines -- channel subsystem call * - * Copyright IBM Corp. 1999,2008 + * Copyright IBM Corp. 1999,2010 * Author(s): Ingo Adlung (adlung@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) * Arnd Bergmann (arndb@de.ibm.com) @@ -813,7 +813,7 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) return ret; } -int __init chsc_alloc_sei_area(void) +int __init chsc_init(void) { int ret; @@ -825,14 +825,14 @@ int __init chsc_alloc_sei_area(void) } ret = crw_register_handler(CRW_RSC_CSS, chsc_process_crw); if (ret) - kfree(sei_page); + free_page((unsigned long)sei_page); return ret; } -void __init chsc_free_sei_area(void) +void __init chsc_init_cleanup(void) { crw_unregister_handler(CRW_RSC_CSS); - kfree(sei_page); + free_page((unsigned long)sei_page); } int chsc_enable_facility(int operation_code) @@ -895,7 +895,7 @@ chsc_determine_css_characteristics(void) struct chsc_header response; u32 reserved4; u32 general_char[510]; - u32 chsc_char[518]; + u32 chsc_char[508]; } __attribute__ ((packed)) *scsc_area; scsc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 5453013f094b..6d669dd0dd84 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -60,8 +60,8 @@ struct chsc_ssd_info { extern int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd); extern int chsc_determine_css_characteristics(void); -extern int chsc_alloc_sei_area(void); -extern void chsc_free_sei_area(void); +extern int chsc_init(void); +extern void chsc_init_cleanup(void); extern int chsc_enable_facility(int); struct channel_subsystem; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 064d3003ed20..fa1ad3aab66f 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1,7 +1,7 @@ /* * driver for channel subsystem * - * Copyright IBM Corp. 2002, 2009 + * Copyright IBM Corp. 2002, 2010 * * Author(s): Arnd Bergmann (arndb@de.ibm.com) * Cornelia Huck (cornelia.huck@de.ibm.com) @@ -863,14 +863,14 @@ static int __init css_bus_init(void) { int ret, i; + ret = chsc_init(); + if (ret) + return ret; + ret = chsc_determine_css_characteristics(); if (ret == -ENOMEM) goto out; - ret = chsc_alloc_sei_area(); - if (ret) - goto out; - /* Try to enable MSS. */ ret = chsc_enable_facility(CHSC_SDA_OC_MSS); if (ret) @@ -956,9 +956,9 @@ static int __init css_bus_init(void) } bus_unregister(&css_bus_type); out: - crw_unregister_handler(CRW_RSC_CSS); - chsc_free_sei_area(); + crw_unregister_handler(CRW_RSC_SCH); idset_free(slow_subchannel_set); + chsc_init_cleanup(); pr_alert("The CSS device driver initialization failed with " "errno=%d\n", ret); return ret; @@ -978,9 +978,9 @@ static void __init css_bus_cleanup(void) device_unregister(&css->device); } bus_unregister(&css_bus_type); - crw_unregister_handler(CRW_RSC_CSS); - chsc_free_sei_area(); + crw_unregister_handler(CRW_RSC_SCH); idset_free(slow_subchannel_set); + chsc_init_cleanup(); isc_unregister(IO_SCH_ISC); } From 34196f82b16749e119db5572271944c4add0a9aa Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:29 +0200 Subject: [PATCH 23/48] [S390] chsc: consolidate memory allocations Most wrappers around the channel subsystem call have their own logic to allocate memory (with proper alignment) or use preallocated or static memory. This patch converts most users of the channel subsystem call to use the same preallocated page (proteced by a spinlock). Note: The sei_page which is used in our crw handler to call "store event information" has to coexist, since a) in crw context, while accessing the sei_page, sleeping is allowed (which will conflict with the spinlock protection of the chsc_page) b) in crw context, while accessing the sei_page, channel subsystem calls are allowed (which itself would require the page). Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 224 ++++++++++++++++++++-------------------- drivers/s390/cio/chsc.h | 2 +- drivers/s390/cio/css.c | 24 +---- 3 files changed, 115 insertions(+), 135 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index f26cc3e16181..d12c152cb691 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -29,8 +29,8 @@ #include "chsc.h" static void *sei_page; -static DEFINE_SPINLOCK(siosl_lock); -static DEFINE_SPINLOCK(sda_lock); +static void *chsc_page; +static DEFINE_SPINLOCK(chsc_page_lock); /** * chsc_error_from_response() - convert a chsc response to an error @@ -85,17 +85,15 @@ struct chsc_ssd_area { int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) { - unsigned long page; struct chsc_ssd_area *ssd_area; int ccode; int ret; int i; int mask; - page = get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!page) - return -ENOMEM; - ssd_area = (struct chsc_ssd_area *) page; + spin_lock_irq(&chsc_page_lock); + memset(chsc_page, 0, PAGE_SIZE); + ssd_area = chsc_page; ssd_area->request.length = 0x0010; ssd_area->request.code = 0x0004; ssd_area->ssid = schid.ssid; @@ -106,25 +104,25 @@ int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) /* Check response. */ if (ccode > 0) { ret = (ccode == 3) ? -ENODEV : -EBUSY; - goto out_free; + goto out; } ret = chsc_error_from_response(ssd_area->response.code); if (ret != 0) { CIO_MSG_EVENT(2, "chsc: ssd failed for 0.%x.%04x (rc=%04x)\n", schid.ssid, schid.sch_no, ssd_area->response.code); - goto out_free; + goto out; } if (!ssd_area->sch_valid) { ret = -ENODEV; - goto out_free; + goto out; } /* Copy data */ ret = 0; memset(ssd, 0, sizeof(struct chsc_ssd_info)); if ((ssd_area->st != SUBCHANNEL_TYPE_IO) && (ssd_area->st != SUBCHANNEL_TYPE_MSG)) - goto out_free; + goto out; ssd->path_mask = ssd_area->path_mask; ssd->fla_valid_mask = ssd_area->fla_valid_mask; for (i = 0; i < 8; i++) { @@ -136,8 +134,8 @@ int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd) if (ssd_area->fla_valid_mask & mask) ssd->fla[i] = ssd_area->fla[i]; } -out_free: - free_page(page); +out: + spin_unlock_irq(&chsc_page_lock); return ret; } @@ -552,7 +550,7 @@ chsc_add_cmg_attr(struct channel_subsystem *css) return ret; } -int __chsc_do_secm(struct channel_subsystem *css, int enable, void *page) +int __chsc_do_secm(struct channel_subsystem *css, int enable) { struct { struct chsc_header request; @@ -573,7 +571,9 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable, void *page) } __attribute__ ((packed)) *secm_area; int ret, ccode; - secm_area = page; + spin_lock_irq(&chsc_page_lock); + memset(chsc_page, 0, PAGE_SIZE); + secm_area = chsc_page; secm_area->request.length = 0x0050; secm_area->request.code = 0x0016; @@ -584,8 +584,10 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable, void *page) secm_area->operation_code = enable ? 0 : 1; ccode = chsc(secm_area); - if (ccode > 0) - return (ccode == 3) ? -ENODEV : -EBUSY; + if (ccode > 0) { + ret = (ccode == 3) ? -ENODEV : -EBUSY; + goto out; + } switch (secm_area->response.code) { case 0x0102: @@ -598,37 +600,32 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable, void *page) if (ret != 0) CIO_CRW_EVENT(2, "chsc: secm failed (rc=%04x)\n", secm_area->response.code); +out: + spin_unlock_irq(&chsc_page_lock); return ret; } int chsc_secm(struct channel_subsystem *css, int enable) { - void *secm_area; int ret; - secm_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!secm_area) - return -ENOMEM; - if (enable && !css->cm_enabled) { css->cub_addr1 = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); css->cub_addr2 = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!css->cub_addr1 || !css->cub_addr2) { free_page((unsigned long)css->cub_addr1); free_page((unsigned long)css->cub_addr2); - free_page((unsigned long)secm_area); return -ENOMEM; } } - ret = __chsc_do_secm(css, enable, secm_area); + ret = __chsc_do_secm(css, enable); if (!ret) { css->cm_enabled = enable; if (css->cm_enabled) { ret = chsc_add_cmg_attr(css); if (ret) { - memset(secm_area, 0, PAGE_SIZE); - __chsc_do_secm(css, 0, secm_area); + __chsc_do_secm(css, 0); css->cm_enabled = 0; } } else @@ -638,7 +635,6 @@ chsc_secm(struct channel_subsystem *css, int enable) free_page((unsigned long)css->cub_addr1); free_page((unsigned long)css->cub_addr2); } - free_page((unsigned long)secm_area); return ret; } @@ -669,13 +665,12 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, return -EINVAL; if ((rfmt == 2) && !css_general_characteristics.cib) return -EINVAL; - scpd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!scpd_area) - return -ENOMEM; + spin_lock_irq(&chsc_page_lock); + memset(chsc_page, 0, PAGE_SIZE); + scpd_area = chsc_page; scpd_area->request.length = 0x0010; scpd_area->request.code = 0x0002; - scpd_area->cssid = chpid.cssid; scpd_area->first_chpid = chpid.id; scpd_area->last_chpid = chpid.id; @@ -698,7 +693,7 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, CIO_CRW_EVENT(2, "chsc: scpd failed (rc=%04x)\n", scpd_area->response.code); out: - free_page((unsigned long)scpd_area); + spin_unlock_irq(&chsc_page_lock); return ret; } EXPORT_SYMBOL_GPL(chsc_determine_channel_path_desc); @@ -725,33 +720,22 @@ static void chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv, struct cmg_chars *chars) { - switch (chp->cmg) { - case 2: - case 3: - chp->cmg_chars = kmalloc(sizeof(struct cmg_chars), - GFP_KERNEL); - if (chp->cmg_chars) { - int i, mask; - struct cmg_chars *cmg_chars; + struct cmg_chars *cmg_chars; + int i, mask; - cmg_chars = chp->cmg_chars; - for (i = 0; i < NR_MEASUREMENT_CHARS; i++) { - mask = 0x80 >> (i + 3); - if (cmcv & mask) - cmg_chars->values[i] = chars->values[i]; - else - cmg_chars->values[i] = 0; - } - } - break; - default: - /* No cmg-dependent data. */ - break; + cmg_chars = chp->cmg_chars; + for (i = 0; i < NR_MEASUREMENT_CHARS; i++) { + mask = 0x80 >> (i + 3); + if (cmcv & mask) + cmg_chars->values[i] = chars->values[i]; + else + cmg_chars->values[i] = 0; } } int chsc_get_channel_measurement_chars(struct channel_path *chp) { + struct cmg_chars *cmg_chars; int ccode, ret; struct { @@ -775,13 +759,16 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) u32 data[NR_MEASUREMENT_CHARS]; } __attribute__ ((packed)) *scmc_area; - scmc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!scmc_area) + chp->cmg_chars = NULL; + cmg_chars = kmalloc(sizeof(*cmg_chars), GFP_KERNEL); + if (!cmg_chars) return -ENOMEM; + spin_lock_irq(&chsc_page_lock); + memset(chsc_page, 0, PAGE_SIZE); + scmc_area = chsc_page; scmc_area->request.length = 0x0010; scmc_area->request.code = 0x0022; - scmc_area->first_chpid = chp->chpid.id; scmc_area->last_chpid = chp->chpid.id; @@ -792,24 +779,30 @@ int chsc_get_channel_measurement_chars(struct channel_path *chp) } ret = chsc_error_from_response(scmc_area->response.code); - if (ret == 0) { - /* Success. */ - if (!scmc_area->not_valid) { - chp->cmg = scmc_area->cmg; - chp->shared = scmc_area->shared; - chsc_initialize_cmg_chars(chp, scmc_area->cmcv, - (struct cmg_chars *) - &scmc_area->data); - } else { - chp->cmg = -1; - chp->shared = -1; - } - } else { + if (ret) { CIO_CRW_EVENT(2, "chsc: scmc failed (rc=%04x)\n", scmc_area->response.code); + goto out; } + if (scmc_area->not_valid) { + chp->cmg = -1; + chp->shared = -1; + goto out; + } + chp->cmg = scmc_area->cmg; + chp->shared = scmc_area->shared; + if (chp->cmg != 2 && chp->cmg != 3) { + /* No cmg-dependent data. */ + goto out; + } + chp->cmg_chars = cmg_chars; + chsc_initialize_cmg_chars(chp, scmc_area->cmcv, + (struct cmg_chars *) &scmc_area->data); out: - free_page((unsigned long)scmc_area); + spin_unlock_irq(&chsc_page_lock); + if (!chp->cmg_chars) + kfree(cmg_chars); + return ret; } @@ -818,27 +811,33 @@ int __init chsc_init(void) int ret; sei_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!sei_page) { - CIO_MSG_EVENT(0, "Can't allocate page for processing of " - "chsc machine checks!\n"); - return -ENOMEM; + chsc_page = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!sei_page || !chsc_page) { + ret = -ENOMEM; + goto out_err; } ret = crw_register_handler(CRW_RSC_CSS, chsc_process_crw); if (ret) - free_page((unsigned long)sei_page); + goto out_err; + return ret; +out_err: + free_page((unsigned long)chsc_page); + free_page((unsigned long)sei_page); return ret; } void __init chsc_init_cleanup(void) { crw_unregister_handler(CRW_RSC_CSS); + free_page((unsigned long)chsc_page); free_page((unsigned long)sei_page); } int chsc_enable_facility(int operation_code) { + unsigned long flags; int ret; - static struct { + struct { struct chsc_header request; u8 reserved1:4; u8 format:4; @@ -851,32 +850,33 @@ int chsc_enable_facility(int operation_code) u32 reserved5:4; u32 format2:4; u32 reserved6:24; - } __attribute__ ((packed, aligned(4096))) sda_area; + } __attribute__ ((packed)) *sda_area; - spin_lock(&sda_lock); - memset(&sda_area, 0, sizeof(sda_area)); - sda_area.request.length = 0x0400; - sda_area.request.code = 0x0031; - sda_area.operation_code = operation_code; + spin_lock_irqsave(&chsc_page_lock, flags); + memset(chsc_page, 0, PAGE_SIZE); + sda_area = chsc_page; + sda_area->request.length = 0x0400; + sda_area->request.code = 0x0031; + sda_area->operation_code = operation_code; - ret = chsc(&sda_area); + ret = chsc(sda_area); if (ret > 0) { ret = (ret == 3) ? -ENODEV : -EBUSY; goto out; } - switch (sda_area.response.code) { + switch (sda_area->response.code) { case 0x0101: ret = -EOPNOTSUPP; break; default: - ret = chsc_error_from_response(sda_area.response.code); + ret = chsc_error_from_response(sda_area->response.code); } if (ret != 0) CIO_CRW_EVENT(2, "chsc: sda (oc=%x) failed (rc=%04x)\n", - operation_code, sda_area.response.code); - out: - spin_unlock(&sda_lock); + operation_code, sda_area->response.code); +out: + spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; } @@ -898,10 +898,9 @@ chsc_determine_css_characteristics(void) u32 chsc_char[508]; } __attribute__ ((packed)) *scsc_area; - scsc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); - if (!scsc_area) - return -ENOMEM; - + spin_lock_irq(&chsc_page_lock); + memset(chsc_page, 0, PAGE_SIZE); + scsc_area = chsc_page; scsc_area->request.length = 0x0010; scsc_area->request.code = 0x0010; @@ -921,7 +920,7 @@ chsc_determine_css_characteristics(void) CIO_CRW_EVENT(2, "chsc: scsc failed (rc=%04x)\n", scsc_area->response.code); exit: - free_page ((unsigned long) scsc_area); + spin_unlock_irq(&chsc_page_lock); return result; } @@ -976,29 +975,29 @@ int chsc_sstpi(void *page, void *result, size_t size) return (rr->response.code == 0x0001) ? 0 : -EIO; } -static struct { - struct chsc_header request; - u32 word1; - struct subchannel_id sid; - u32 word3; - struct chsc_header response; - u32 word[11]; -} __attribute__ ((packed)) siosl_area __attribute__ ((__aligned__(PAGE_SIZE))); - int chsc_siosl(struct subchannel_id schid) { + struct { + struct chsc_header request; + u32 word1; + struct subchannel_id sid; + u32 word3; + struct chsc_header response; + u32 word[11]; + } __attribute__ ((packed)) *siosl_area; unsigned long flags; int ccode; int rc; - spin_lock_irqsave(&siosl_lock, flags); - memset(&siosl_area, 0, sizeof(siosl_area)); - siosl_area.request.length = 0x0010; - siosl_area.request.code = 0x0046; - siosl_area.word1 = 0x80000000; - siosl_area.sid = schid; + spin_lock_irqsave(&chsc_page_lock, flags); + memset(chsc_page, 0, PAGE_SIZE); + siosl_area = chsc_page; + siosl_area->request.length = 0x0010; + siosl_area->request.code = 0x0046; + siosl_area->word1 = 0x80000000; + siosl_area->sid = schid; - ccode = chsc(&siosl_area); + ccode = chsc(siosl_area); if (ccode > 0) { if (ccode == 3) rc = -ENODEV; @@ -1008,17 +1007,16 @@ int chsc_siosl(struct subchannel_id schid) schid.ssid, schid.sch_no, ccode); goto out; } - rc = chsc_error_from_response(siosl_area.response.code); + rc = chsc_error_from_response(siosl_area->response.code); if (rc) CIO_MSG_EVENT(2, "chsc: siosl failed for 0.%x.%04x (rc=%04x)\n", schid.ssid, schid.sch_no, - siosl_area.response.code); + siosl_area->response.code); else CIO_MSG_EVENT(4, "chsc: siosl succeeded for 0.%x.%04x\n", schid.ssid, schid.sch_no); out: - spin_unlock_irqrestore(&siosl_lock, flags); - + spin_unlock_irqrestore(&chsc_page_lock, flags); return rc; } EXPORT_SYMBOL_GPL(chsc_siosl); diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 6d669dd0dd84..852b61fc56ea 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -66,7 +66,7 @@ extern void chsc_init_cleanup(void); extern int chsc_enable_facility(int); struct channel_subsystem; extern int chsc_secm(struct channel_subsystem *, int); -int __chsc_do_secm(struct channel_subsystem *css, int enable, void *page); +int __chsc_do_secm(struct channel_subsystem *css, int enable); int chsc_chp_vary(struct chp_id chpid, int on); int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index fa1ad3aab66f..5e1235c6aba0 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -790,7 +790,6 @@ static struct notifier_block css_reboot_notifier = { static int css_power_event(struct notifier_block *this, unsigned long event, void *ptr) { - void *secm_area; int ret, i; switch (event) { @@ -806,15 +805,8 @@ static int css_power_event(struct notifier_block *this, unsigned long event, mutex_unlock(&css->mutex); continue; } - secm_area = (void *)get_zeroed_page(GFP_KERNEL | - GFP_DMA); - if (secm_area) { - if (__chsc_do_secm(css, 0, secm_area)) - ret = NOTIFY_BAD; - free_page((unsigned long)secm_area); - } else + if (__chsc_do_secm(css, 0)) ret = NOTIFY_BAD; - mutex_unlock(&css->mutex); } break; @@ -830,15 +822,8 @@ static int css_power_event(struct notifier_block *this, unsigned long event, mutex_unlock(&css->mutex); continue; } - secm_area = (void *)get_zeroed_page(GFP_KERNEL | - GFP_DMA); - if (secm_area) { - if (__chsc_do_secm(css, 1, secm_area)) - ret = NOTIFY_BAD; - free_page((unsigned long)secm_area); - } else + if (__chsc_do_secm(css, 1)) ret = NOTIFY_BAD; - mutex_unlock(&css->mutex); } /* search for subchannels, which appeared during hibernation */ @@ -867,10 +852,7 @@ static int __init css_bus_init(void) if (ret) return ret; - ret = chsc_determine_css_characteristics(); - if (ret == -ENOMEM) - goto out; - + chsc_determine_css_characteristics(); /* Try to enable MSS. */ ret = chsc_enable_facility(CHSC_SDA_OC_MSS); if (ret) From 906c9768c7750ce76b85597174b9c3599a6ca9f6 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:30 +0200 Subject: [PATCH 24/48] [S390] chsc: use the global page to determine the chp desriptor chsc_determine_channel_path_desc is called by a wrapper who allocates a response struct. The response data is then memcpy'ed to this response struct by chsc_determine_channel_path_desc. Change chsc_determine_base_channel_path_desc to use the global chsc_page and deliver it to the function doing the actual chsc call. The channel path desriptor is then directly read from the response data. As a result we get rid of the additional allocation for the response struct. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 54 +++++++++++-------------------------- drivers/s390/cio/chsc.h | 22 +++++++++++++-- drivers/s390/cio/chsc_sch.c | 12 ++++++--- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index d12c152cb691..27aca3906a53 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -639,36 +639,18 @@ chsc_secm(struct channel_subsystem *css, int enable) } int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, - int c, int m, - struct chsc_response_struct *resp) + int c, int m, void *page) { + struct chsc_scpd *scpd_area; int ccode, ret; - struct { - struct chsc_header request; - u32 : 2; - u32 m : 1; - u32 c : 1; - u32 fmt : 4; - u32 cssid : 8; - u32 : 4; - u32 rfmt : 4; - u32 first_chpid : 8; - u32 : 24; - u32 last_chpid : 8; - u32 zeroes1; - struct chsc_header response; - u8 data[PAGE_SIZE - 20]; - } __attribute__ ((packed)) *scpd_area; - if ((rfmt == 1) && !css_general_characteristics.fcs) return -EINVAL; if ((rfmt == 2) && !css_general_characteristics.cib) return -EINVAL; - spin_lock_irq(&chsc_page_lock); - memset(chsc_page, 0, PAGE_SIZE); - scpd_area = chsc_page; + memset(page, 0, PAGE_SIZE); + scpd_area = page; scpd_area->request.length = 0x0010; scpd_area->request.code = 0x0002; scpd_area->cssid = chpid.cssid; @@ -680,20 +662,13 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, scpd_area->rfmt = rfmt; ccode = chsc(scpd_area); - if (ccode > 0) { - ret = (ccode == 3) ? -ENODEV : -EBUSY; - goto out; - } + if (ccode > 0) + return (ccode == 3) ? -ENODEV : -EBUSY; ret = chsc_error_from_response(scpd_area->response.code); - if (ret == 0) - /* Success. */ - memcpy(resp, &scpd_area->response, scpd_area->response.length); - else + if (ret) CIO_CRW_EVENT(2, "chsc: scpd failed (rc=%04x)\n", scpd_area->response.code); -out: - spin_unlock_irq(&chsc_page_lock); return ret; } EXPORT_SYMBOL_GPL(chsc_determine_channel_path_desc); @@ -702,17 +677,18 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid, struct channel_path_desc *desc) { struct chsc_response_struct *chsc_resp; + struct chsc_scpd *scpd_area; int ret; - chsc_resp = kzalloc(sizeof(*chsc_resp), GFP_KERNEL); - if (!chsc_resp) - return -ENOMEM; - ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, chsc_resp); + spin_lock_irq(&chsc_page_lock); + scpd_area = chsc_page; + ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, scpd_area); if (ret) - goto out_free; + goto out; + chsc_resp = (void *)&scpd_area->response; memcpy(desc, &chsc_resp->data, sizeof(*desc)); -out_free: - kfree(chsc_resp); +out: + spin_unlock_irq(&chsc_page_lock); return ret; } diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h index 852b61fc56ea..6693f5e3176f 100644 --- a/drivers/s390/cio/chsc.h +++ b/drivers/s390/cio/chsc.h @@ -57,6 +57,25 @@ struct chsc_ssd_info { struct chp_id chpid[8]; u16 fla[8]; }; + +struct chsc_scpd { + struct chsc_header request; + u32:2; + u32 m:1; + u32 c:1; + u32 fmt:4; + u32 cssid:8; + u32:4; + u32 rfmt:4; + u32 first_chpid:8; + u32:24; + u32 last_chpid:8; + u32 zeroes1; + struct chsc_header response; + u8 data[PAGE_SIZE - 20]; +} __attribute__ ((packed)); + + extern int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd); extern int chsc_determine_css_characteristics(void); @@ -70,8 +89,7 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable); int chsc_chp_vary(struct chp_id chpid, int on); int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt, - int c, int m, - struct chsc_response_struct *resp); + int c, int m, void *page); int chsc_determine_base_channel_path_desc(struct chp_id chpid, struct channel_path_desc *desc); void chsc_chp_online(struct chp_id chpid); diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index f2b77e7bfc6f..3c3f3ffe2179 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -688,25 +688,31 @@ static int chsc_ioctl_conf_comp_list(void __user *user_ccl) static int chsc_ioctl_chpd(void __user *user_chpd) { + struct chsc_scpd *scpd_area; struct chsc_cpd_info *chpd; int ret; chpd = kzalloc(sizeof(*chpd), GFP_KERNEL); - if (!chpd) - return -ENOMEM; + scpd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!scpd_area || !chpd) { + ret = -ENOMEM; + goto out_free; + } if (copy_from_user(chpd, user_chpd, sizeof(*chpd))) { ret = -EFAULT; goto out_free; } ret = chsc_determine_channel_path_desc(chpd->chpid, chpd->fmt, chpd->rfmt, chpd->c, chpd->m, - &chpd->chpdb); + scpd_area); if (ret) goto out_free; + memcpy(&chpd->chpdb, &scpd_area->response, scpd_area->response.length); if (copy_to_user(user_chpd, chpd, sizeof(*chpd))) ret = -EFAULT; out_free: kfree(chpd); + free_page((unsigned long)scpd_area); return ret; } From c38a90a34cfc880eb2f7234c511cdb7d8bdebc45 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:31 +0200 Subject: [PATCH 25/48] [S390] cio: update descriptor in chsc_chp_vary Update the channel path descriptor at the beginning of to the vary_on operation. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chp.c | 6 ------ drivers/s390/cio/chp.h | 6 ++++++ drivers/s390/cio/chsc.c | 8 +++++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index b1addd78689d..2d32233943a9 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -54,12 +54,6 @@ static struct work_struct cfg_work; /* Wait queue for configure completion events. */ static wait_queue_head_t cfg_wait_queue; -/* Return channel_path struct for given chpid. */ -static inline struct channel_path *chpid_to_chp(struct chp_id chpid) -{ - return channel_subsystems[chpid.cssid]->chps[chpid.id]; -} - /* Set vary state for given chpid. */ static void set_chp_logically_online(struct chp_id chpid, int onoff) { diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h index 695ec20f1c59..12b4903d6fe3 100644 --- a/drivers/s390/cio/chp.h +++ b/drivers/s390/cio/chp.h @@ -52,6 +52,12 @@ struct channel_path { void *cmg_chars; }; +/* Return channel_path struct for given chpid. */ +static inline struct channel_path *chpid_to_chp(struct chp_id chpid) +{ + return channel_subsystems[chpid.cssid]->chps[chpid.id]; +} + int chp_get_status(struct chp_id chpid); u8 chp_get_sch_opm(struct subchannel *sch); int chp_is_registered(struct chp_id chpid); diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 27aca3906a53..44d7cc2f9738 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -495,6 +495,7 @@ __s390_vary_chpid_on(struct subchannel_id schid, void *data) */ int chsc_chp_vary(struct chp_id chpid, int on) { + struct channel_path *chp = chpid_to_chp(chpid); struct chp_link link; memset(&link, 0, sizeof(struct chp_link)); @@ -504,11 +505,12 @@ int chsc_chp_vary(struct chp_id chpid, int on) /* * Redo PathVerification on the devices the chpid connects to */ - - if (on) + if (on) { + /* Try to update the channel path descritor. */ + chsc_determine_base_channel_path_desc(chpid, &chp->desc); for_each_subchannel_staged(s390_subchannel_vary_chpid_on, __s390_vary_chpid_on, &link); - else + } else for_each_subchannel_staged(s390_subchannel_vary_chpid_off, NULL, &link); From 62da177ac2bc1f6f3707230070af268b1e689651 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:32 +0200 Subject: [PATCH 26/48] [S390] css: update descriptor after hibernate Update the channel path descriptors after hibernation. This is done unlocked, since we are the only active task at this time. Note: chsc_determine_base_channel_path_desc is changed to use spin_lock_irqsave, since it's called with interrupts disabled in this case. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chsc.c | 5 +++-- drivers/s390/cio/css.c | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index 44d7cc2f9738..1aaddea673e0 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -680,9 +680,10 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid, { struct chsc_response_struct *chsc_resp; struct chsc_scpd *scpd_area; + unsigned long flags; int ret; - spin_lock_irq(&chsc_page_lock); + spin_lock_irqsave(&chsc_page_lock, flags); scpd_area = chsc_page; ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, scpd_area); if (ret) @@ -690,7 +691,7 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid, chsc_resp = (void *)&scpd_area->response; memcpy(desc, &chsc_resp->data, sizeof(*desc)); out: - spin_unlock_irq(&chsc_page_lock); + spin_unlock_irqrestore(&chsc_page_lock, flags); return ret; } diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 5e1235c6aba0..771576bb2928 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1030,7 +1030,16 @@ subsys_initcall_sync(channel_subsystem_init_sync); void channel_subsystem_reinit(void) { + struct channel_path *chp; + struct chp_id chpid; + chsc_enable_facility(CHSC_SDA_OC_MSS); + chp_id_for_each(&chpid) { + chp = chpid_to_chp(chpid); + if (!chp) + continue; + chsc_determine_base_channel_path_desc(chpid, &chp->desc); + } } #ifdef CONFIG_PROC_FS From eb4f5d93d70458ea644e0f46737f0bcf87280e83 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:33 +0200 Subject: [PATCH 27/48] [S390] css: update subchannel description after hibernate Update the subchannel descriptor while resuming from hibernate in order to obtain current link addresses. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/css.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 771576bb2928..a5050e217150 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -1191,6 +1191,7 @@ static int css_pm_restore(struct device *dev) struct subchannel *sch = to_subchannel(dev); struct css_driver *drv; + css_update_ssd_info(sch); if (!sch->dev.driver) return 0; drv = to_cssdriver(sch->dev.driver); From 585b954e1f2fa325d425b0786e4525ac7c9ae575 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:34 +0200 Subject: [PATCH 28/48] [S390] cio: notify drivers of channel path events This patch adds a notification mechanism to inform ccw drivers about changes to channel paths, which occured while the device is online. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ccwdev.h | 12 ++++++++++++ drivers/s390/cio/device.c | 9 +++++++++ drivers/s390/cio/device_fsm.c | 32 ++++++++++++++++++++++++++++++++ drivers/s390/cio/device_pgid.c | 23 +++++++++++++++++------ drivers/s390/cio/io_sch.h | 7 +++++-- 5 files changed, 75 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index f3ba0fa98de6..e8501115eca8 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -91,6 +91,16 @@ struct ccw_device { void (*handler) (struct ccw_device *, unsigned long, struct irb *); }; +/* + * Possible events used by the path_event notifier. + */ +#define PE_NONE 0x0 +#define PE_PATH_GONE 0x1 /* A path is no longer available. */ +#define PE_PATH_AVAILABLE 0x2 /* A path has become available and + was successfully verified. */ +#define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had + to be established again. */ + /* * Possible CIO actions triggered by the unit check handler. */ @@ -109,6 +119,7 @@ enum uc_todo { * @set_online: called when setting device online * @set_offline: called when setting device offline * @notify: notify driver of device state changes + * @path_event: notify driver of channel path events * @shutdown: called at device shutdown * @prepare: prepare for pm state transition * @complete: undo work done in @prepare @@ -127,6 +138,7 @@ struct ccw_driver { int (*set_online) (struct ccw_device *); int (*set_offline) (struct ccw_device *); int (*notify) (struct ccw_device *, int); + void (*path_event) (struct ccw_device *, int *); void (*shutdown) (struct ccw_device *); int (*prepare) (struct ccw_device *); void (*complete) (struct ccw_device *); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 07b1a074beaf..881bdfd99140 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1147,6 +1147,7 @@ static void io_subchannel_terminate_path(struct subchannel *sch, u8 mask) static int io_subchannel_chp_event(struct subchannel *sch, struct chp_link *link, int event) { + struct ccw_device *cdev = sch_get_cdev(sch); int mask; mask = chp_ssd_get_mask(&sch->ssd_info, link); @@ -1156,22 +1157,30 @@ static int io_subchannel_chp_event(struct subchannel *sch, case CHP_VARY_OFF: sch->opm &= ~mask; sch->lpm &= ~mask; + if (cdev) + cdev->private->path_gone_mask |= mask; io_subchannel_terminate_path(sch, mask); break; case CHP_VARY_ON: sch->opm |= mask; sch->lpm |= mask; + if (cdev) + cdev->private->path_new_mask |= mask; io_subchannel_verify(sch); break; case CHP_OFFLINE: if (cio_update_schib(sch)) return -ENODEV; + if (cdev) + cdev->private->path_gone_mask |= mask; io_subchannel_terminate_path(sch, mask); break; case CHP_ONLINE: if (cio_update_schib(sch)) return -ENODEV; sch->lpm |= mask & sch->opm; + if (cdev) + cdev->private->path_new_mask |= mask; io_subchannel_verify(sch); break; } diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index c9b852647f01..4395c01a9dac 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -349,9 +349,13 @@ int ccw_device_notify(struct ccw_device *cdev, int event) static void ccw_device_oper_notify(struct ccw_device *cdev) { + struct subchannel *sch = to_subchannel(cdev->dev.parent); + if (ccw_device_notify(cdev, CIO_OPER) == NOTIFY_OK) { /* Reenable channel measurements, if needed. */ ccw_device_sched_todo(cdev, CDEV_TODO_ENABLE_CMF); + /* Save indication for new paths. */ + cdev->private->path_new_mask = sch->vpm; return; } /* Driver doesn't want device back. */ @@ -462,6 +466,32 @@ static void ccw_device_request_event(struct ccw_device *cdev, enum dev_event e) } } +static void ccw_device_report_path_events(struct ccw_device *cdev) +{ + struct subchannel *sch = to_subchannel(cdev->dev.parent); + int path_event[8]; + int chp, mask; + + for (chp = 0, mask = 0x80; chp < 8; chp++, mask >>= 1) { + path_event[chp] = PE_NONE; + if (mask & cdev->private->path_gone_mask & ~(sch->vpm)) + path_event[chp] |= PE_PATH_GONE; + if (mask & cdev->private->path_new_mask & sch->vpm) + path_event[chp] |= PE_PATH_AVAILABLE; + if (mask & cdev->private->pgid_reset_mask & sch->vpm) + path_event[chp] |= PE_PATHGROUP_ESTABLISHED; + } + if (cdev->online && cdev->drv->path_event) + cdev->drv->path_event(cdev, path_event); +} + +static void ccw_device_reset_path_events(struct ccw_device *cdev) +{ + cdev->private->path_gone_mask = 0; + cdev->private->path_new_mask = 0; + cdev->private->pgid_reset_mask = 0; +} + void ccw_device_verify_done(struct ccw_device *cdev, int err) { @@ -498,6 +528,7 @@ ccw_device_verify_done(struct ccw_device *cdev, int err) &cdev->private->irb); memset(&cdev->private->irb, 0, sizeof(struct irb)); } + ccw_device_report_path_events(cdev); break; case -ETIME: case -EUSERS: @@ -516,6 +547,7 @@ ccw_device_verify_done(struct ccw_device *cdev, int err) ccw_device_done(cdev, DEV_STATE_NOT_OPER); break; } + ccw_device_reset_path_events(cdev); } /* diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index 82a5ad0d63f6..07a4fd29f096 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -213,6 +213,17 @@ static void spid_start(struct ccw_device *cdev) spid_do(cdev); } +static int pgid_is_reset(struct pgid *p) +{ + char *c; + + for (c = (char *)p + 1; c < (char *)(p + 1); c++) { + if (*c != 0) + return 0; + } + return 1; +} + static int pgid_cmp(struct pgid *p1, struct pgid *p2) { return memcmp((char *) p1 + 1, (char *) p2 + 1, @@ -223,7 +234,7 @@ static int pgid_cmp(struct pgid *p1, struct pgid *p2) * Determine pathgroup state from PGID data. */ static void pgid_analyze(struct ccw_device *cdev, struct pgid **p, - int *mismatch, int *reserved, int *reset) + int *mismatch, int *reserved, u8 *reset) { struct pgid *pgid = &cdev->private->pgid[0]; struct pgid *first = NULL; @@ -238,9 +249,8 @@ static void pgid_analyze(struct ccw_device *cdev, struct pgid **p, continue; if (pgid->inf.ps.state2 == SNID_STATE2_RESVD_ELSE) *reserved = 1; - if (pgid->inf.ps.state1 == SNID_STATE1_RESET) { - /* A PGID was reset. */ - *reset = 1; + if (pgid_is_reset(pgid)) { + *reset |= lpm; continue; } if (!first) { @@ -307,7 +317,7 @@ static void snid_done(struct ccw_device *cdev, int rc) struct pgid *pgid; int mismatch = 0; int reserved = 0; - int reset = 0; + u8 reset = 0; u8 donepm; if (rc) @@ -321,11 +331,12 @@ static void snid_done(struct ccw_device *cdev, int rc) donepm = pgid_to_donepm(cdev); sch->vpm = donepm & sch->opm; cdev->private->pgid_todo_mask &= ~donepm; + cdev->private->pgid_reset_mask |= reset; pgid_fill(cdev, pgid); } out: CIO_MSG_EVENT(2, "snid: device 0.%x.%04x: rc=%d pvm=%02x vpm=%02x " - "todo=%02x mism=%d rsvd=%d reset=%d\n", id->ssid, + "todo=%02x mism=%d rsvd=%d reset=%02x\n", id->ssid, id->devno, rc, cdev->private->pgid_valid_mask, sch->vpm, cdev->private->pgid_todo_mask, mismatch, reserved, reset); switch (rc) { diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index 469ef93f2302..d024d2c21897 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -151,8 +151,11 @@ struct ccw_device_private { struct subchannel_id schid; /* subchannel number */ struct ccw_request req; /* internal I/O request */ int iretry; - u8 pgid_valid_mask; /* mask of valid PGIDs */ - u8 pgid_todo_mask; /* mask of PGIDs to be adjusted */ + u8 pgid_valid_mask; /* mask of valid PGIDs */ + u8 pgid_todo_mask; /* mask of PGIDs to be adjusted */ + u8 pgid_reset_mask; /* mask of PGIDs which were reset */ + u8 path_gone_mask; /* mask of paths, that became unavailable */ + u8 path_new_mask; /* mask of paths, that became available */ struct { unsigned int fast:1; /* post with "channel end" */ unsigned int repall:1; /* report every interrupt status */ From 36bf96801e3a2c4efae0bb8c1897a530fc9ca13e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:35 +0200 Subject: [PATCH 29/48] [S390] fix SIGBUS handling Raise SIGBUS with a siginfo structure. Deliver BUS_ADRERR as si_code and the address of the fault in the si_addr field. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b49d12073f10..bae2c282221c 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -212,14 +212,21 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code, unsigned long trans_exc_code) { struct task_struct *tsk = current; + unsigned long address; + struct siginfo si; /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - tsk->thread.prot_addr = trans_exc_code & __FAIL_ADDR_MASK; + address = trans_exc_code & __FAIL_ADDR_MASK; + tsk->thread.prot_addr = address; tsk->thread.trap_no = int_code; - force_sig(SIGBUS, tsk); + si.si_signo = SIGBUS; + si.si_errno = 0; + si.si_code = BUS_ADRERR; + si.si_addr = (void __user *) address; + force_sig_info(SIGBUS, &si, tsk); } #ifdef CONFIG_S390_EXEC_PROTECT @@ -279,10 +286,11 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code, if (fault & VM_FAULT_OOM) pagefault_out_of_memory(); else if (fault & VM_FAULT_SIGBUS) { - do_sigbus(regs, int_code, trans_exc_code); /* Kernel mode? Handle exceptions or die */ if (!(regs->psw.mask & PSW_MASK_PSTATE)) do_no_context(regs, int_code, trans_exc_code); + else + do_sigbus(regs, int_code, trans_exc_code); } else BUG(); break; From 84afdcee620b1640f2a145c07febae4ed68947f9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:36 +0200 Subject: [PATCH 30/48] [S390] pgtable: move pte_mkhuge() from hugetlb.h to pgtable.h All architectures besides s390 have pte_mkhuge() defined in pgtable.h. So move the function to pgtable.h on s390 as well. Fixes a compile error introduced with "hugetlb: hugepage migration core" in linux-next which only happens on s390. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/hugetlb.h | 26 -------------------------- arch/s390/include/asm/pgtable.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bb8343d157bc..b56403c2df28 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -37,32 +37,6 @@ static inline int prepare_hugepage_range(struct file *file, int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -static inline pte_t pte_mkhuge(pte_t pte) -{ - /* - * PROT_NONE needs to be remapped from the pte type to the ste type. - * The HW invalid bit is also different for pte and ste. The pte - * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE - * bit, so we don't have to clear it. - */ - if (pte_val(pte) & _PAGE_INVALID) { - if (pte_val(pte) & _PAGE_SWT) - pte_val(pte) |= _HPAGE_TYPE_NONE; - pte_val(pte) |= _SEGMENT_ENTRY_INV; - } - /* - * Clear SW pte bits SWT and SWX, there are no SW bits in a segment - * table entry. - */ - pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); - /* - * Also set the change-override bit because we don't need dirty bit - * tracking for hugetlbfs pages. - */ - pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); - return pte; -} - static inline pte_t huge_pte_wrprotect(pte_t pte) { pte_val(pte) |= _PAGE_RO; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f79e7bb9ae1e..986dc9476c21 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -772,6 +772,34 @@ static inline pte_t pte_mkspecial(pte_t pte) return pte; } +#ifdef CONFIG_HUGETLB_PAGE +static inline pte_t pte_mkhuge(pte_t pte) +{ + /* + * PROT_NONE needs to be remapped from the pte type to the ste type. + * The HW invalid bit is also different for pte and ste. The pte + * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE + * bit, so we don't have to clear it. + */ + if (pte_val(pte) & _PAGE_INVALID) { + if (pte_val(pte) & _PAGE_SWT) + pte_val(pte) |= _HPAGE_TYPE_NONE; + pte_val(pte) |= _SEGMENT_ENTRY_INV; + } + /* + * Clear SW pte bits SWT and SWX, there are no SW bits in a segment + * table entry. + */ + pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX); + /* + * Also set the change-override bit because we don't need dirty bit + * tracking for hugetlbfs pages. + */ + pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + return pte; +} +#endif + #ifdef CONFIG_PGSTE /* * Get (and clear) the user dirty bit for a PTE. From 1e54622e0403891b10f2105663e0f9dd595a1f17 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:37 +0200 Subject: [PATCH 31/48] [S390] cleanup lowcore access from program checks Read all required fields for program checks from the lowcore in the first level interrupt handler in entry[64].S. If the context that caused the fault was enabled for interrupts we can now re-enable the irqs in entry[64].S. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 35 ++----- arch/s390/kernel/entry.h | 2 +- arch/s390/kernel/entry64.S | 36 ++----- arch/s390/kernel/traps.c | 170 ++++++++++++++------------------- arch/s390/mm/fault.c | 36 +++---- 6 files changed, 109 insertions(+), 171 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 5232278d79ad..76328deb31f7 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -84,6 +84,7 @@ int main(void) DEFINE(__LC_SVC_INT_CODE, offsetof(struct _lowcore, svc_code)); DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc)); DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code)); + DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code)); DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_perc_atmid)); DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address)); DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index bea9ee37ac9d..adf25246f72e 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -72,25 +72,9 @@ STACK_SIZE = 1 << STACK_SHIFT l %r1,BASED(.Ltrace_irq_off_caller) basr %r14,%r1 .endm - - .macro TRACE_IRQS_CHECK_ON - tm SP_PSW(%r15),0x03 # irqs enabled? - bz BASED(0f) - TRACE_IRQS_ON -0: - .endm - - .macro TRACE_IRQS_CHECK_OFF - tm SP_PSW(%r15),0x03 # irqs enabled? - bz BASED(0f) - TRACE_IRQS_OFF -0: - .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK_ON -#define TRACE_IRQS_CHECK_OFF #endif #ifdef CONFIG_LOCKDEP @@ -198,6 +182,12 @@ STACK_SIZE = 1 << STACK_SHIFT lpsw \psworg # back to caller .endm + .macro REENABLE_IRQS + mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) + ni __SF_EMPTY(%r15),0xbf + ssm __SF_EMPTY(%r15) + .endm + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -440,13 +430,11 @@ kernel_execve: br %r14 # execve succeeded. 0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts - TRACE_IRQS_OFF l %r15,__LC_KERNEL_STACK # load ksp s %r15,BASED(.Lc_spsize) # make room for registers & psw l %r9,__LC_THREAD_INFO mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15) - TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts l %r1,BASED(.Lexecve_tail) basr %r14,%r1 @@ -483,9 +471,10 @@ pgm_check_handler: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime: - TRACE_IRQS_CHECK_OFF l %r9,__LC_THREAD_INFO # load pointer to thread_info struct l %r3,__LC_PGM_ILC # load program interruption code + l %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS la %r8,0x7f nr %r8,%r3 pgm_do_call: @@ -495,7 +484,6 @@ pgm_do_call: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r7 # branch to interrupt-handler pgm_exit: - TRACE_IRQS_CHECK_ON b BASED(sysc_return) # @@ -523,7 +511,6 @@ pgm_per_std: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER pgm_no_vtime2: - TRACE_IRQS_CHECK_OFF l %r9,__LC_THREAD_INFO # load pointer to thread_info struct l %r1,__TI_task(%r9) tm SP_PSW+1(%r15),0x01 # kernel per event ? @@ -533,6 +520,8 @@ pgm_no_vtime2: mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP l %r3,__LC_PGM_ILC # load program interruption code + l %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS la %r8,0x7f nr %r8,%r3 # clear per-event-bit and ilc be BASED(pgm_exit2) # only per or per+check ? @@ -542,8 +531,6 @@ pgm_no_vtime2: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r7 # branch to interrupt-handler pgm_exit2: - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts b BASED(sysc_return) # @@ -557,13 +544,11 @@ pgm_svcper: mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER lh %r7,0x8a # get svc number from lowcore l %r9,__LC_THREAD_INFO # load pointer to thread_info struct - TRACE_IRQS_OFF l %r8,__TI_task(%r9) mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID mvc __THREAD_per+__PER_address(4,%r8),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID oi __TI_flags+3(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lm %r2,%r6,SP_R2(%r15) # load svc arguments b BASED(sysc_do_svc) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index ff579b6bde06..714ff2e57c6b 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -5,7 +5,7 @@ #include #include -typedef void pgm_check_handler_t(struct pt_regs *, long); +typedef void pgm_check_handler_t(struct pt_regs *, long, unsigned long); extern pgm_check_handler_t *pgm_check_table[128]; pgm_check_handler_t do_protection_exception; pgm_check_handler_t do_dat_exception; diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 8bccec15ea90..2d205e4e7bb6 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -79,25 +79,9 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ basr %r2,%r0 brasl %r14,trace_hardirqs_off_caller .endm - - .macro TRACE_IRQS_CHECK_ON - tm SP_PSW(%r15),0x03 # irqs enabled? - jz 0f - TRACE_IRQS_ON -0: - .endm - - .macro TRACE_IRQS_CHECK_OFF - tm SP_PSW(%r15),0x03 # irqs enabled? - jz 0f - TRACE_IRQS_OFF -0: - .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define TRACE_IRQS_CHECK_ON -#define TRACE_IRQS_CHECK_OFF #endif #ifdef CONFIG_LOCKDEP @@ -207,6 +191,12 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \ 0: .endm + .macro REENABLE_IRQS + mvc __SF_EMPTY(1,%r15),SP_PSW(%r15) + ni __SF_EMPTY(%r15),0xbf + ssm __SF_EMPTY(%r15) + .endm + /* * Scheduler resume function, called by switch_to * gpr2 = (task_struct *) prev @@ -443,14 +433,12 @@ kernel_execve: br %r14 # execve succeeded. 0: stnsm __SF_EMPTY(%r15),0xfc # disable interrupts -# TRACE_IRQS_OFF lg %r15,__LC_KERNEL_STACK # load ksp aghi %r15,-SP_SIZE # make room for registers & psw lg %r13,__LC_SVC_NEW_PSW+8 mvc SP_PTREGS(__PT_SIZE,%r15),0(%r12) # copy pt_regs lg %r12,__LC_THREAD_INFO xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) -# TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts brasl %r14,execve_tail j sysc_return @@ -490,19 +478,18 @@ pgm_check_handler: LAST_BREAK pgm_no_vtime: HANDLE_SIE_INTERCEPT - TRACE_IRQS_CHECK_OFF stg %r11,SP_ARGS(%r15) lgf %r3,__LC_PGM_ILC # load program interruption code + lg %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS lghi %r8,0x7f ngr %r8,%r3 -pgm_do_call: sll %r8,3 larl %r1,pgm_check_table lg %r1,0(%r8,%r1) # load address of handler routine la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to interrupt-handler pgm_exit: - TRACE_IRQS_CHECK_ON j sysc_return # @@ -533,7 +520,6 @@ pgm_per_std: LAST_BREAK pgm_no_vtime2: HANDLE_SIE_INTERCEPT - TRACE_IRQS_CHECK_OFF lg %r1,__TI_task(%r12) tm SP_PSW+1(%r15),0x01 # kernel per event ? jz kernel_per @@ -542,6 +528,8 @@ pgm_no_vtime2: mvc __THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP lgf %r3,__LC_PGM_ILC # load program interruption code + lg %r4,__LC_TRANS_EXC_CODE + REENABLE_IRQS lghi %r8,0x7f ngr %r8,%r3 # clear per-event-bit and ilc je pgm_exit2 @@ -551,8 +539,6 @@ pgm_no_vtime2: la %r2,SP_PTREGS(%r15) # address of register-save area basr %r14,%r1 # branch to interrupt-handler pgm_exit2: - TRACE_IRQS_ON - stosm __SF_EMPTY(%r15),0x03 # reenable interrupts j sysc_return # @@ -568,13 +554,11 @@ pgm_svcper: UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER LAST_BREAK - TRACE_IRQS_OFF lg %r8,__TI_task(%r12) mvc __THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID mvc __THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS mvc __THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID oi __TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP - TRACE_IRQS_ON stosm __SF_EMPTY(%r15),0x03 # reenable interrupts lmg %r2,%r6,SP_R2(%r15) # load svc arguments j sysc_do_svc diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 5d8f0f3d0250..e9b063e7d75f 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -329,27 +329,19 @@ int is_valid_bugaddr(unsigned long addr) return 1; } -static void __kprobes inline do_trap(long interruption_code, int signr, - char *str, struct pt_regs *regs, - siginfo_t *info) +static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str, + struct pt_regs *regs, siginfo_t *info) { - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); - - if (notify_die(DIE_TRAP, str, regs, interruption_code, - interruption_code, signr) == NOTIFY_STOP) + if (notify_die(DIE_TRAP, str, regs, pgm_int_code, + pgm_int_code, signr) == NOTIFY_STOP) return; if (regs->psw.mask & PSW_MASK_PSTATE) { struct task_struct *tsk = current; - tsk->thread.trap_no = interruption_code & 0xffff; + tsk->thread.trap_no = pgm_int_code & 0xffff; force_sig_info(signr, info, tsk); - report_user_fault(regs, interruption_code, signr); + report_user_fault(regs, pgm_int_code, signr); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); @@ -361,14 +353,16 @@ static void __kprobes inline do_trap(long interruption_code, int signr, btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs); if (btt == BUG_TRAP_TYPE_WARN) return; - die(str, regs, interruption_code); + die(str, regs, pgm_int_code); } } } -static inline void __user *get_check_address(struct pt_regs *regs) +static inline void __user *get_psw_address(struct pt_regs *regs, + long pgm_int_code) { - return (void __user *)((regs->psw.addr-S390_lowcore.pgm_ilc) & PSW_ADDR_INSN); + return (void __user *) + ((regs->psw.addr - (pgm_int_code >> 16)) & PSW_ADDR_INSN); } void __kprobes do_single_step(struct pt_regs *regs) @@ -381,57 +375,57 @@ void __kprobes do_single_step(struct pt_regs *regs) force_sig(SIGTRAP, current); } -static void default_trap_handler(struct pt_regs * regs, long interruption_code) +static void default_trap_handler(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { if (regs->psw.mask & PSW_MASK_PSTATE) { - local_irq_enable(); - report_user_fault(regs, interruption_code, SIGSEGV); + report_user_fault(regs, pgm_int_code, SIGSEGV); do_exit(SIGSEGV); } else - die("Unknown program exception", regs, interruption_code); + die("Unknown program exception", regs, pgm_int_code); } -#define DO_ERROR_INFO(signr, str, name, sicode, siaddr) \ -static void name(struct pt_regs * regs, long interruption_code) \ +#define DO_ERROR_INFO(name, signr, sicode, str) \ +static void name(struct pt_regs *regs, long pgm_int_code, \ + unsigned long trans_exc_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ info.si_errno = 0; \ info.si_code = sicode; \ - info.si_addr = siaddr; \ - do_trap(interruption_code, signr, str, regs, &info); \ + info.si_addr = get_psw_address(regs, pgm_int_code); \ + do_trap(pgm_int_code, signr, str, regs, &info); \ } -DO_ERROR_INFO(SIGILL, "addressing exception", addressing_exception, - ILL_ILLADR, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "execute exception", execute_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint divide exception", divide_exception, - FPE_INTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "fixpoint overflow exception", overflow_exception, - FPE_INTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP overflow exception", hfp_overflow_exception, - FPE_FLTOVF, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP underflow exception", hfp_underflow_exception, - FPE_FLTUND, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP significance exception", hfp_significance_exception, - FPE_FLTRES, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP divide exception", hfp_divide_exception, - FPE_FLTDIV, get_check_address(regs)) -DO_ERROR_INFO(SIGFPE, "HFP square root exception", hfp_sqrt_exception, - FPE_FLTINV, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "operand exception", operand_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "privileged operation", privileged_op, - ILL_PRVOPC, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "special operation exception", special_op_exception, - ILL_ILLOPN, get_check_address(regs)) -DO_ERROR_INFO(SIGILL, "translation exception", translation_exception, - ILL_ILLOPN, get_check_address(regs)) +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, + "addressing exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, + "execute exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, + "fixpoint divide exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, + "fixpoint overflow exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, + "HFP overflow exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, + "HFP underflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, + "HFP significance exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, + "HFP divide exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, + "HFP square root exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, + "operand exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, + "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, + "special operation exception") +DO_ERROR_INFO(translation_exception, SIGILL, ILL_ILLOPN, + "translation exception") -static inline void -do_fp_trap(struct pt_regs *regs, void __user *location, - int fpc, long interruption_code) +static inline void do_fp_trap(struct pt_regs *regs, void __user *location, + int fpc, long pgm_int_code) { siginfo_t si; @@ -454,25 +448,19 @@ do_fp_trap(struct pt_regs *regs, void __user *location, si.si_code = FPE_FLTRES; } current->thread.ieee_instruction_pointer = (addr_t) location; - do_trap(interruption_code, SIGFPE, + do_trap(pgm_int_code, SIGFPE, "floating point exception", regs, &si); } -static void illegal_op(struct pt_regs * regs, long interruption_code) +static void illegal_op(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { siginfo_t info; __u8 opcode[6]; __u16 __user *location; int signal = 0; - location = get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_psw_address(regs, pgm_int_code); if (regs->psw.mask & PSW_MASK_PSTATE) { if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) @@ -512,7 +500,7 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) * If we get an illegal op in kernel mode, send it through the * kprobes notifier. If kprobes doesn't pick it up, SIGILL */ - if (notify_die(DIE_BPT, "bpt", regs, interruption_code, + if (notify_die(DIE_BPT, "bpt", regs, pgm_int_code, 3, SIGTRAP) != NOTIFY_STOP) signal = SIGILL; } @@ -520,13 +508,13 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) #ifdef CONFIG_MATHEMU if (signal == SIGFPE) do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); + current->thread.fp_regs.fpc, pgm_int_code); else if (signal == SIGSEGV) { info.si_signo = signal; info.si_errno = 0; info.si_code = SEGV_MAPERR; info.si_addr = (void __user *) location; - do_trap(interruption_code, signal, + do_trap(pgm_int_code, signal, "user address fault", regs, &info); } else #endif @@ -535,28 +523,22 @@ static void illegal_op(struct pt_regs * regs, long interruption_code) info.si_errno = 0; info.si_code = ILL_ILLOPC; info.si_addr = (void __user *) location; - do_trap(interruption_code, signal, + do_trap(pgm_int_code, signal, "illegal operation", regs, &info); } } #ifdef CONFIG_MATHEMU -asmlinkage void -specification_exception(struct pt_regs * regs, long interruption_code) +asmlinkage void specification_exception(struct pt_regs *regs, + long pgm_int_code, + unsigned long trans_exc_code) { __u8 opcode[6]; __u16 __user *location = NULL; int signal = 0; - location = (__u16 __user *) get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = (__u16 __user *) get_psw_address(regs, pgm_int_code); if (regs->psw.mask & PSW_MASK_PSTATE) { get_user(*((__u16 *) opcode), location); @@ -592,35 +574,29 @@ specification_exception(struct pt_regs * regs, long interruption_code) if (signal == SIGFPE) do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); + current->thread.fp_regs.fpc, pgm_int_code); else if (signal) { siginfo_t info; info.si_signo = signal; info.si_errno = 0; info.si_code = ILL_ILLOPN; info.si_addr = location; - do_trap(interruption_code, signal, + do_trap(pgm_int_code, signal, "specification exception", regs, &info); } } #else -DO_ERROR_INFO(SIGILL, "specification exception", specification_exception, - ILL_ILLOPN, get_check_address(regs)); +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, + "specification exception"); #endif -static void data_exception(struct pt_regs * regs, long interruption_code) +static void data_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { __u16 __user *location; int signal = 0; - location = get_check_address(regs); - - /* - * We got all needed information from the lowcore and can - * now safely switch on interrupts. - */ - if (regs->psw.mask & PSW_MASK_PSTATE) - local_irq_enable(); + location = get_psw_address(regs, pgm_int_code); if (MACHINE_HAS_IEEE) asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc)); @@ -686,19 +662,19 @@ static void data_exception(struct pt_regs * regs, long interruption_code) signal = SIGILL; if (signal == SIGFPE) do_fp_trap(regs, location, - current->thread.fp_regs.fpc, interruption_code); + current->thread.fp_regs.fpc, pgm_int_code); else if (signal) { siginfo_t info; info.si_signo = signal; info.si_errno = 0; info.si_code = ILL_ILLOPN; info.si_addr = location; - do_trap(interruption_code, signal, - "data exception", regs, &info); + do_trap(pgm_int_code, signal, "data exception", regs, &info); } } -static void space_switch_exception(struct pt_regs * regs, long int_code) +static void space_switch_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { siginfo_t info; @@ -709,8 +685,8 @@ static void space_switch_exception(struct pt_regs * regs, long int_code) info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_PRVOPC; - info.si_addr = get_check_address(regs); - do_trap(int_code, SIGILL, "space switch event", regs, &info); + info.si_addr = get_psw_address(regs, pgm_int_code); + do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info); } asmlinkage void kernel_stack_overflow(struct pt_regs * regs) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index bae2c282221c..b6570069b127 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -333,12 +333,6 @@ static inline int do_exception(struct pt_regs *regs, int access, goto out; address = trans_exc_code & __FAIL_ADDR_MASK; - /* - * When we get here, the fault happened in the current - * task's user address space, so we can switch on the - * interrupts again and then search the VMAs - */ - local_irq_enable(); perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); @@ -397,20 +391,20 @@ static inline int do_exception(struct pt_regs *regs, int access, return fault; } -void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) +void __kprobes do_protection_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { - unsigned long trans_exc_code = S390_lowcore.trans_exc_code; int fault; /* Protection exception is supressing, decrement psw address. */ - regs->psw.addr -= (int_code >> 16); + regs->psw.addr -= (pgm_int_code >> 16); /* * Check for low-address protection. This needs to be treated * as a special case because the translation exception code * field is not guaranteed to contain valid data in this case. */ if (unlikely(!(trans_exc_code & 4))) { - do_low_address(regs, int_code, trans_exc_code); + do_low_address(regs, pgm_int_code, trans_exc_code); return; } fault = do_exception(regs, VM_WRITE, trans_exc_code); @@ -418,9 +412,9 @@ void __kprobes do_protection_exception(struct pt_regs *regs, long int_code) do_fault_error(regs, 4, trans_exc_code, fault); } -void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) +void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { - unsigned long trans_exc_code = S390_lowcore.trans_exc_code; int access, fault; access = VM_READ | VM_EXEC | VM_WRITE; @@ -431,21 +425,19 @@ void __kprobes do_dat_exception(struct pt_regs *regs, long int_code) #endif fault = do_exception(regs, access, trans_exc_code); if (unlikely(fault)) - do_fault_error(regs, int_code & 255, trans_exc_code, fault); + do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault); } #ifdef CONFIG_64BIT -void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) +void __kprobes do_asce_exception(struct pt_regs *regs, long pgm_int_code, + unsigned long trans_exc_code) { - unsigned long trans_exc_code = S390_lowcore.trans_exc_code; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm)) goto no_context; - local_irq_enable(); - down_read(&mm->mmap_sem); vma = find_vma(mm, trans_exc_code & __FAIL_ADDR_MASK); up_read(&mm->mmap_sem); @@ -457,16 +449,16 @@ void __kprobes do_asce_exception(struct pt_regs *regs, long int_code) /* User mode accesses just cause a SIGSEGV */ if (regs->psw.mask & PSW_MASK_PSTATE) { - do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code); + do_sigsegv(regs, pgm_int_code, SEGV_MAPERR, trans_exc_code); return; } no_context: - do_no_context(regs, int_code, trans_exc_code); + do_no_context(regs, pgm_int_code, trans_exc_code); } #endif -int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) +int __handle_fault(unsigned long uaddr, unsigned long pgm_int_code, int write) { struct pt_regs regs; int access, fault; @@ -477,14 +469,14 @@ int __handle_fault(unsigned long uaddr, unsigned long int_code, int write_user) regs.psw.addr = (unsigned long) __builtin_return_address(0); regs.psw.addr |= PSW_ADDR_AMODE; uaddr &= PAGE_MASK; - access = write_user ? VM_WRITE : VM_READ; + access = write ? VM_WRITE : VM_READ; fault = do_exception(®s, access, uaddr | 2); if (unlikely(fault)) { if (fault & VM_FAULT_OOM) { pagefault_out_of_memory(); fault = 0; } else if (fault & VM_FAULT_SIGBUS) - do_sigbus(®s, int_code, uaddr); + do_sigbus(®s, pgm_int_code, uaddr); } return fault ? -EFAULT : 0; } From f6649a7e5a9ee99e9623878f4a5579cc2f6cdd51 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:38 +0200 Subject: [PATCH 32/48] [S390] cleanup lowcore access from external interrupts Read external interrupts parameters from the lowcore in the first level interrupt handler in entry[64].S. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/s390_ext.h | 2 +- arch/s390/kernel/asm-offsets.c | 2 -- arch/s390/kernel/entry.S | 3 ++- arch/s390/kernel/entry.h | 2 +- arch/s390/kernel/entry64.S | 5 ++++- arch/s390/kernel/s390_ext.c | 9 ++++++--- arch/s390/kernel/smp.c | 3 ++- arch/s390/kernel/time.c | 17 +++++++++-------- arch/s390/kernel/vtime.c | 3 ++- arch/s390/mm/fault.c | 11 ++++++++--- drivers/s390/block/dasd_diag.c | 19 ++++++++----------- drivers/s390/block/dasd_diag.h | 4 ---- drivers/s390/char/sclp.c | 14 +++++++------- drivers/s390/kvm/kvm_virtio.c | 9 +++++---- net/iucv/iucv.c | 3 ++- 15 files changed, 57 insertions(+), 49 deletions(-) diff --git a/arch/s390/include/asm/s390_ext.h b/arch/s390/include/asm/s390_ext.h index 2afc060266a2..1a9307e70842 100644 --- a/arch/s390/include/asm/s390_ext.h +++ b/arch/s390/include/asm/s390_ext.h @@ -12,7 +12,7 @@ #include -typedef void (*ext_int_handler_t)(__u16 code); +typedef void (*ext_int_handler_t)(unsigned int, unsigned int, unsigned long); typedef struct ext_int_info_t { struct ext_int_info_t *next; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 76328deb31f7..f3c1b823c9a8 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -143,10 +143,8 @@ int main(void) DEFINE(__LC_GPREGS_SAVE_AREA, offsetof(struct _lowcore, gpregs_save_area)); DEFINE(__LC_CREGS_SAVE_AREA, offsetof(struct _lowcore, cregs_save_area)); #ifdef CONFIG_32BIT - DEFINE(__LC_PFAULT_INTPARM, offsetof(struct _lowcore, ext_params)); DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr)); #else /* CONFIG_32BIT */ - DEFINE(__LC_PFAULT_INTPARM, offsetof(struct _lowcore, ext_params2)); DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index adf25246f72e..736d7010629e 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -722,7 +722,8 @@ ext_no_vtime: l %r9,__LC_THREAD_INFO # load pointer to thread_info struct TRACE_IRQS_OFF la %r2,SP_PTREGS(%r15) # address of register-save area - lh %r3,__LC_EXT_INT_CODE # get interruption code + l %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + l %r4,__LC_EXT_PARAMS # get external parameters l %r1,BASED(.Ldo_extint) basr %r14,%r1 b BASED(io_return) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 714ff2e57c6b..95c1dfc4ef31 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -19,7 +19,7 @@ void do_signal(struct pt_regs *regs); int handle_signal32(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); -void do_extint(struct pt_regs *regs, unsigned short code); +void do_extint(struct pt_regs *regs, unsigned int, unsigned int, unsigned long); int __cpuinit start_secondary(void *cpuvoid); void __init startup_init(void); void die(const char * str, struct pt_regs * regs, long err); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 2d205e4e7bb6..e4038ea4dc57 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -727,8 +727,11 @@ ext_int_handler: ext_no_vtime: HANDLE_SIE_INTERCEPT TRACE_IRQS_OFF + lghi %r1,4096 la %r2,SP_PTREGS(%r15) # address of register-save area - llgh %r3,__LC_EXT_INT_CODE # get interruption code + llgf %r3,__LC_CPU_ADDRESS # get cpu address + interruption code + llgf %r4,__LC_EXT_PARAMS # get external parameter + lg %r5,__LC_EXT_PARAMS2-4096(%r1) # get 64 bit external parameter brasl %r14,do_extint j io_return diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index 9ce641b5291f..bd1db508e8af 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -113,12 +113,15 @@ int unregister_early_external_interrupt(__u16 code, ext_int_handler_t handler, return 0; } -void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) +void __irq_entry do_extint(struct pt_regs *regs, unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { + struct pt_regs *old_regs; + unsigned short code; ext_int_info_t *p; int index; - struct pt_regs *old_regs; + code = (unsigned short) ext_int_code; old_regs = set_irq_regs(regs); s390_idle_check(regs, S390_lowcore.int_clock, S390_lowcore.async_enter_timer); @@ -132,7 +135,7 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code) index = ext_hash(code); for (p = ext_int_hash[index]; p; p = p->next) { if (likely(p->code == code)) - p->handler(code); + p->handler(ext_int_code, param32, param64); } irq_exit(); set_irq_regs(old_regs); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 8127ebd59c4d..354589d096b1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -156,7 +156,8 @@ void smp_send_stop(void) * cpus are handled. */ -static void do_ext_call_interrupt(__u16 code) +static void do_ext_call_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { unsigned long bits; diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 2896cac9c14a..f754a6dc4f94 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -155,7 +155,9 @@ void init_cpu_timer(void) __ctl_set_bit(0, 4); } -static void clock_comparator_interrupt(__u16 code) +static void clock_comparator_interrupt(unsigned int ext_int_code, + unsigned int param32, + unsigned long param64) { if (S390_lowcore.clock_comparator == -1ULL) set_clock_comparator(S390_lowcore.clock_comparator); @@ -164,14 +166,13 @@ static void clock_comparator_interrupt(__u16 code) static void etr_timing_alert(struct etr_irq_parm *); static void stp_timing_alert(struct stp_irq_parm *); -static void timing_alert_interrupt(__u16 code) +static void timing_alert_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { - if (S390_lowcore.ext_params & 0x00c40000) - etr_timing_alert((struct etr_irq_parm *) - &S390_lowcore.ext_params); - if (S390_lowcore.ext_params & 0x00038000) - stp_timing_alert((struct stp_irq_parm *) - &S390_lowcore.ext_params); + if (param32 & 0x00c40000) + etr_timing_alert((struct etr_irq_parm *) ¶m32); + if (param32 & 0x00038000) + stp_timing_alert((struct stp_irq_parm *) ¶m32); } static void etr_reset(void); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 3479f1b0d4e0..56c8687b29b3 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -314,7 +314,8 @@ static void do_callbacks(struct list_head *cb_list) /* * Handler for the virtual CPU timer. */ -static void do_cpu_timer_interrupt(__u16 error_code) +static void do_cpu_timer_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct vtimer_queue *vq; struct vtimer_list *event, *tmp; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b6570069b127..b4aad0c1f562 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -542,7 +542,8 @@ void pfault_fini(void) : : "a" (&refbk), "m" (refbk) : "cc"); } -static void pfault_interrupt(__u16 int_code) +static void pfault_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct task_struct *tsk; __u16 subcode; @@ -553,14 +554,18 @@ static void pfault_interrupt(__u16 int_code) * in the 'cpu address' field associated with the * external interrupt. */ - subcode = S390_lowcore.cpu_addr; + subcode = ext_int_code >> 16; if ((subcode & 0xff00) != __SUBCODE_MASK) return; /* * Get the token (= address of the task structure of the affected task). */ - tsk = *(struct task_struct **) __LC_PFAULT_INTPARM; +#ifdef CONFIG_64BIT + tsk = *(struct task_struct **) param64; +#else + tsk = *(struct task_struct **) param32; +#endif if (subcode & 0x0080) { /* signal bit is set -> a page has been swapped in by VM */ diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 2b3bc3ec0541..266b34b55403 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -228,25 +228,22 @@ dasd_diag_term_IO(struct dasd_ccw_req * cqr) } /* Handle external interruption. */ -static void -dasd_ext_handler(__u16 code) +static void dasd_ext_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct dasd_ccw_req *cqr, *next; struct dasd_device *device; unsigned long long expires; unsigned long flags; - u8 int_code, status; addr_t ip; int rc; - int_code = *((u8 *) DASD_DIAG_LC_INT_CODE); - status = *((u8 *) DASD_DIAG_LC_INT_STATUS); - switch (int_code) { + switch (ext_int_code >> 24) { case DASD_DIAG_CODE_31BIT: - ip = (addr_t) *((u32 *) DASD_DIAG_LC_INT_PARM_31BIT); + ip = (addr_t) param32; break; case DASD_DIAG_CODE_64BIT: - ip = (addr_t) *((u64 *) DASD_DIAG_LC_INT_PARM_64BIT); + ip = (addr_t) param64; break; default: return; @@ -281,7 +278,7 @@ dasd_ext_handler(__u16 code) cqr->stopclk = get_clock(); expires = 0; - if (status == 0) { + if ((ext_int_code & 0xff0000) == 0) { cqr->status = DASD_CQR_SUCCESS; /* Start first request on queue if possible -> fast_io. */ if (!list_empty(&device->ccw_queue)) { @@ -296,8 +293,8 @@ dasd_ext_handler(__u16 code) } else { cqr->status = DASD_CQR_QUEUED; DBF_DEV_EVENT(DBF_DEBUG, device, "interrupt status for " - "request %p was %d (%d retries left)", cqr, status, - cqr->retries); + "request %p was %d (%d retries left)", cqr, + (ext_int_code >> 16) & 0xff, cqr->retries); dasd_diag_erp(device); } diff --git a/drivers/s390/block/dasd_diag.h b/drivers/s390/block/dasd_diag.h index b8c78267ff3e..4f71fbe60c82 100644 --- a/drivers/s390/block/dasd_diag.h +++ b/drivers/s390/block/dasd_diag.h @@ -18,10 +18,6 @@ #define DEV_CLASS_FBA 0x01 #define DEV_CLASS_ECKD 0x04 -#define DASD_DIAG_LC_INT_CODE 132 -#define DASD_DIAG_LC_INT_STATUS 133 -#define DASD_DIAG_LC_INT_PARM_31BIT 128 -#define DASD_DIAG_LC_INT_PARM_64BIT 4536 #define DASD_DIAG_CODE_31BIT 0x03 #define DASD_DIAG_CODE_64BIT 0x07 diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index 5707a80b96b6..35cc4686b99b 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -395,16 +395,16 @@ __sclp_find_req(u32 sccb) /* Handler for external interruption. Perform request post-processing. * Prepare read event data request if necessary. Start processing of next * request on queue. */ -static void -sclp_interrupt_handler(__u16 code) +static void sclp_interrupt_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct sclp_req *req; u32 finished_sccb; u32 evbuf_pending; spin_lock(&sclp_lock); - finished_sccb = S390_lowcore.ext_params & 0xfffffff8; - evbuf_pending = S390_lowcore.ext_params & 0x3; + finished_sccb = param32 & 0xfffffff8; + evbuf_pending = param32 & 0x3; if (finished_sccb) { del_timer(&sclp_request_timer); sclp_running_state = sclp_running_state_reset_pending; @@ -819,12 +819,12 @@ EXPORT_SYMBOL(sclp_reactivate); /* Handler for external interruption used during initialization. Modify * request state to done. */ -static void -sclp_check_handler(__u16 code) +static void sclp_check_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { u32 finished_sccb; - finished_sccb = S390_lowcore.ext_params & 0xfffffff8; + finished_sccb = param32 & 0xfffffff8; /* Is this the interrupt we are waiting for? */ if (finished_sccb == 0) return; diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 5a46b8c5d68a..375aeeaf9ea5 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -372,21 +372,22 @@ static void hotplug_devices(struct work_struct *dummy) /* * we emulate the request_irq behaviour on top of s390 extints */ -static void kvm_extint_handler(u16 code) +static void kvm_extint_handler(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct virtqueue *vq; u16 subcode; u32 param; - subcode = S390_lowcore.cpu_addr; + subcode = ext_int_code >> 16; if ((subcode & 0xff00) != VIRTIO_SUBCODE_64) return; /* The LSB might be overloaded, we have to mask it */ - vq = (struct virtqueue *)(S390_lowcore.ext_params2 & ~1UL); + vq = (struct virtqueue *)(param64 & ~1UL); /* We use ext_params to decide what this interrupt means */ - param = S390_lowcore.ext_params & VIRTIO_PARAM_MASK; + param = param32 & VIRTIO_PARAM_MASK; switch (param) { case VIRTIO_PARAM_CONFIG_CHANGED: diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 499c045d6910..f7db676de77d 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1798,7 +1798,8 @@ static void iucv_work_fn(struct work_struct *work) * Handles external interrupts coming in from CP. * Places the interrupt buffer on a queue and schedules iucv_tasklet_fn(). */ -static void iucv_external_interrupt(u16 code) +static void iucv_external_interrupt(unsigned int ext_int_code, + unsigned int param32, unsigned long param64) { struct iucv_irq_data *p; struct iucv_irq_list *work; From 178514d7e3e8cfba087b3a208e22a54ce65e8f34 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:39 +0200 Subject: [PATCH 33/48] [S390] correct alignment of cpuid structure The store-cpu-id instruction has a minimum alignment of 8. Reflect that in the definition of struct cpuid. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index 471234b90574..e0b69540216f 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -20,7 +20,7 @@ struct cpuid unsigned int ident : 24; unsigned int machine : 16; unsigned int unused : 16; -} __packed; +} __attribute__ ((packed, aligned(8))); #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_CPU_H */ From baa071588c3ffcc1a8721faf9337140e85d34bf6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:40 +0200 Subject: [PATCH 34/48] [S390] cleanup system call parameter setup Do the setup of the stack overflow argument for the sixth system call parameter right before the branch to the system call function. That simplifies the system call parameter access code. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/syscall.h | 4 ---- arch/s390/kernel/entry.S | 6 +++--- arch/s390/kernel/entry64.S | 4 ++-- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 8429686951f9..5c0246b955d8 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -65,8 +65,6 @@ static inline void syscall_get_arguments(struct task_struct *task, if (test_tsk_thread_flag(task, TIF_31BIT)) mask = 0xffffffff; #endif - if (i + n == 6) - args[--n] = regs->args[0] & mask; while (n-- > 0) if (i + n > 0) args[n] = regs->gprs[2 + i + n] & mask; @@ -80,8 +78,6 @@ static inline void syscall_set_arguments(struct task_struct *task, const unsigned long *args) { BUG_ON(i + n > 6); - if (i + n == 6) - regs->args[0] = args[--n]; while (n-- > 0) if (i + n > 0) regs->gprs[2 + i + n] = args[n]; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 736d7010629e..5efce7202984 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -254,12 +254,11 @@ sysc_do_svc: bnl BASED(sysc_nr_ok) lr %r7,%r1 # copy svc number to %r7 sysc_nr_ok: - mvc SP_ARGS(4,%r15),SP_R7(%r15) -sysc_do_restart: sth %r7,SP_SVCNR(%r15) sll %r7,2 # svc number *4 l %r8,BASED(.Lsysc_table) tm __TI_flags+2(%r9),_TIF_SYSCALL + mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r8,0(%r7,%r8) # get system call addr. bnz BASED(sysc_tracesys) basr %r14,%r8 # call sys_xxxx @@ -347,7 +346,7 @@ sysc_restart: l %r7,SP_R2(%r15) # load new svc number mvc SP_R2(4,%r15),SP_ORIG_R2(%r15) # restore first argument lm %r2,%r6,SP_R2(%r15) # load svc arguments - b BASED(sysc_do_restart) # restart svc + b BASED(sysc_nr_ok) # restart svc # # _TIF_SINGLE_STEP is set, call do_single_step @@ -380,6 +379,7 @@ sysc_tracesys: l %r8,0(%r7,%r8) sysc_tracego: lm %r3,%r6,SP_R3(%r15) + mvc SP_ARGS(4,%r15),SP_R7(%r15) l %r2,SP_ORIG_R2(%r15) basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index e4038ea4dc57..a2be23922f43 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -246,7 +246,6 @@ sysc_saveall: CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_ILC(4,%r15),__LC_SVC_ILC - stg %r7,SP_ARGS(%r15) lg %r12,__LC_THREAD_INFO # load pointer to thread_info struct sysc_vtime: UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER @@ -274,6 +273,7 @@ sysc_nr_ok: sysc_noemu: #endif tm __TI_flags+6(%r12),_TIF_SYSCALL + mvc SP_ARGS(8,%r15),SP_R7(%r15) lgf %r8,0(%r7,%r10) # load address of system call routine jnz sysc_tracesys basr %r14,%r8 # call sys_xxxx @@ -387,6 +387,7 @@ sysc_tracesys: lgf %r8,0(%r7,%r10) sysc_tracego: lmg %r3,%r6,SP_R3(%r15) + mvc SP_ARGS(8,%r15),SP_R7(%r15) lg %r2,SP_ORIG_R2(%r15) basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value @@ -953,7 +954,6 @@ cleanup_system_call: CREATE_STACK_FRAME __LC_SAVE_AREA mvc SP_PSW(16,%r15),__LC_SVC_OLD_PSW mvc SP_ILC(4,%r15),__LC_SVC_ILC - stg %r7,SP_ARGS(%r15) mvc 8(8,%r12),__LC_THREAD_INFO cleanup_vtime: clc __LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24) From ba6cadfebc18f786ef4e60e9ff03f9656ce3d584 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:41 +0200 Subject: [PATCH 35/48] [S390] remove ieee_instruction_pointer from thread_struct The ieee_instruction_pointer can not be read from user space anymore since git commit 613e1def6b52c399a8b72a5e11bc2e57d2546fb8, the ptrace interface always returns zero. Remove it from the thread_struct. It is still present in the user_regs_struct for compatability reasons. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 2 -- arch/s390/include/asm/ptrace.h | 3 +-- arch/s390/kernel/compat_ptrace.h | 3 +-- arch/s390/kernel/traps.c | 1 - 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 73e259834e10..8d6f87169577 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -82,8 +82,6 @@ struct thread_struct { unsigned long prot_addr; /* address of protection-excep. */ unsigned int trap_no; per_struct per_info; - /* Used to give failing instruction back to user for ieee exceptions */ - unsigned long ieee_instruction_pointer; /* pfault_wait is used to block the process on a pfault event */ unsigned long pfault_wait; }; diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index e2c218dc68a6..d9d42b1e46fa 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -481,8 +481,7 @@ struct user_regs_struct * watchpoints. This is the way intel does it. */ per_struct per_info; - unsigned long ieee_instruction_pointer; - /* Used to give failing instruction back to user for ieee exceptions */ + unsigned long ieee_instruction_pointer; /* obsolete, always 0 */ }; #ifdef __KERNEL__ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h index 123dd660d7fb..3141025724f4 100644 --- a/arch/s390/kernel/compat_ptrace.h +++ b/arch/s390/kernel/compat_ptrace.h @@ -51,8 +51,7 @@ struct user_regs_struct32 * watchpoints. This is the way intel does it. */ per_struct32 per_info; - u32 ieee_instruction_pointer; - /* Used to give failing instruction back to user for ieee exceptions */ + u32 ieee_instruction_pointer; /* obsolete, always 0 */ }; struct user32 { diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index e9b063e7d75f..70640822621a 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -447,7 +447,6 @@ static inline void do_fp_trap(struct pt_regs *regs, void __user *location, else if (fpc & 0x0800) /* inexact */ si.si_code = FPE_FLTRES; } - current->thread.ieee_instruction_pointer = (addr_t) location; do_trap(pgm_int_code, SIGFPE, "floating point exception", regs, &si); } From 0576fc703a03a43e73a57450d5720b47ec7a03ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:42 +0200 Subject: [PATCH 36/48] [S390] smp: use correct cpu address in print_cpu_info() Up to now print_cpu_info() uses the cpu address stored in it's local lowcore to print a message to the console. The cpu address in the lowcore is (in this case) however not the physical cpu address of the local cpu. It's the address of the cpu that issued the sigp restart which started the local cpu. Fix this by using the store cpu address instruction instead. It's not that anybody really cares since this is broken since more than ten years... Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index ecb2d02b02e4..644548e615c6 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -42,7 +42,7 @@ void __cpuinit print_cpu_info(void) struct cpuid *id = &per_cpu(cpu_id, smp_processor_id()); pr_info("Processor %d started, address %d, identification %06X\n", - S390_lowcore.cpu_nr, S390_lowcore.cpu_addr, id->ident); + S390_lowcore.cpu_nr, stap(), id->ident); } /* From c9af3fa9e1e3e5154649991a14b74f3a2dee19ee Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:43 +0200 Subject: [PATCH 37/48] [S390] topology: change default Switch default value of the kernel parameter 'topology' from off to on. Various performance measurements have finally shown that there are no (known) regressions anywhere. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- Documentation/kernel-parameters.txt | 2 +- arch/s390/kernel/topology.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 0b6815504e6d..b660085dcc69 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2438,7 +2438,7 @@ and is between 256 and 4096 characters. It is defined in the file topology informations if the hardware supports these. The scheduler will make use of these informations and e.g. base its process migration decisions on it. - Default is off. + Default is on. tp720= [HW,PS2] diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 13559c993847..eb0bc4752ae8 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -63,7 +63,7 @@ struct mask_info { cpumask_t mask; }; -static int topology_enabled; +static int topology_enabled = 1; static void topology_work_fn(struct work_struct *work); static struct tl_info *tl_info; static int machine_has_topology; @@ -311,9 +311,9 @@ static void set_topology_timer(void) static int __init early_parse_topology(char *p) { - if (strncmp(p, "on", 2)) + if (strncmp(p, "off", 3)) return 0; - topology_enabled = 1; + topology_enabled = 0; return 0; } early_param("topology", early_parse_topology); From 376ae4752e3a387d41a2ba9c9ea45c2df625e6e4 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Mon, 25 Oct 2010 16:10:44 +0200 Subject: [PATCH 38/48] [S390] cio: fix I/O cancel function Function ccw_device_cancel_halt_clear may cause an unexpected kernel panic if a clear function is currently active at the subchannel for which it is called. In that case, the iretry counter used to determine the number of retries is never initialized, leading to an immediate failure of the function which results in a kernel panic. Fix this by initializing the iretry counter when the function is first called. Also replace the kernel panic with a return code: a single malfunctioning I/O device should not automatically cause a system-wide kernel panic. Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 1 + drivers/s390/cio/device_fsm.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 881bdfd99140..2ff8a22d4257 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1205,6 +1205,7 @@ static void io_subchannel_quiesce(struct subchannel *sch) cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-EIO)); while (ret == -EBUSY) { cdev->private->state = DEV_STATE_QUIESCE; + cdev->private->iretry = 255; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, HZ/10); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 4395c01a9dac..a845695ac314 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -174,7 +174,10 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev) ret = cio_clear (sch); return (ret == 0) ? -EBUSY : ret; } - panic("Can't stop i/o on subchannel.\n"); + /* Function was unsuccessful */ + CIO_MSG_EVENT(0, "0.%x.%04x: could not stop I/O\n", + cdev->private->dev_id.ssid, cdev->private->dev_id.devno); + return -EIO; } void ccw_device_update_sense_data(struct ccw_device *cdev) @@ -766,13 +769,14 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event) int ret; ccw_device_set_timeout(cdev, 0); + cdev->private->iretry = 255; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); cdev->private->state = DEV_STATE_TIMEOUT_KILL; return; } - if (ret == -ENODEV) + if (ret) dev_fsm_event(cdev, DEV_EVENT_NOTOPER); else if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, @@ -869,6 +873,7 @@ void ccw_device_kill_io(struct ccw_device *cdev) { int ret; + cdev->private->iretry = 255; ret = ccw_device_cancel_halt_clear(cdev); if (ret == -EBUSY) { ccw_device_set_timeout(cdev, 3*HZ); From e05ef9bdb899e2f3798be74691842fc597d8ce60 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 25 Oct 2010 16:10:45 +0200 Subject: [PATCH 39/48] [S390] kvm: Fix badness at include/asm/mmu_context.h:83 commit 050eef364ad700590a605a0749f825cab4834b1e [S390] fix tlb flushing vs. concurrent /proc accesses broke KVM on s390x. On every schedule a Badness at include/asm/mmu_context.h:83 appears. s390_enable_sie replaces the mm on the __running__ task, therefore, we have to increase the attach count of the new mm. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 19338d228c9b..0c719c61972e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -449,6 +449,8 @@ int s390_enable_sie(void) tsk->mm = tsk->active_mm = mm; preempt_disable(); update_mm(mm, tsk); + atomic_inc(&mm->context.attach_count); + atomic_dec(&old_mm->context.attach_count); cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); preempt_enable(); task_unlock(tsk); From 26cffecf84c8cb33787dd13a72bd2124d107d413 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 25 Oct 2010 16:10:46 +0200 Subject: [PATCH 40/48] [S390] dasd: fix use after free in dbf Writing to /proc/dasd/statistics while the debug level of the generic dasd debug entry is set to DBF_DEBUG will lead to an use after free when accessing the debug entry later. Since for the format string "%s" in the s390 dbf only a pointer to the string is stored in the debug feature and the buffer used here is freed afterwards. To fix this just remove the debug message. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_proc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 2eb025592809..c4a6a31bd9cd 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -251,7 +251,6 @@ static ssize_t dasd_stats_proc_write(struct file *file, buffer = dasd_get_user_string(user_buf, user_len); if (IS_ERR(buffer)) return PTR_ERR(buffer); - DBF_EVENT(DBF_DEBUG, "/proc/dasd/statictics: '%s'\n", buffer); /* check for valid verbs */ str = skip_spaces(buffer); From a5a0061fb3a22bbd9b108af8382142fd0f41ebee Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Mon, 25 Oct 2010 16:10:47 +0200 Subject: [PATCH 41/48] [S390] dasd: fix unsolicited interrupt recognition The dasd interrupt handler needs to distinguish solicited from unsolicited interrupts, as unsolicited interrupts may require special handling (e.g. summary unit checks) and solicited interrupts require proper error recovery for the failed I/O request. The interrupt handler needs to check several bit fields in the interrupt response block (irb) to make this distinction. So far our check of the status control bits has not been specific enough, which may lead to a failed request getting just retried instead of the necessary error recovery. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 11 +++++- drivers/s390/block/dasd_eckd.c | 70 +++++++++++++++------------------- 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index aa95f1001761..80e45de096a9 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1099,11 +1099,20 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, cqr = (struct dasd_ccw_req *) intparm; if (!cqr || ((scsw_cc(&irb->scsw) == 1) && (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC) && - (scsw_stctl(&irb->scsw) & SCSW_STCTL_STATUS_PEND))) { + ((scsw_stctl(&irb->scsw) == SCSW_STCTL_STATUS_PEND) || + (scsw_stctl(&irb->scsw) == (SCSW_STCTL_STATUS_PEND | + SCSW_STCTL_ALERT_STATUS))))) { if (cqr && cqr->status == DASD_CQR_IN_IO) cqr->status = DASD_CQR_QUEUED; + if (cqr) + memcpy(&cqr->irb, irb, sizeof(*irb)); device = dasd_device_from_cdev_locked(cdev); if (!IS_ERR(device)) { + device->discipline->dump_sense_dbf(device, irb, + "unsolicited"); + if ((device->features & DASD_FEATURE_ERPLOG)) + device->discipline->dump_sense(device, cqr, + irb); dasd_device_clear_timer(device); device->discipline->handle_unsolicited_interrupt(device, irb); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index ea0e565ebc9d..50cf96389d2c 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1776,13 +1776,13 @@ static void dasd_eckd_handle_unsolicited_interrupt(struct dasd_device *device, } /* summary unit check */ - if ((scsw_dstat(&irb->scsw) & DEV_STAT_UNIT_CHECK) && - (irb->ecw[7] == 0x0D)) { + sense = dasd_get_sense(irb); + if (sense && (sense[7] == 0x0D) && + (scsw_dstat(&irb->scsw) & DEV_STAT_UNIT_CHECK)) { dasd_alias_handle_summary_unit_check(device, irb); return; } - sense = dasd_get_sense(irb); /* service information message SIM */ if (sense && !(sense[27] & DASD_SENSE_BIT_0) && ((sense[6] & DASD_SIM_SENSE) == DASD_SIM_SENSE)) { @@ -1791,26 +1791,15 @@ static void dasd_eckd_handle_unsolicited_interrupt(struct dasd_device *device, return; } - if ((scsw_cc(&irb->scsw) == 1) && - (scsw_fctl(&irb->scsw) & SCSW_FCTL_START_FUNC) && - (scsw_actl(&irb->scsw) & SCSW_ACTL_START_PEND) && - (scsw_stctl(&irb->scsw) & SCSW_STCTL_STATUS_PEND)) { + if ((scsw_cc(&irb->scsw) == 1) && !sense && + (scsw_fctl(&irb->scsw) == SCSW_FCTL_START_FUNC) && + (scsw_actl(&irb->scsw) == SCSW_ACTL_START_PEND) && + (scsw_stctl(&irb->scsw) == SCSW_STCTL_STATUS_PEND)) { /* fake irb do nothing, they are handled elsewhere */ dasd_schedule_device_bh(device); return; } - if (!sense) { - /* just report other unsolicited interrupts */ - DBF_DEV_EVENT(DBF_ERR, device, "%s", - "unsolicited interrupt received"); - } else { - DBF_DEV_EVENT(DBF_ERR, device, "%s", - "unsolicited interrupt received " - "(sense available)"); - device->discipline->dump_sense_dbf(device, irb, "unsolicited"); - } - dasd_schedule_device_bh(device); return; }; @@ -3093,23 +3082,19 @@ dasd_eckd_dump_sense_dbf(struct dasd_device *device, struct irb *irb, char *reason) { u64 *sense; - u32 stat; + u64 *stat; sense = (u64 *) dasd_get_sense(irb); - stat = scsw_cstat(&irb->scsw); - stat <<= 8; - stat |= scsw_dstat(&irb->scsw); - stat <<= 8; - stat |= scsw_cc(&irb->scsw); - + stat = (u64 *) &irb->scsw; if (sense) { - DBF_DEV_EVENT(DBF_EMERG, device, - "%s: %s %06x %016llx %016llx %016llx %016llx", - reason, scsw_is_tm(&irb->scsw) ? "t" : "c", stat, + DBF_DEV_EVENT(DBF_EMERG, device, "%s: %016llx %08x : " + "%016llx %016llx %016llx %016llx", + reason, *stat, *((u32 *) (stat + 1)), sense[0], sense[1], sense[2], sense[3]); } else { - DBF_DEV_EVENT(DBF_EMERG, device, "%s", - "SORRY - NO VALID SENSE AVAILABLE\n"); + DBF_DEV_EVENT(DBF_EMERG, device, "%s: %016llx %08x : %s", + reason, *stat, *((u32 *) (stat + 1)), + "NO VALID SENSE"); } } @@ -3135,9 +3120,12 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device, " I/O status report for device %s:\n", dev_name(&device->cdev->dev)); len += sprintf(page + len, KERN_ERR PRINTK_HEADER - " in req: %p CS: 0x%02X DS: 0x%02X CC: 0x%02X RC: %d\n", - req, scsw_cstat(&irb->scsw), scsw_dstat(&irb->scsw), - scsw_cc(&irb->scsw), req ? req->intrc : 0); + " in req: %p CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X " + "CS:%02X RC:%d\n", + req, scsw_cc(&irb->scsw), scsw_fctl(&irb->scsw), + scsw_actl(&irb->scsw), scsw_stctl(&irb->scsw), + scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw), + req ? req->intrc : 0); len += sprintf(page + len, KERN_ERR PRINTK_HEADER " device %s: Failing CCW: %p\n", dev_name(&device->cdev->dev), @@ -3238,11 +3226,13 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, " I/O status report for device %s:\n", dev_name(&device->cdev->dev)); len += sprintf(page + len, KERN_ERR PRINTK_HEADER - " in req: %p CS: 0x%02X DS: 0x%02X CC: 0x%02X RC: %d " - "fcxs: 0x%02X schxs: 0x%02X\n", req, - scsw_cstat(&irb->scsw), scsw_dstat(&irb->scsw), - scsw_cc(&irb->scsw), req->intrc, - irb->scsw.tm.fcxs, irb->scsw.tm.schxs); + " in req: %p CC:%02X FC:%02X AC:%02X SC:%02X DS:%02X " + "CS:%02X fcxs:%02X schxs:%02X RC:%d\n", + req, scsw_cc(&irb->scsw), scsw_fctl(&irb->scsw), + scsw_actl(&irb->scsw), scsw_stctl(&irb->scsw), + scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw), + irb->scsw.tm.fcxs, irb->scsw.tm.schxs, + req ? req->intrc : 0); len += sprintf(page + len, KERN_ERR PRINTK_HEADER " device %s: Failing TCW: %p\n", dev_name(&device->cdev->dev), @@ -3250,7 +3240,7 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, tsb = NULL; sense = NULL; - if (irb->scsw.tm.tcw && (irb->scsw.tm.fcxs == 0x01)) + if (irb->scsw.tm.tcw && (irb->scsw.tm.fcxs & 0x01)) tsb = tcw_get_tsb( (struct tcw *)(unsigned long)irb->scsw.tm.tcw); @@ -3348,7 +3338,7 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device, static void dasd_eckd_dump_sense(struct dasd_device *device, struct dasd_ccw_req *req, struct irb *irb) { - if (req && scsw_is_tm(&req->irb.scsw)) + if (scsw_is_tm(&irb->scsw)) dasd_eckd_dump_sense_tcw(device, req, irb); else dasd_eckd_dump_sense_ccw(device, req, irb); From 6d00d00bf23b86b352e2d075cfe751acd1908278 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 25 Oct 2010 16:10:48 +0200 Subject: [PATCH 42/48] [S390] kvm: Enable z196 instruction facilities Enable PFPO, floating point extension, distinct-operands, fast-BCR-serialization, high-word, interlocked-access, load/store- on-condition, and population-count facilities for guests. (bits 37, 44 and 45). Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4fe68650535c..76482b65ba3e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -741,7 +741,7 @@ static int __init kvm_s390_init(void) return -ENOMEM; } stfle(facilities, 1); - facilities[0] &= 0xff00fff3f0700000ULL; + facilities[0] &= 0xff00fff3f47c0000ULL; return 0; } From 56b86b615b807e043339979878a2de88f900ee4f Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Mon, 25 Oct 2010 16:10:49 +0200 Subject: [PATCH 43/48] [S390] dasd: ignore unsolicited interrupts for DIAG For the DASD DIAG discipline IO is started through special diagnose calls. Unsolicited interrupts may contain information about the device itself. But this information is not needed because the device is not used directly. Fix the case that an unimplemented dicipline function may be called by ignoring unsolicited interrupts for the DIAG disciplin. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 80e45de096a9..fb613d70c2cb 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1107,17 +1107,22 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, if (cqr) memcpy(&cqr->irb, irb, sizeof(*irb)); device = dasd_device_from_cdev_locked(cdev); - if (!IS_ERR(device)) { - device->discipline->dump_sense_dbf(device, irb, - "unsolicited"); - if ((device->features & DASD_FEATURE_ERPLOG)) - device->discipline->dump_sense(device, cqr, - irb); - dasd_device_clear_timer(device); - device->discipline->handle_unsolicited_interrupt(device, - irb); + if (IS_ERR(device)) + return; + /* ignore unsolicited interrupts for DIAG discipline */ + if (device->discipline == dasd_diag_discipline_pointer) { dasd_put_device(device); + return; } + device->discipline->dump_sense_dbf(device, irb, + "unsolicited"); + if ((device->features & DASD_FEATURE_ERPLOG)) + device->discipline->dump_sense(device, cqr, + irb); + dasd_device_clear_timer(device); + device->discipline->handle_unsolicited_interrupt(device, + irb); + dasd_put_device(device); return; } From eca577ef5989d25dedc6b0fae3c4622ceaee8005 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 25 Oct 2010 16:10:50 +0200 Subject: [PATCH 44/48] [S390] enable ARCH_DMA_ADDR_T_64BIT with 64BIT Signed-off-by: FUJITA Tomonori Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 21a7030abbb4..068e55d1bba8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,6 +60,9 @@ config NO_IOMEM config NO_DMA def_bool y +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT + config GENERIC_LOCKBREAK bool default y From 14375bc4eb8dd0fb0e765390650564c35bb31068 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 25 Oct 2010 16:10:51 +0200 Subject: [PATCH 45/48] [S390] cleanup facility list handling Store the facility list once at system startup with stfl/stfle and reuse the result for all facility tests. Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/crypt_s390.h | 2 +- arch/s390/include/asm/lowcore.h | 11 ++++++--- arch/s390/include/asm/system.h | 33 ++++++++++----------------- arch/s390/kernel/early.c | 40 ++++++++++++++++++++++++--------- arch/s390/kernel/setup.c | 19 ++++++---------- arch/s390/kernel/smp.c | 2 ++ arch/s390/kernel/topology.c | 5 +---- arch/s390/kernel/vdso.c | 6 +---- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/priv.c | 4 ++-- arch/s390/mm/fault.c | 7 +----- drivers/s390/crypto/ap_bus.c | 9 +------- 12 files changed, 66 insertions(+), 74 deletions(-) diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h index 0ef9829f2ad6..7ee9a1b4ad9f 100644 --- a/arch/s390/crypto/crypt_s390.h +++ b/arch/s390/crypto/crypt_s390.h @@ -297,7 +297,7 @@ static inline int crypt_s390_func_available(int func) int ret; /* check if CPACF facility (bit 17) is available */ - if (!(stfl() & 1ULL << (31 - 17))) + if (!test_facility(17)) return 0; switch (func & CRYPT_S390_OP_MASK) { diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 0f97ef2d92ac..65e172f8209d 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -150,9 +150,10 @@ struct _lowcore { */ __u32 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e04 */ + __u8 pad_0x0e08[0x0f00-0x0e08]; /* 0x0e08 */ - /* Align to the top 1k of prefix area */ - __u8 pad_0x0e08[0x1000-0x0e08]; /* 0x0e08 */ + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ } __packed; #else /* CONFIG_32BIT */ @@ -285,7 +286,11 @@ struct _lowcore { */ __u64 ipib; /* 0x0e00 */ __u32 ipib_checksum; /* 0x0e08 */ - __u8 pad_0x0e0c[0x11b8-0x0e0c]; /* 0x0e0c */ + __u8 pad_0x0e0c[0x0f00-0x0e0c]; /* 0x0e0c */ + + /* Extended facility list */ + __u64 stfle_fac_list[32]; /* 0x0f00 */ + __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */ /* 64 bit extparam used for pfault/diag 250: defined by architecture */ __u64 ext_params2; /* 0x11B8 */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 3c079dd5ee79..3ad16dbf622e 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -420,30 +420,21 @@ extern void smp_ctl_clear_bit(int cr, int bit); #endif /* CONFIG_SMP */ -static inline unsigned int stfl(void) -{ - asm volatile( - " .insn s,0xb2b10000,0(0)\n" /* stfl */ - "0:\n" - EX_TABLE(0b,0b)); - return S390_lowcore.stfl_fac_list; -} +#define MAX_FACILITY_BIT (256*8) /* stfle_fac_list has 256 bytes */ -static inline int __stfle(unsigned long long *list, int doublewords) +/* + * The test_facility function uses the bit odering where the MSB is bit 0. + * That makes it easier to query facility bits with the bit number as + * documented in the Principles of Operation. + */ +static inline int test_facility(unsigned long nr) { - typedef struct { unsigned long long _[doublewords]; } addrtype; - register unsigned long __nr asm("0") = doublewords - 1; + unsigned char *ptr; - asm volatile(".insn s,0xb2b00000,%0" /* stfle */ - : "=m" (*(addrtype *) list), "+d" (__nr) : : "cc"); - return __nr + 1; -} - -static inline int stfle(unsigned long long *list, int doublewords) -{ - if (!(stfl() & (1UL << 24))) - return -EOPNOTSUPP; - return __stfle(list, doublewords); + if (nr >= MAX_FACILITY_BIT) + return 0; + ptr = (unsigned char *) &S390_lowcore.stfle_fac_list + (nr >> 3); + return (*ptr & (0x80 >> (nr & 7))) != 0; } static inline unsigned short stap(void) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 0badc6344eb4..d2455d44d99a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -256,13 +256,35 @@ static noinline __init void setup_lowcore_early(void) s390_base_pgm_handler_fn = early_pgm_check_handler; } +static noinline __init void setup_facility_list(void) +{ + unsigned long nr; + + S390_lowcore.stfl_fac_list = 0; + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b,0b) : "=m" (S390_lowcore.stfl_fac_list)); + memcpy(&S390_lowcore.stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); + nr = 4; /* # bytes stored by stfl */ + if (test_facility(7)) { + /* More facility bits available with stfle */ + register unsigned long reg0 asm("0") = MAX_FACILITY_BIT/64 - 1; + asm volatile(".insn s,0xb2b00000,%0" /* stfle */ + : "=m" (S390_lowcore.stfle_fac_list), "+d" (reg0) + : : "cc"); + nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + } + memset((char *) S390_lowcore.stfle_fac_list + nr, 0, + MAX_FACILITY_BIT/8 - nr); +} + static noinline __init void setup_hpage(void) { #ifndef CONFIG_DEBUG_PAGEALLOC unsigned int facilities; - facilities = stfl(); - if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29))) + if (!test_facility(2) || !test_facility(8)) return; S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; __ctl_set_bit(0, 23); @@ -356,18 +378,13 @@ static __init void detect_diag44(void) static __init void detect_machine_facilities(void) { #ifdef CONFIG_64BIT - unsigned int facilities; - unsigned long long facility_bits; - - facilities = stfl(); - if (facilities & (1 << 28)) + if (test_facility(3)) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; - if (facilities & (1 << 23)) + if (test_facility(8)) S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; - if (facilities & (1 << 4)) + if (test_facility(27)) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; - if ((stfle(&facility_bits, 1) > 0) && - (facility_bits & (1ULL << (63 - 40)))) + if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_SPP; #endif } @@ -448,6 +465,7 @@ void __init startup_init(void) lockdep_off(); sort_main_extable(); setup_lowcore_early(); + setup_facility_list(); detect_machine_type(); ipl_update_parameters(); setup_boot_command_line(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9071e984dcf1..e3ceb911dc75 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -409,6 +409,9 @@ setup_lowcore(void) lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; lc->machine_flags = S390_lowcore.machine_flags; + lc->stfl_fac_list = S390_lowcore.stfl_fac_list; + memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lc->extended_save_area_addr = (__u32) @@ -675,12 +678,9 @@ setup_memory(void) static void __init setup_hwcaps(void) { static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 }; - unsigned long long facility_list_extended; - unsigned int facility_list; struct cpuid cpu_id; int i; - facility_list = stfl(); /* * The store facility list bits numbers as found in the principles * of operation are numbered with bit 1UL<<31 as number 0 to @@ -700,11 +700,10 @@ static void __init setup_hwcaps(void) * HWCAP_S390_ETF3EH bit 8 (22 && 30). */ for (i = 0; i < 6; i++) - if (facility_list & (1UL << (31 - stfl_bits[i]))) + if (test_facility(stfl_bits[i])) elf_hwcap |= 1UL << i; - if ((facility_list & (1UL << (31 - 22))) - && (facility_list & (1UL << (31 - 30)))) + if (test_facility(22) && test_facility(30)) elf_hwcap |= HWCAP_S390_ETF3EH; /* @@ -720,12 +719,8 @@ static void __init setup_hwcaps(void) * translated to: * HWCAP_S390_DFP bit 6 (42 && 44). */ - if ((elf_hwcap & (1UL << 2)) && - __stfle(&facility_list_extended, 1) > 0) { - if ((facility_list_extended & (1ULL << (63 - 42))) - && (facility_list_extended & (1ULL << (63 - 44)))) - elf_hwcap |= HWCAP_S390_DFP; - } + if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_S390_DFP; /* * Huge page support HWCAP_S390_HPAGE is bit 7. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 354589d096b1..94cf510b8fe1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -594,6 +594,8 @@ int __cpuinit __cpu_up(unsigned int cpu) cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; cpu_lowcore->machine_flags = S390_lowcore.machine_flags; cpu_lowcore->ftrace_func = S390_lowcore.ftrace_func; + memcpy(cpu_lowcore->stfle_fac_list, S390_lowcore.stfle_fac_list, + MAX_FACILITY_BIT/8); eieio(); while (sigp(cpu, sigp_restart) == sigp_busy) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index eb0bc4752ae8..91fb66baa50b 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -351,13 +351,10 @@ static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset void __init s390_init_cpu_topology(void) { - unsigned long long facility_bits; struct tl_info *info; int i; - if (stfle(&facility_bits, 1) <= 0) - return; - if (!(facility_bits & (1ULL << 52)) || !(facility_bits & (1ULL << 61))) + if (!test_facility(2) || !test_facility(11)) return; machine_has_topology = 1; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 6b83870507d5..e3150dd2fe74 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -84,11 +84,7 @@ struct vdso_data *vdso_data = &vdso_data_store.data; */ static void vdso_init_data(struct vdso_data *vd) { - unsigned int facility_list; - - facility_list = stfl(); - vd->ectg_available = - user_mode != HOME_SPACE_MODE && (facility_list & 1); + vd->ectg_available = user_mode != HOME_SPACE_MODE && test_facility(31); } #ifdef CONFIG_64BIT diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 76482b65ba3e..985d825494f1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -740,7 +740,7 @@ static int __init kvm_s390_init(void) kvm_exit(); return -ENOMEM; } - stfle(facilities, 1); + memcpy(facilities, S390_lowcore.stfle_fac_list, 16); facilities[0] &= 0xff00fff3f47c0000ULL; return 0; } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 44205507717c..9194a4b52b22 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -154,12 +154,12 @@ static int handle_chsc(struct kvm_vcpu *vcpu) static int handle_stfl(struct kvm_vcpu *vcpu) { - unsigned int facility_list = stfl(); + unsigned int facility_list; int rc; vcpu->stat.instruction_stfl++; /* only pass the facility bits, which we can handle */ - facility_list &= 0xff00fff3; + facility_list = S390_lowcore.stfl_fac_list & 0xff00fff3; rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), &facility_list, sizeof(facility_list)); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index b4aad0c1f562..fe5701e9efbf 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -56,12 +56,7 @@ static unsigned long store_indication; void fault_init(void) { - unsigned long long facility_list[2]; - - if (stfle(facility_list, 2) < 2) - return; - if ((facility_list[0] & (1ULL << 61)) && - (facility_list[1] & (1ULL << 52))) + if (test_facility(2) && test_facility(75)) store_indication = 0xc00; } diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 91c6028d7b74..8fd8c62455e9 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -154,14 +154,7 @@ static inline int ap_instructions_available(void) */ static int ap_interrupts_available(void) { - unsigned long long facility_bits[2]; - - if (stfle(facility_bits, 2) <= 1) - return 0; - if (!(facility_bits[0] & (1ULL << 61)) || - !(facility_bits[1] & (1ULL << 62))) - return 0; - return 1; + return test_facility(1) && test_facility(2); } /** From 9186d7a9cfd75e51ac4db4a40e0a558371988bd2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:52 +0200 Subject: [PATCH 46/48] [S390] topology: clean up facility detection Move cpu topology facility detection to early setup code where it should be. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 3 +++ arch/s390/kernel/early.c | 2 ++ arch/s390/kernel/topology.c | 14 +++++--------- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 25e831d58e1e..d5e2ef10537d 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -73,6 +73,7 @@ extern unsigned int user_mode; #define MACHINE_FLAG_PFMF (1UL << 11) #define MACHINE_FLAG_LPAR (1UL << 12) #define MACHINE_FLAG_SPP (1UL << 13) +#define MACHINE_FLAG_TOPOLOGY (1UL << 14) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -90,6 +91,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_HPAGE (0) #define MACHINE_HAS_PFMF (0) #define MACHINE_HAS_SPP (0) +#define MACHINE_HAS_TOPOLOGY (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -100,6 +102,7 @@ extern unsigned int user_mode; #define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) #define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #define MACHINE_HAS_SPP (S390_lowcore.machine_flags & MACHINE_FLAG_SPP) +#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index d2455d44d99a..d149609e46e6 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -382,6 +382,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; if (test_facility(8)) S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; + if (test_facility(11)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; if (test_facility(27)) S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 91fb66baa50b..69004411a93d 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -66,7 +66,6 @@ struct mask_info { static int topology_enabled = 1; static void topology_work_fn(struct work_struct *work); static struct tl_info *tl_info; -static int machine_has_topology; static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); @@ -88,7 +87,7 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) cpumask_t mask; cpus_clear(mask); - if (!topology_enabled || !machine_has_topology) + if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) return cpu_possible_map; while (info) { if (cpu_isset(cpu, info->mask)) { @@ -186,7 +185,6 @@ static void tl_to_cores(struct tl_info *info) break; default: clear_masks(); - machine_has_topology = 0; goto out; } tle = next_tle(tle); @@ -223,7 +221,7 @@ int topology_set_cpu_management(int fc) int cpu; int rc; - if (!machine_has_topology) + if (!MACHINE_HAS_TOPOLOGY) return -EOPNOTSUPP; if (fc) rc = ptf(PTF_VERTICAL); @@ -269,7 +267,7 @@ int arch_update_cpu_topology(void) struct sys_device *sysdev; int cpu; - if (!machine_has_topology) { + if (!MACHINE_HAS_TOPOLOGY) { update_cpu_core_map(); topology_update_polarization_simple(); return 0; @@ -323,7 +321,7 @@ static int __init init_topology_update(void) int rc; rc = 0; - if (!machine_has_topology) { + if (!MACHINE_HAS_TOPOLOGY) { topology_update_polarization_simple(); goto out; } @@ -354,10 +352,8 @@ void __init s390_init_cpu_topology(void) struct tl_info *info; int i; - if (!test_facility(2) || !test_facility(11)) + if (!MACHINE_HAS_TOPOLOGY) return; - machine_has_topology = 1; - tl_info = alloc_bootmem_pages(PAGE_SIZE); info = tl_info; store_topology(info); From c30f91b6a264aef9ffb05e13931514c2a988c495 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:53 +0200 Subject: [PATCH 47/48] [S390] topology: move topology sysinfo code Move the topology sysinfo SYSIB definitions to the proper place in asm/sysinfo.h where they should be. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/sysinfo.h | 35 +++++++++++++++ arch/s390/kernel/topology.c | 75 +++++++++------------------------ 2 files changed, 56 insertions(+), 54 deletions(-) diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index c2ece4dd6c0a..79d3d6e2e9c5 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -14,6 +14,8 @@ #ifndef __ASM_S390_SYSINFO_H #define __ASM_S390_SYSINFO_H +#include + struct sysinfo_1_1_1 { unsigned short :16; unsigned char ccr; @@ -107,6 +109,39 @@ struct sysinfo_3_2_2 { char reserved_544[3552]; }; +#define TOPOLOGY_CPU_BITS 64 +#define TOPOLOGY_NR_MAG 6 + +struct topology_cpu { + unsigned char reserved0[4]; + unsigned char :6; + unsigned char pp:2; + unsigned char reserved1; + unsigned short origin; + unsigned long mask[TOPOLOGY_CPU_BITS / BITS_PER_LONG]; +}; + +struct topology_container { + unsigned char reserved[7]; + unsigned char id; +}; + +union topology_entry { + unsigned char nl; + struct topology_cpu cpu; + struct topology_container container; +}; + +struct sysinfo_15_1_x { + unsigned char reserved0[2]; + unsigned short length; + unsigned char mag[TOPOLOGY_NR_MAG]; + unsigned char reserved1; + unsigned char mnest; + unsigned char reserved2[4]; + union topology_entry tle[0]; +}; + static inline int stsi(void *sysinfo, int fc, int sel1, int sel2) { register int r0 asm("0") = (fc << 28) | sel1; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 69004411a93d..ffea01e63218 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -20,43 +20,10 @@ #include #include -#define CPU_BITS 64 -#define NR_MAG 6 - #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) #define PTF_CHECK (2UL) -struct tl_cpu { - unsigned char reserved0[4]; - unsigned char :6; - unsigned char pp:2; - unsigned char reserved1; - unsigned short origin; - unsigned long mask[CPU_BITS / BITS_PER_LONG]; -}; - -struct tl_container { - unsigned char reserved[7]; - unsigned char id; -}; - -union tl_entry { - unsigned char nl; - struct tl_cpu cpu; - struct tl_container container; -}; - -struct tl_info { - unsigned char reserved0[2]; - unsigned short length; - unsigned char mag[NR_MAG]; - unsigned char reserved1; - unsigned char mnest; - unsigned char reserved2[4]; - union tl_entry tle[0]; -}; - struct mask_info { struct mask_info *next; unsigned char id; @@ -65,7 +32,7 @@ struct mask_info { static int topology_enabled = 1; static void topology_work_fn(struct work_struct *work); -static struct tl_info *tl_info; +static struct sysinfo_15_1_x *tl_info; static struct timer_list topology_timer; static void set_topology_timer(void); static DECLARE_WORK(topology_work, topology_work_fn); @@ -101,18 +68,18 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) return mask; } -static void add_cpus_to_mask(struct tl_cpu *tl_cpu, struct mask_info *book, - struct mask_info *core) +static void add_cpus_to_mask(struct topology_cpu *tl_cpu, + struct mask_info *book, struct mask_info *core) { unsigned int cpu; - for (cpu = find_first_bit(&tl_cpu->mask[0], CPU_BITS); - cpu < CPU_BITS; - cpu = find_next_bit(&tl_cpu->mask[0], CPU_BITS, cpu + 1)) + for (cpu = find_first_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS); + cpu < TOPOLOGY_CPU_BITS; + cpu = find_next_bit(&tl_cpu->mask[0], TOPOLOGY_CPU_BITS, cpu + 1)) { unsigned int rcpu, lcpu; - rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin; + rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; for_each_present_cpu(lcpu) { if (cpu_logical_map(lcpu) != rcpu) continue; @@ -145,15 +112,14 @@ static void clear_masks(void) #endif } -static union tl_entry *next_tle(union tl_entry *tle) +static union topology_entry *next_tle(union topology_entry *tle) { - if (tle->nl) - return (union tl_entry *)((struct tl_container *)tle + 1); - else - return (union tl_entry *)((struct tl_cpu *)tle + 1); + if (!tle->nl) + return (union topology_entry *)((struct topology_cpu *)tle + 1); + return (union topology_entry *)((struct topology_container *)tle + 1); } -static void tl_to_cores(struct tl_info *info) +static void tl_to_cores(struct sysinfo_15_1_x *info) { #ifdef CONFIG_SCHED_BOOK struct mask_info *book = &book_info; @@ -161,13 +127,13 @@ static void tl_to_cores(struct tl_info *info) struct mask_info *book = NULL; #endif struct mask_info *core = &core_info; - union tl_entry *tle, *end; + union topology_entry *tle, *end; spin_lock_irq(&topology_lock); clear_masks(); tle = info->tle; - end = (union tl_entry *)((unsigned long)info + info->length); + end = (union topology_entry *)((unsigned long)info + info->length); while (tle < end) { switch (tle->nl) { #ifdef CONFIG_SCHED_BOOK @@ -263,7 +229,7 @@ static void store_topology(struct tl_info *info) int arch_update_cpu_topology(void) { - struct tl_info *info = tl_info; + struct sysinfo_15_1_x *info = tl_info; struct sys_device *sysdev; int cpu; @@ -333,13 +299,14 @@ static int __init init_topology_update(void) } __initcall(init_topology_update); -static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset) +static void alloc_masks(struct sysinfo_15_1_x *info, struct mask_info *mask, + int offset) { int i, nr_masks; - nr_masks = info->mag[NR_MAG - offset]; + nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; for (i = 0; i < info->mnest - offset; i++) - nr_masks *= info->mag[NR_MAG - offset - 1 - i]; + nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; nr_masks = max(nr_masks, 1); for (i = 0; i < nr_masks; i++) { mask->next = alloc_bootmem(sizeof(struct mask_info)); @@ -349,7 +316,7 @@ static void alloc_masks(struct tl_info *info, struct mask_info *mask, int offset void __init s390_init_cpu_topology(void) { - struct tl_info *info; + struct sysinfo_15_1_x *info; int i; if (!MACHINE_HAS_TOPOLOGY) @@ -358,7 +325,7 @@ void __init s390_init_cpu_topology(void) info = tl_info; store_topology(info); pr_info("The CPU configuration topology of the machine is:"); - for (i = 0; i < NR_MAG; i++) + for (i = 0; i < TOPOLOGY_NR_MAG; i++) printk(" %d", info->mag[i]); printk(" / %d\n", info->mnest); alloc_masks(info, &core_info, 2); From 96f4a70d8eb4d746b19d5b5510407c8ff0d00340 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 25 Oct 2010 16:10:54 +0200 Subject: [PATCH 48/48] [S390] topology: export cpu topology via proc/sysinfo Export the cpu configuration topology via sysinfo. Two new lines are introduced: CPU Topology HW: 0 0 0 4 6 4 CPU Topology SW: 0 0 0 0 4 24 The HW line describes the cpu topology nesting levels when the maximum nesting level is used to get the corresponding SYSIB. The SW line describes what Linux is actually using. In this case it supports only two levels (CONFIG_SCHED_BOOK off) and therefore the hardware folded the two lower levels in the SYSIB response block. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/topology.h | 2 ++ arch/s390/kernel/sysinfo.c | 34 +++++++++++++++++++++++++++++++- arch/s390/kernel/topology.c | 3 +-- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 051107a2c5e2..c5338834ddbd 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -2,6 +2,7 @@ #define _ASM_S390_TOPOLOGY_H #include +#include extern unsigned char cpu_core_id[NR_CPUS]; extern cpumask_t cpu_core_map[NR_CPUS]; @@ -32,6 +33,7 @@ static inline const struct cpumask *cpu_book_mask(unsigned int cpu) int topology_set_cpu_management(int fc); void topology_schedule_update(void); +void store_topology(struct sysinfo_15_1_x *info); #define POLARIZATION_UNKNWN (-1) #define POLARIZATION_HRZ (0) diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index a91274b4eb8f..f04d93aa48ec 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -15,6 +15,7 @@ #include #include #include +#include /* Sigh, math-emu. Don't ask. */ #include @@ -84,6 +85,35 @@ static int stsi_1_1_1(struct sysinfo_1_1_1 *info, char *page, int len) return len; } +static int stsi_15_1_x(struct sysinfo_15_1_x *info, char *page, int len) +{ + static int max_mnest; + int i, rc; + + len += sprintf(page + len, "\n"); + if (!MACHINE_HAS_TOPOLOGY) + return len; + if (max_mnest) { + stsi(info, 15, 1, max_mnest); + } else { + for (max_mnest = 6; max_mnest > 1; max_mnest--) { + rc = stsi(info, 15, 1, max_mnest); + if (rc != -ENOSYS) + break; + } + } + len += sprintf(page + len, "CPU Topology HW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + len += sprintf(page + len, " %d", info->mag[i]); + len += sprintf(page + len, "\n"); + store_topology(info); + len += sprintf(page + len, "CPU Topology SW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + len += sprintf(page + len, " %d", info->mag[i]); + len += sprintf(page + len, "\n"); + return len; +} + static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len) { struct sysinfo_1_2_2_extension *ext; @@ -94,7 +124,6 @@ static int stsi_1_2_2(struct sysinfo_1_2_2 *info, char *page, int len) ext = (struct sysinfo_1_2_2_extension *) ((unsigned long) info + info->acc_offset); - len += sprintf(page + len, "\n"); len += sprintf(page + len, "CPUs Total: %d\n", info->cpus_total); len += sprintf(page + len, "CPUs Configured: %d\n", @@ -223,6 +252,9 @@ static int proc_read_sysinfo(char *page, char **start, if (level >= 1) len = stsi_1_1_1((struct sysinfo_1_1_1 *) info, page, len); + if (level >= 1) + len = stsi_15_1_x((struct sysinfo_15_1_x *) info, page, len); + if (level >= 1) len = stsi_1_2_2((struct sysinfo_1_2_2 *) info, page, len); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index ffea01e63218..a9dee9048ee5 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -18,7 +18,6 @@ #include #include #include -#include #define PTF_HORIZONTAL (0UL) #define PTF_VERTICAL (1UL) @@ -215,7 +214,7 @@ static void update_cpu_core_map(void) spin_unlock_irqrestore(&topology_lock, flags); } -static void store_topology(struct tl_info *info) +void store_topology(struct sysinfo_15_1_x *info) { #ifdef CONFIG_SCHED_BOOK int rc;