s390: implement memset16, memset32 & memset64

Provide fast versions of the new memset variants. E.g. the generic
memset64 is ten times slower than the optimized version if used on a
whole page.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
This commit is contained in:
Heiko Carstens 2017-10-04 19:27:05 +02:00 committed by Martin Schwidefsky
parent 3bdf5679c9
commit 0b77d6701c
2 changed files with 66 additions and 0 deletions

View File

@ -17,6 +17,9 @@
#define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMSET16 /* arch function */
#define __HAVE_ARCH_MEMSET32 /* arch function */
#define __HAVE_ARCH_MEMSET64 /* arch function */
#define __HAVE_ARCH_STRCAT /* inline & arch function */
#define __HAVE_ARCH_STRCMP /* arch function */
#define __HAVE_ARCH_STRCPY /* inline & arch function */
@ -49,6 +52,25 @@ extern char *strstr(const char *, const char *);
#undef __HAVE_ARCH_STRSEP
#undef __HAVE_ARCH_STRSPN
/*
 * Out-of-line helpers used by the memset16/32/64 inline wrappers below.
 * The wrappers pass count * sizeof(v), so the count parameter of these
 * helpers is a length in bytes, not a number of elements.
 */
void *__memset16(uint16_t *s, uint16_t v, size_t count);
void *__memset32(uint32_t *s, uint32_t v, size_t count);
void *__memset64(uint64_t *s, uint64_t v, size_t count);
/*
 * Fill count 16-bit elements starting at s with the value v.
 * The element count is converted to a byte length before it is handed to
 * __memset16(); the result of that call is returned unchanged.
 */
static inline void *memset16(uint16_t *s, uint16_t v, size_t count)
{
	size_t nbytes = sizeof(v) * count;

	return __memset16(s, v, nbytes);
}
/*
 * Fill count 32-bit elements starting at s with the value v.
 * The element count is converted to a byte length before it is handed to
 * __memset32(); the result of that call is returned unchanged.
 */
static inline void *memset32(uint32_t *s, uint32_t v, size_t count)
{
	size_t nbytes = sizeof(v) * count;

	return __memset32(s, v, nbytes);
}
/*
 * Fill count 64-bit elements starting at s with the value v.
 * The element count is converted to a byte length before it is handed to
 * __memset64(); the result of that call is returned unchanged.
 */
static inline void *memset64(uint64_t *s, uint64_t v, size_t count)
{
	size_t nbytes = sizeof(v) * count;

	return __memset64(s, v, nbytes);
}
#if !defined(IN_ARCH_STRING_C) && (!defined(CONFIG_FORTIFY_SOURCE) || defined(__NO_FORTIFY))
static inline void *memchr(const void * s, int c, size_t n)

View File

@ -126,3 +126,47 @@ ENTRY(memcpy)
.Lmemcpy_mvc:
mvc 0(1,%r1),0(%r3)
EXPORT_SYMBOL(memcpy)
/*
 * __memset16/32/64
 *
 * void *__memset16(uint16_t *s, uint16_t v, size_t count)
 * void *__memset32(uint32_t *s, uint32_t v, size_t count)
 * void *__memset64(uint64_t *s, uint64_t v, size_t count)
 *
 * Macro parameters:
 *   bits  - suffix of the generated symbol and of its local labels
 *   bytes - size of one element in bytes
 *   insn  - store instruction used to write one element from %r3
 *
 * Register use as read from the code below:
 *   %r2  - s; never written here, so it still holds s on return
 *   %r3  - v, the element value that gets stored
 *   %r4  - count; it is compared against the element size and used as an
 *          MVC length, so it is a length in bytes
 *   %r1  - scratch: running destination pointer
 *   %r5  - scratch: 256-byte chunk counter, later address of the MVC template
 *   %r14 - return address
 *
 * NOTE(review): count is presumably always a multiple of the element size;
 * nothing in this macro checks that.
 *
 * Technique: one element is stored, then an MVC whose destination starts
 * one element after its source replicates that element forward.
 */
.macro __MEMSET bits,bytes,insn
ENTRY(__memset\bits)
/* Nothing to do for a zero length: return right away. */
ltgr %r4,%r4
bzr %r14
/* Exactly one element: a single store is enough. */
cghi %r4,\bytes
je .L__memset_exit\bits
/*
 * From here on %r4 = length - bytes - 1: the leading element is written by
 * the store instruction, and an MVC length field encodes "length minus one".
 */
aghi %r4,-(\bytes+1)
/* %r5 = number of full 256-byte chunks handled by the loop. */
srlg %r5,%r4,8
ltgr %r5,%r5
/* lgr leaves the condition code alone, so jz still tests the ltgr result. */
lgr %r1,%r2
jz .L__memset_remainder\bits
.L__memset_loop\bits:
/* Store one element, then let the overlapping MVC fill the rest of 256 bytes. */
\insn %r3,0(%r1)
mvc \bytes(256-\bytes,%r1),0(%r1)
la %r1,256(%r1)
/* Decrement the chunk counter and loop while it is non-zero. */
brctg %r5,.L__memset_loop\bits
.L__memset_remainder\bits:
/*
 * Tail: store one element, then EXECUTE the MVC template with its length
 * taken from the low byte of %r4. This writes bytes + (%r4 & 0xff) + 1
 * bytes in total.
 */
\insn %r3,0(%r1)
larl %r5,.L__memset_mvc\bits
ex %r4,0(%r5)
br %r14
.L__memset_exit\bits:
/* Single-element case: %r1 has not been set up yet, so store through %r2. */
\insn %r3,0(%r2)
br %r14
.L__memset_mvc\bits:
/*
 * MVC template, only reached as the target of the ex above (the preceding
 * br %r14 prevents falling through into it).
 */
mvc \bytes(1,%r1),0(%r1)
.endm
/*
 * Instantiate __MEMSET once per element width and export each symbol.
 * The arguments are: symbol suffix, element size in bytes, and the store
 * instruction that writes one element of that size.
 */
__MEMSET 16,2,sth
EXPORT_SYMBOL(__memset16)
__MEMSET 32,4,st
EXPORT_SYMBOL(__memset32)
__MEMSET 64,8,stg
EXPORT_SYMBOL(__memset64)