csky: optimize memcpy_{from,to}io() and memset_io()

Optimize memcpy_{from,to}io() and memset_io() by transferring in
64 bit as much as possible with minimized barrier usage.  This
simplest optimization brings faster throughput compare to current
byte-by-byte read and write with barrier in the loop. Code's
skeleton is taken from the powerpc & arm64.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
This commit is contained in:
Guo Ren 2022-04-06 21:32:22 +08:00
parent e4df2d5e85
commit 8318f7c231
3 changed files with 103 additions and 1 deletions

View File

@ -31,6 +31,17 @@
#define writel(v,c) ({ wmb(); writel_relaxed((v),(c)); mb(); })
#endif
/*
* String version of I/O memory access operations.
*/
extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
extern void __memset_io(volatile void __iomem *, int, size_t);
#define memset_io(c,v,l) __memset_io((c),(v),(l))
#define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l))
#define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l))
/*
* I/O memory mapping functions.
*/

View File

@ -2,7 +2,7 @@
extra-y := head.o vmlinux.lds
obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
obj-y += power.o syscall.o syscall_table.o setup.o
obj-y += power.o syscall.o syscall_table.o setup.o io.o
obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
obj-y += probes/

91
arch/csky/kernel/io.c Normal file
View File

@ -0,0 +1,91 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/export.h>
#include <linux/types.h>
#include <linux/io.h>
/*
* Copy data from IO memory space to "real" memory space.
*/
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
{
while (count && !IS_ALIGNED((unsigned long)from, 4)) {
*(u8 *)to = __raw_readb(from);
from++;
to++;
count--;
}
while (count >= 4) {
*(u32 *)to = __raw_readl(from);
from += 4;
to += 4;
count -= 4;
}
while (count) {
*(u8 *)to = __raw_readb(from);
from++;
to++;
count--;
}
}
EXPORT_SYMBOL(__memcpy_fromio);
/*
* Copy data from "real" memory space to IO memory space.
*/
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
{
while (count && !IS_ALIGNED((unsigned long)to, 4)) {
__raw_writeb(*(u8 *)from, to);
from++;
to++;
count--;
}
while (count >= 4) {
__raw_writel(*(u32 *)from, to);
from += 4;
to += 4;
count -= 4;
}
while (count) {
__raw_writeb(*(u8 *)from, to);
from++;
to++;
count--;
}
}
EXPORT_SYMBOL(__memcpy_toio);
/*
* "memset" on IO memory space.
*/
void __memset_io(volatile void __iomem *dst, int c, size_t count)
{
u32 qc = (u8)c;
qc |= qc << 8;
qc |= qc << 16;
while (count && !IS_ALIGNED((unsigned long)dst, 4)) {
__raw_writeb(c, dst);
dst++;
count--;
}
while (count >= 4) {
__raw_writel(qc, dst);
dst += 4;
count -= 4;
}
while (count) {
__raw_writeb(c, dst);
dst++;
count--;
}
}
EXPORT_SYMBOL(__memset_io);