[MIPS] Eliminate local symbols from the symbol table.

These symbols show up in oprofile output, stacktraces and similar places, but
they only make the output harder to read.  Many identical symbol names such as
"both_aligned" were also used in multiple source files, making it impossible
to tell which file was actually meant.  So let's get rid of them.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Ralf Baechle 2008-01-29 10:14:59 +00:00
parent f21d850808
commit c5ec1983e4
7 changed files with 282 additions and 281 deletions
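
For readers unfamiliar with the mechanism (this note and the snippet below are illustrative, not part of the patch): on ELF targets the GNU assembler treats any label whose name starts with ".L" as a local label and never enters it into the object file's symbol table, so simply renaming the labels is enough to hide them from oprofile, nm and kernel backtraces. A minimal sketch with made-up label names:

        .text
        .globl  count_down          # global symbol: kept in the symbol table
    count_down:
        move    $v0, $zero
    .Lloop:                         # ".L" prefix: assembler-local, never emitted
        beqz    $a0, .Ldone
        addiu   $v0, $v0, 1
        addiu   $a0, $a0, -1
        b       .Lloop
    .Ldone:
        jr      $ra

Before this change, names like both_aligned were ordinary labels, so every object file that defined them carried a local symbol of that name; with the .L prefix the labels exist only while assembling and the generated symbol table stays clean.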


@@ -96,13 +96,13 @@ LEAF(csum_partial)
 move t7, zero
 sltiu t8, a1, 0x8
-bnez t8, small_csumcpy /* < 8 bytes to copy */
+bnez t8, .Lsmall_csumcpy /* < 8 bytes to copy */
 move t2, a1
 andi t7, src, 0x1 /* odd buffer? */
-hword_align:
+.Lhword_align:
-beqz t7, word_align
+beqz t7, .Lword_align
 andi t8, src, 0x2
 lbu t0, (src)
@@ -114,8 +114,8 @@ hword_align:
 PTR_ADDU src, src, 0x1
 andi t8, src, 0x2
-word_align:
+.Lword_align:
-beqz t8, dword_align
+beqz t8, .Ldword_align
 sltiu t8, a1, 56
 lhu t0, (src)
@@ -124,12 +124,12 @@ word_align:
 sltiu t8, a1, 56
 PTR_ADDU src, src, 0x2
-dword_align:
+.Ldword_align:
-bnez t8, do_end_words
+bnez t8, .Ldo_end_words
 move t8, a1
 andi t8, src, 0x4
-beqz t8, qword_align
+beqz t8, .Lqword_align
 andi t8, src, 0x8
 lw t0, 0x00(src)
@@ -138,8 +138,8 @@ dword_align:
 PTR_ADDU src, src, 0x4
 andi t8, src, 0x8
-qword_align:
+.Lqword_align:
-beqz t8, oword_align
+beqz t8, .Loword_align
 andi t8, src, 0x10
 #ifdef USE_DOUBLE
@@ -156,8 +156,8 @@ qword_align:
 PTR_ADDU src, src, 0x8
 andi t8, src, 0x10
-oword_align:
+.Loword_align:
-beqz t8, begin_movement
+beqz t8, .Lbegin_movement
 LONG_SRL t8, a1, 0x7
 #ifdef USE_DOUBLE
@@ -172,11 +172,11 @@ oword_align:
 PTR_ADDU src, src, 0x10
 LONG_SRL t8, a1, 0x7
-begin_movement:
+.Lbegin_movement:
 beqz t8, 1f
 andi t2, a1, 0x40
-move_128bytes:
+.Lmove_128bytes:
 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
@@ -184,43 +184,43 @@ move_128bytes:
 LONG_SUBU t8, t8, 0x01
 .set reorder /* DADDI_WAR */
 PTR_ADDU src, src, 0x80
-bnez t8, move_128bytes
+bnez t8, .Lmove_128bytes
 .set noreorder
 1:
 beqz t2, 1f
 andi t2, a1, 0x20
-move_64bytes:
+.Lmove_64bytes:
 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 PTR_ADDU src, src, 0x40
 1:
-beqz t2, do_end_words
+beqz t2, .Ldo_end_words
 andi t8, a1, 0x1c
-move_32bytes:
+.Lmove_32bytes:
 CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 andi t8, a1, 0x1c
 PTR_ADDU src, src, 0x20
-do_end_words:
+.Ldo_end_words:
-beqz t8, small_csumcpy
+beqz t8, .Lsmall_csumcpy
 andi t2, a1, 0x3
 LONG_SRL t8, t8, 0x2
-end_words:
+.Lend_words:
 lw t0, (src)
 LONG_SUBU t8, t8, 0x1
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 PTR_ADDU src, src, 0x4
-bnez t8, end_words
+bnez t8, .Lend_words
 .set noreorder
 /* unknown src alignment and < 8 bytes to go */
-small_csumcpy:
+.Lsmall_csumcpy:
 move a1, t2
 andi t0, a1, 4
@@ -413,48 +413,48 @@ FEXPORT(csum_partial_copy_nocheck)
 */
 sltu t2, len, NBYTES
 and t1, dst, ADDRMASK
-bnez t2, copy_bytes_checklen
+bnez t2, .Lcopy_bytes_checklen
 and t0, src, ADDRMASK
 andi odd, dst, 0x1 /* odd buffer? */
-bnez t1, dst_unaligned
+bnez t1, .Ldst_unaligned
 nop
-bnez t0, src_unaligned_dst_aligned
+bnez t0, .Lsrc_unaligned_dst_aligned
 /*
 * use delay slot for fall-through
 * src and dst are aligned; need to compute rem
 */
-both_aligned:
+.Lboth_aligned:
 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
-beqz t0, cleanup_both_aligned # len < 8*NBYTES
+beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
 nop
 SUB len, 8*NBYTES # subtract here for bgez loop
 .align 4
 1:
-EXC( LOAD t0, UNIT(0)(src), l_exc)
+EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
-EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
-EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
+EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
-EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
+EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
-EXC( LOAD t4, UNIT(4)(src), l_exc_copy)
+EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy)
-EXC( LOAD t5, UNIT(5)(src), l_exc_copy)
+EXC( LOAD t5, UNIT(5)(src), .Ll_exc_copy)
-EXC( LOAD t6, UNIT(6)(src), l_exc_copy)
+EXC( LOAD t6, UNIT(6)(src), .Ll_exc_copy)
-EXC( LOAD t7, UNIT(7)(src), l_exc_copy)
+EXC( LOAD t7, UNIT(7)(src), .Ll_exc_copy)
 SUB len, len, 8*NBYTES
 ADD src, src, 8*NBYTES
-EXC( STORE t0, UNIT(0)(dst), s_exc)
+EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
 ADDC(sum, t0)
-EXC( STORE t1, UNIT(1)(dst), s_exc)
+EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
 ADDC(sum, t1)
-EXC( STORE t2, UNIT(2)(dst), s_exc)
+EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
 ADDC(sum, t2)
-EXC( STORE t3, UNIT(3)(dst), s_exc)
+EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
 ADDC(sum, t3)
-EXC( STORE t4, UNIT(4)(dst), s_exc)
+EXC( STORE t4, UNIT(4)(dst), .Ls_exc)
 ADDC(sum, t4)
-EXC( STORE t5, UNIT(5)(dst), s_exc)
+EXC( STORE t5, UNIT(5)(dst), .Ls_exc)
 ADDC(sum, t5)
-EXC( STORE t6, UNIT(6)(dst), s_exc)
+EXC( STORE t6, UNIT(6)(dst), .Ls_exc)
 ADDC(sum, t6)
-EXC( STORE t7, UNIT(7)(dst), s_exc)
+EXC( STORE t7, UNIT(7)(dst), .Ls_exc)
 ADDC(sum, t7)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 8*NBYTES
@@ -465,44 +465,44 @@ EXC( STORE t7, UNIT(7)(dst), s_exc)
 /*
 * len == the number of bytes left to copy < 8*NBYTES
 */
-cleanup_both_aligned:
+.Lcleanup_both_aligned:
 #define rem t7
-beqz len, done
+beqz len, .Ldone
 sltu t0, len, 4*NBYTES
-bnez t0, less_than_4units
+bnez t0, .Lless_than_4units
 and rem, len, (NBYTES-1) # rem = len % NBYTES
 /*
 * len >= 4*NBYTES
 */
-EXC( LOAD t0, UNIT(0)(src), l_exc)
+EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
-EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
-EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
+EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
-EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
+EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
 SUB len, len, 4*NBYTES
 ADD src, src, 4*NBYTES
-EXC( STORE t0, UNIT(0)(dst), s_exc)
+EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
 ADDC(sum, t0)
-EXC( STORE t1, UNIT(1)(dst), s_exc)
+EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
 ADDC(sum, t1)
-EXC( STORE t2, UNIT(2)(dst), s_exc)
+EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
 ADDC(sum, t2)
-EXC( STORE t3, UNIT(3)(dst), s_exc)
+EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
 ADDC(sum, t3)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
-beqz len, done
+beqz len, .Ldone
 .set noreorder
-less_than_4units:
+.Lless_than_4units:
 /*
 * rem = len % NBYTES
 */
-beq rem, len, copy_bytes
+beq rem, len, .Lcopy_bytes
 nop
 1:
-EXC( LOAD t0, 0(src), l_exc)
+EXC( LOAD t0, 0(src), .Ll_exc)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
-EXC( STORE t0, 0(dst), s_exc)
+EXC( STORE t0, 0(dst), .Ls_exc)
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
@@ -521,20 +521,20 @@ EXC( STORE t0, 0(dst), s_exc)
 * more instruction-level parallelism.
 */
 #define bits t2
-beqz len, done
+beqz len, .Ldone
 ADD t1, dst, len # t1 is just past last byte of dst
 li bits, 8*NBYTES
 SLL rem, len, 3 # rem = number of bits to keep
-EXC( LOAD t0, 0(src), l_exc)
+EXC( LOAD t0, 0(src), .Ll_exc)
 SUB bits, bits, rem # bits = number of bits to discard
 SHIFT_DISCARD t0, t0, bits
-EXC( STREST t0, -1(t1), s_exc)
+EXC( STREST t0, -1(t1), .Ls_exc)
 SHIFT_DISCARD_REVERT t0, t0, bits
 .set reorder
 ADDC(sum, t0)
-b done
+b .Ldone
 .set noreorder
-dst_unaligned:
+.Ldst_unaligned:
 /*
 * dst is unaligned
 * t0 = src & ADDRMASK
@@ -545,25 +545,25 @@ dst_unaligned:
 * Set match = (src and dst have same alignment)
 */
 #define match rem
-EXC( LDFIRST t3, FIRST(0)(src), l_exc)
+EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc)
 ADD t2, zero, NBYTES
-EXC( LDREST t3, REST(0)(src), l_exc_copy)
+EXC( LDREST t3, REST(0)(src), .Ll_exc_copy)
 SUB t2, t2, t1 # t2 = number of bytes copied
 xor match, t0, t1
-EXC( STFIRST t3, FIRST(0)(dst), s_exc)
+EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc)
 SLL t4, t1, 3 # t4 = number of bits to discard
 SHIFT_DISCARD t3, t3, t4
 /* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
 ADDC(sum, t3)
-beq len, t2, done
+beq len, t2, .Ldone
 SUB len, len, t2
 ADD dst, dst, t2
-beqz match, both_aligned
+beqz match, .Lboth_aligned
 ADD src, src, t2
-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
-beqz t0, cleanup_src_unaligned
+beqz t0, .Lcleanup_src_unaligned
 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
 1:
 /*
@@ -572,53 +572,53 @@ src_unaligned_dst_aligned:
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
-EXC( LDFIRST t0, FIRST(0)(src), l_exc)
+EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
-EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy)
+EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy)
 SUB len, len, 4*NBYTES
-EXC( LDREST t0, REST(0)(src), l_exc_copy)
+EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
-EXC( LDREST t1, REST(1)(src), l_exc_copy)
+EXC( LDREST t1, REST(1)(src), .Ll_exc_copy)
-EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy)
+EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy)
-EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy)
+EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy)
-EXC( LDREST t2, REST(2)(src), l_exc_copy)
+EXC( LDREST t2, REST(2)(src), .Ll_exc_copy)
-EXC( LDREST t3, REST(3)(src), l_exc_copy)
+EXC( LDREST t3, REST(3)(src), .Ll_exc_copy)
 ADD src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 nop # improves slotting
 #endif
-EXC( STORE t0, UNIT(0)(dst), s_exc)
+EXC( STORE t0, UNIT(0)(dst), .Ls_exc)
 ADDC(sum, t0)
-EXC( STORE t1, UNIT(1)(dst), s_exc)
+EXC( STORE t1, UNIT(1)(dst), .Ls_exc)
 ADDC(sum, t1)
-EXC( STORE t2, UNIT(2)(dst), s_exc)
+EXC( STORE t2, UNIT(2)(dst), .Ls_exc)
 ADDC(sum, t2)
-EXC( STORE t3, UNIT(3)(dst), s_exc)
+EXC( STORE t3, UNIT(3)(dst), .Ls_exc)
 ADDC(sum, t3)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
 bne len, rem, 1b
 .set noreorder
-cleanup_src_unaligned:
+.Lcleanup_src_unaligned:
-beqz len, done
+beqz len, .Ldone
 and rem, len, NBYTES-1 # rem = len % NBYTES
-beq rem, len, copy_bytes
+beq rem, len, .Lcopy_bytes
 nop
 1:
-EXC( LDFIRST t0, FIRST(0)(src), l_exc)
+EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
-EXC( LDREST t0, REST(0)(src), l_exc_copy)
+EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
-EXC( STORE t0, 0(dst), s_exc)
+EXC( STORE t0, 0(dst), .Ls_exc)
 ADDC(sum, t0)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
 bne len, rem, 1b
 .set noreorder
-copy_bytes_checklen:
+.Lcopy_bytes_checklen:
-beqz len, done
+beqz len, .Ldone
 nop
-copy_bytes:
+.Lcopy_bytes:
 /* 0 < len < NBYTES */
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define SHIFT_START 0
@@ -629,14 +629,14 @@ copy_bytes:
 #endif
 move t2, zero # partial word
 li t3, SHIFT_START # shift
-/* use l_exc_copy here to return correct sum on fault */
+/* use .Ll_exc_copy here to return correct sum on fault */
 #define COPY_BYTE(N) \
-EXC( lbu t0, N(src), l_exc_copy); \
+EXC( lbu t0, N(src), .Ll_exc_copy); \
 SUB len, len, 1; \
-EXC( sb t0, N(dst), s_exc); \
+EXC( sb t0, N(dst), .Ls_exc); \
 SLLV t0, t0, t3; \
 addu t3, SHIFT_INC; \
-beqz len, copy_bytes_done; \
+beqz len, .Lcopy_bytes_done; \
 or t2, t0
 COPY_BYTE(0)
@@ -647,14 +647,14 @@ EXC( sb t0, N(dst), s_exc); \
 COPY_BYTE(4)
 COPY_BYTE(5)
 #endif
-EXC( lbu t0, NBYTES-2(src), l_exc_copy)
+EXC( lbu t0, NBYTES-2(src), .Ll_exc_copy)
 SUB len, len, 1
-EXC( sb t0, NBYTES-2(dst), s_exc)
+EXC( sb t0, NBYTES-2(dst), .Ls_exc)
 SLLV t0, t0, t3
 or t2, t0
-copy_bytes_done:
+.Lcopy_bytes_done:
 ADDC(sum, t2)
-done:
+.Ldone:
 /* fold checksum */
 .set push
 .set noat
@@ -685,7 +685,7 @@ done:
 jr ra
 .set noreorder
-l_exc_copy:
+.Ll_exc_copy:
 /*
 * Copy bytes from src until faulting load address (or until a
 * lb faults)
@@ -700,7 +700,7 @@ l_exc_copy:
 li t2, SHIFT_START
 LOAD t0, THREAD_BUADDR(t0)
 1:
-EXC( lbu t1, 0(src), l_exc)
+EXC( lbu t1, 0(src), .Ll_exc)
 ADD src, src, 1
 sb t1, 0(dst) # can't fault -- we're copy_from_user
 SLLV t1, t1, t2
@@ -710,7 +710,7 @@ EXC( lbu t1, 0(src), l_exc)
 ADD dst, dst, 1
 bne src, t0, 1b
 .set noreorder
-l_exc:
+.Ll_exc:
 LOAD t0, TI_TASK($28)
 nop
 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address
@@ -729,7 +729,7 @@ l_exc:
 */
 .set reorder /* DADDI_WAR */
 SUB src, len, 1
-beqz len, done
+beqz len, .Ldone
 .set noreorder
 1: sb zero, 0(dst)
 ADD dst, dst, 1
@@ -744,10 +744,10 @@ l_exc:
 SUB src, src, v1
 #endif
 li v1, -EFAULT
-b done
+b .Ldone
 sw v1, (errptr)
-s_exc:
+.Ls_exc:
 li v0, -1 /* invalid checksum */
 li v1, -EFAULT
 jr ra


@@ -209,36 +209,36 @@ LEAF(__copy_user_inatomic)
 and t1, dst, ADDRMASK
 PREF( 0, 1*32(src) )
 PREF( 1, 1*32(dst) )
-bnez t2, copy_bytes_checklen
+bnez t2, .Lcopy_bytes_checklen
 and t0, src, ADDRMASK
 PREF( 0, 2*32(src) )
 PREF( 1, 2*32(dst) )
-bnez t1, dst_unaligned
+bnez t1, .Ldst_unaligned
 nop
-bnez t0, src_unaligned_dst_aligned
+bnez t0, .Lsrc_unaligned_dst_aligned
 /*
 * use delay slot for fall-through
 * src and dst are aligned; need to compute rem
 */
-both_aligned:
+.Lboth_aligned:
 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
-beqz t0, cleanup_both_aligned # len < 8*NBYTES
+beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
 and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES)
 PREF( 0, 3*32(src) )
 PREF( 1, 3*32(dst) )
 .align 4
 1:
-EXC( LOAD t0, UNIT(0)(src), l_exc)
+EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
-EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
-EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
+EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
-EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
+EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
 SUB len, len, 8*NBYTES
-EXC( LOAD t4, UNIT(4)(src), l_exc_copy)
+EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy)
-EXC( LOAD t7, UNIT(5)(src), l_exc_copy)
+EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy)
 STORE t0, UNIT(0)(dst)
 STORE t1, UNIT(1)(dst)
-EXC( LOAD t0, UNIT(6)(src), l_exc_copy)
+EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy)
-EXC( LOAD t1, UNIT(7)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy)
 ADD src, src, 8*NBYTES
 ADD dst, dst, 8*NBYTES
 STORE t2, UNIT(-6)(dst)
@@ -255,18 +255,18 @@ EXC( LOAD t1, UNIT(7)(src), l_exc_copy)
 /*
 * len == rem == the number of bytes left to copy < 8*NBYTES
 */
-cleanup_both_aligned:
+.Lcleanup_both_aligned:
-beqz len, done
+beqz len, .Ldone
 sltu t0, len, 4*NBYTES
-bnez t0, less_than_4units
+bnez t0, .Lless_than_4units
 and rem, len, (NBYTES-1) # rem = len % NBYTES
 /*
 * len >= 4*NBYTES
 */
-EXC( LOAD t0, UNIT(0)(src), l_exc)
+EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
-EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
-EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
+EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
-EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
+EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
 SUB len, len, 4*NBYTES
 ADD src, src, 4*NBYTES
 STORE t0, UNIT(0)(dst)
@@ -275,16 +275,16 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
 STORE t3, UNIT(3)(dst)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
-beqz len, done
+beqz len, .Ldone
 .set noreorder
-less_than_4units:
+.Lless_than_4units:
 /*
 * rem = len % NBYTES
 */
-beq rem, len, copy_bytes
+beq rem, len, .Lcopy_bytes
 nop
 1:
-EXC( LOAD t0, 0(src), l_exc)
+EXC( LOAD t0, 0(src), .Ll_exc)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
 STORE t0, 0(dst)
@@ -305,17 +305,17 @@ EXC( LOAD t0, 0(src), l_exc)
 * more instruction-level parallelism.
 */
 #define bits t2
-beqz len, done
+beqz len, .Ldone
 ADD t1, dst, len # t1 is just past last byte of dst
 li bits, 8*NBYTES
 SLL rem, len, 3 # rem = number of bits to keep
-EXC( LOAD t0, 0(src), l_exc)
+EXC( LOAD t0, 0(src), .Ll_exc)
 SUB bits, bits, rem # bits = number of bits to discard
 SHIFT_DISCARD t0, t0, bits
 STREST t0, -1(t1)
 jr ra
 move len, zero
-dst_unaligned:
+.Ldst_unaligned:
 /*
 * dst is unaligned
 * t0 = src & ADDRMASK
@@ -326,22 +326,22 @@ dst_unaligned:
 * Set match = (src and dst have same alignment)
 */
 #define match rem
-EXC( LDFIRST t3, FIRST(0)(src), l_exc)
+EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc)
 ADD t2, zero, NBYTES
-EXC( LDREST t3, REST(0)(src), l_exc_copy)
+EXC( LDREST t3, REST(0)(src), .Ll_exc_copy)
 SUB t2, t2, t1 # t2 = number of bytes copied
 xor match, t0, t1
 STFIRST t3, FIRST(0)(dst)
-beq len, t2, done
+beq len, t2, .Ldone
 SUB len, len, t2
 ADD dst, dst, t2
-beqz match, both_aligned
+beqz match, .Lboth_aligned
 ADD src, src, t2
-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
 PREF( 0, 3*32(src) )
-beqz t0, cleanup_src_unaligned
+beqz t0, .Lcleanup_src_unaligned
 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
 PREF( 1, 3*32(dst) )
 1:
@@ -351,15 +351,15 @@ src_unaligned_dst_aligned:
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
-EXC( LDFIRST t0, FIRST(0)(src), l_exc)
+EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
-EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy)
+EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy)
 SUB len, len, 4*NBYTES
-EXC( LDREST t0, REST(0)(src), l_exc_copy)
+EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
-EXC( LDREST t1, REST(1)(src), l_exc_copy)
+EXC( LDREST t1, REST(1)(src), .Ll_exc_copy)
-EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy)
+EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy)
-EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy)
+EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy)
-EXC( LDREST t2, REST(2)(src), l_exc_copy)
+EXC( LDREST t2, REST(2)(src), .Ll_exc_copy)
-EXC( LDREST t3, REST(3)(src), l_exc_copy)
+EXC( LDREST t3, REST(3)(src), .Ll_exc_copy)
 PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed)
 ADD src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
@@ -375,14 +375,14 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy)
 bne len, rem, 1b
 .set noreorder
-cleanup_src_unaligned:
+.Lcleanup_src_unaligned:
-beqz len, done
+beqz len, .Ldone
 and rem, len, NBYTES-1 # rem = len % NBYTES
-beq rem, len, copy_bytes
+beq rem, len, .Lcopy_bytes
 nop
 1:
-EXC( LDFIRST t0, FIRST(0)(src), l_exc)
+EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
-EXC( LDREST t0, REST(0)(src), l_exc_copy)
+EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
 STORE t0, 0(dst)
@@ -391,15 +391,15 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 bne len, rem, 1b
 .set noreorder
-copy_bytes_checklen:
+.Lcopy_bytes_checklen:
-beqz len, done
+beqz len, .Ldone
 nop
-copy_bytes:
+.Lcopy_bytes:
 /* 0 < len < NBYTES */
 #define COPY_BYTE(N) \
-EXC( lb t0, N(src), l_exc); \
+EXC( lb t0, N(src), .Ll_exc); \
 SUB len, len, 1; \
-beqz len, done; \
+beqz len, .Ldone; \
 sb t0, N(dst)
 COPY_BYTE(0)
@@ -410,16 +410,16 @@ EXC( lb t0, N(src), l_exc); \
 COPY_BYTE(4)
 COPY_BYTE(5)
 #endif
-EXC( lb t0, NBYTES-2(src), l_exc)
+EXC( lb t0, NBYTES-2(src), .Ll_exc)
 SUB len, len, 1
 jr ra
 sb t0, NBYTES-2(dst)
-done:
+.Ldone:
 jr ra
 nop
 END(__copy_user_inatomic)
-l_exc_copy:
+.Ll_exc_copy:
 /*
 * Copy bytes from src until faulting load address (or until a
 * lb faults)
@@ -434,14 +434,14 @@ l_exc_copy:
 nop
 LOAD t0, THREAD_BUADDR(t0)
 1:
-EXC( lb t1, 0(src), l_exc)
+EXC( lb t1, 0(src), .Ll_exc)
 ADD src, src, 1
 sb t1, 0(dst) # can't fault -- we're copy_from_user
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 1
 bne src, t0, 1b
 .set noreorder
-l_exc:
+.Ll_exc:
 LOAD t0, TI_TASK($28)
 nop
 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address


@@ -191,7 +191,7 @@
 .align 5
 LEAF(memcpy) /* a0=dst a1=src a2=len */
 move v0, dst /* return value */
-__memcpy:
+.L__memcpy:
 FEXPORT(__copy_user)
 /*
 * Note: dst & src may be unaligned, len may be 0
@@ -213,45 +213,45 @@ FEXPORT(__copy_user)
 and t1, dst, ADDRMASK
 PREF( 0, 1*32(src) )
 PREF( 1, 1*32(dst) )
-bnez t2, copy_bytes_checklen
+bnez t2, .Lcopy_bytes_checklen
 and t0, src, ADDRMASK
 PREF( 0, 2*32(src) )
 PREF( 1, 2*32(dst) )
-bnez t1, dst_unaligned
+bnez t1, .Ldst_unaligned
 nop
-bnez t0, src_unaligned_dst_aligned
+bnez t0, .Lsrc_unaligned_dst_aligned
 /*
 * use delay slot for fall-through
 * src and dst are aligned; need to compute rem
 */
-both_aligned:
+.Lboth_aligned:
 SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter
-beqz t0, cleanup_both_aligned # len < 8*NBYTES
+beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES
 and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES)
 PREF( 0, 3*32(src) )
 PREF( 1, 3*32(dst) )
 .align 4
 1:
 R10KCBARRIER(0(ra))
-EXC( LOAD t0, UNIT(0)(src), l_exc)
+EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
-EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
-EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
+EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
-EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
+EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
 SUB len, len, 8*NBYTES
-EXC( LOAD t4, UNIT(4)(src), l_exc_copy)
+EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy)
-EXC( LOAD t7, UNIT(5)(src), l_exc_copy)
+EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy)
-EXC( STORE t0, UNIT(0)(dst), s_exc_p8u)
+EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p8u)
-EXC( STORE t1, UNIT(1)(dst), s_exc_p7u)
+EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p7u)
-EXC( LOAD t0, UNIT(6)(src), l_exc_copy)
+EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy)
-EXC( LOAD t1, UNIT(7)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy)
 ADD src, src, 8*NBYTES
 ADD dst, dst, 8*NBYTES
-EXC( STORE t2, UNIT(-6)(dst), s_exc_p6u)
+EXC( STORE t2, UNIT(-6)(dst), .Ls_exc_p6u)
-EXC( STORE t3, UNIT(-5)(dst), s_exc_p5u)
+EXC( STORE t3, UNIT(-5)(dst), .Ls_exc_p5u)
-EXC( STORE t4, UNIT(-4)(dst), s_exc_p4u)
+EXC( STORE t4, UNIT(-4)(dst), .Ls_exc_p4u)
-EXC( STORE t7, UNIT(-3)(dst), s_exc_p3u)
+EXC( STORE t7, UNIT(-3)(dst), .Ls_exc_p3u)
-EXC( STORE t0, UNIT(-2)(dst), s_exc_p2u)
+EXC( STORE t0, UNIT(-2)(dst), .Ls_exc_p2u)
-EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u)
+EXC( STORE t1, UNIT(-1)(dst), .Ls_exc_p1u)
 PREF( 0, 8*32(src) )
 PREF( 1, 8*32(dst) )
 bne len, rem, 1b
@@ -260,41 +260,41 @@ EXC( STORE t1, UNIT(-1)(dst), s_exc_p1u)
 /*
 * len == rem == the number of bytes left to copy < 8*NBYTES
 */
-cleanup_both_aligned:
+.Lcleanup_both_aligned:
-beqz len, done
+beqz len, .Ldone
 sltu t0, len, 4*NBYTES
-bnez t0, less_than_4units
+bnez t0, .Lless_than_4units
 and rem, len, (NBYTES-1) # rem = len % NBYTES
 /*
 * len >= 4*NBYTES
 */
-EXC( LOAD t0, UNIT(0)(src), l_exc)
+EXC( LOAD t0, UNIT(0)(src), .Ll_exc)
-EXC( LOAD t1, UNIT(1)(src), l_exc_copy)
+EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy)
-EXC( LOAD t2, UNIT(2)(src), l_exc_copy)
+EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy)
-EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
+EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy)
 SUB len, len, 4*NBYTES
 ADD src, src, 4*NBYTES
 R10KCBARRIER(0(ra))
-EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
+EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u)
-EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
+EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u)
-EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
+EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u)
-EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
+EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
-beqz len, done
+beqz len, .Ldone
 .set noreorder
-less_than_4units:
+.Lless_than_4units:
 /*
 * rem = len % NBYTES
 */
-beq rem, len, copy_bytes
+beq rem, len, .Lcopy_bytes
 nop
 1:
 R10KCBARRIER(0(ra))
-EXC( LOAD t0, 0(src), l_exc)
+EXC( LOAD t0, 0(src), .Ll_exc)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
-EXC( STORE t0, 0(dst), s_exc_p1u)
+EXC( STORE t0, 0(dst), .Ls_exc_p1u)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
 bne rem, len, 1b
@@ -312,17 +312,17 @@ EXC( STORE t0, 0(dst), s_exc_p1u)
 * more instruction-level parallelism.
 */
 #define bits t2
-beqz len, done
+beqz len, .Ldone
 ADD t1, dst, len # t1 is just past last byte of dst
 li bits, 8*NBYTES
 SLL rem, len, 3 # rem = number of bits to keep
-EXC( LOAD t0, 0(src), l_exc)
+EXC( LOAD t0, 0(src), .Ll_exc)
 SUB bits, bits, rem # bits = number of bits to discard
 SHIFT_DISCARD t0, t0, bits
-EXC( STREST t0, -1(t1), s_exc)
+EXC( STREST t0, -1(t1), .Ls_exc)
 jr ra
 move len, zero
-dst_unaligned:
+.Ldst_unaligned:
 /*
 * dst is unaligned
 * t0 = src & ADDRMASK
@@ -333,23 +333,23 @@ dst_unaligned:
 * Set match = (src and dst have same alignment)
 */
 #define match rem
-EXC( LDFIRST t3, FIRST(0)(src), l_exc)
+EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc)
 ADD t2, zero, NBYTES
-EXC( LDREST t3, REST(0)(src), l_exc_copy)
+EXC( LDREST t3, REST(0)(src), .Ll_exc_copy)
 SUB t2, t2, t1 # t2 = number of bytes copied
 xor match, t0, t1
 R10KCBARRIER(0(ra))
-EXC( STFIRST t3, FIRST(0)(dst), s_exc)
+EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc)
-beq len, t2, done
+beq len, t2, .Ldone
 SUB len, len, t2
 ADD dst, dst, t2
-beqz match, both_aligned
+beqz match, .Lboth_aligned
 ADD src, src, t2
-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter
 PREF( 0, 3*32(src) )
-beqz t0, cleanup_src_unaligned
+beqz t0, .Lcleanup_src_unaligned
 and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES
 PREF( 1, 3*32(dst) )
 1:
@@ -360,58 +360,58 @@ src_unaligned_dst_aligned:
 * are to the same unit (unless src is aligned, but it's not).
 */
 R10KCBARRIER(0(ra))
-EXC( LDFIRST t0, FIRST(0)(src), l_exc)
+EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
-EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy)
+EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy)
 SUB len, len, 4*NBYTES
-EXC( LDREST t0, REST(0)(src), l_exc_copy)
+EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
-EXC( LDREST t1, REST(1)(src), l_exc_copy)
+EXC( LDREST t1, REST(1)(src), .Ll_exc_copy)
-EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy)
+EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy)
-EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy)
+EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy)
-EXC( LDREST t2, REST(2)(src), l_exc_copy)
+EXC( LDREST t2, REST(2)(src), .Ll_exc_copy)
-EXC( LDREST t3, REST(3)(src), l_exc_copy)
+EXC( LDREST t3, REST(3)(src), .Ll_exc_copy)
 PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed)
 ADD src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 nop # improves slotting
 #endif
-EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
+EXC( STORE t0, UNIT(0)(dst), .Ls_exc_p4u)
-EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
+EXC( STORE t1, UNIT(1)(dst), .Ls_exc_p3u)
-EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
+EXC( STORE t2, UNIT(2)(dst), .Ls_exc_p2u)
-EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
+EXC( STORE t3, UNIT(3)(dst), .Ls_exc_p1u)
 PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 4*NBYTES
 bne len, rem, 1b
 .set noreorder
-cleanup_src_unaligned:
+.Lcleanup_src_unaligned:
-beqz len, done
+beqz len, .Ldone
 and rem, len, NBYTES-1 # rem = len % NBYTES
-beq rem, len, copy_bytes
+beq rem, len, .Lcopy_bytes
 nop
 1:
 R10KCBARRIER(0(ra))
-EXC( LDFIRST t0, FIRST(0)(src), l_exc)
+EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc)
-EXC( LDREST t0, REST(0)(src), l_exc_copy)
+EXC( LDREST t0, REST(0)(src), .Ll_exc_copy)
 ADD src, src, NBYTES
 SUB len, len, NBYTES
-EXC( STORE t0, 0(dst), s_exc_p1u)
+EXC( STORE t0, 0(dst), .Ls_exc_p1u)
 .set reorder /* DADDI_WAR */
 ADD dst, dst, NBYTES
 bne len, rem, 1b
 .set noreorder
-copy_bytes_checklen:
+.Lcopy_bytes_checklen:
-beqz len, done
+beqz len, .Ldone
 nop
-copy_bytes:
+.Lcopy_bytes:
 /* 0 < len < NBYTES */
 R10KCBARRIER(0(ra))
 #define COPY_BYTE(N) \
-EXC( lb t0, N(src), l_exc); \
+EXC( lb t0, N(src), .Ll_exc); \
 SUB len, len, 1; \
-beqz len, done; \
+beqz len, .Ldone; \
-EXC( sb t0, N(dst), s_exc_p1)
+EXC( sb t0, N(dst), .Ls_exc_p1)
 COPY_BYTE(0)
 COPY_BYTE(1)
@@ -421,16 +421,16 @@ EXC( sb t0, N(dst), s_exc_p1)
 COPY_BYTE(4)
 COPY_BYTE(5)
 #endif
-EXC( lb t0, NBYTES-2(src), l_exc)
+EXC( lb t0, NBYTES-2(src), .Ll_exc)
 SUB len, len, 1
 jr ra
-EXC( sb t0, NBYTES-2(dst), s_exc_p1)
+EXC( sb t0, NBYTES-2(dst), .Ls_exc_p1)
-done:
+.Ldone:
 jr ra
 nop
 END(memcpy)
-l_exc_copy:
+.Ll_exc_copy:
 /*
 * Copy bytes from src until faulting load address (or until a
 * lb faults)
@@ -445,14 +445,14 @@ l_exc_copy:
 nop
 LOAD t0, THREAD_BUADDR(t0)
 1:
-EXC( lb t1, 0(src), l_exc)
+EXC( lb t1, 0(src), .Ll_exc)
 ADD src, src, 1
 sb t1, 0(dst) # can't fault -- we're copy_from_user
 .set reorder /* DADDI_WAR */
 ADD dst, dst, 1
 bne src, t0, 1b
 .set noreorder
-l_exc:
+.Ll_exc:
 LOAD t0, TI_TASK($28)
 nop
 LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address
@@ -471,7 +471,7 @@ l_exc:
 */
 .set reorder /* DADDI_WAR */
 SUB src, len, 1
-beqz len, done
+beqz len, .Ldone
 .set noreorder
 1: sb zero, 0(dst)
 ADD dst, dst, 1
@@ -492,7 +492,7 @@ l_exc:
 #define SEXC(n) \
 .set reorder; /* DADDI_WAR */ \
-s_exc_p ## n ## u: \
+.Ls_exc_p ## n ## u: \
 ADD len, len, n*NBYTES; \
 jr ra; \
 .set noreorder
@@ -506,12 +506,12 @@ SEXC(3)
 SEXC(2)
 SEXC(1)
-s_exc_p1:
+.Ls_exc_p1:
 .set reorder /* DADDI_WAR */
 ADD len, len, 1
 jr ra
 .set noreorder
-s_exc:
+.Ls_exc:
 jr ra
 nop
@@ -522,20 +522,20 @@ LEAF(memmove)
 sltu t0, a1, t0 # dst + len <= src -> memcpy
 sltu t1, a0, t1 # dst >= src + len -> memcpy
 and t0, t1
-beqz t0, __memcpy
+beqz t0, .L__memcpy
 move v0, a0 /* return value */
-beqz a2, r_out
+beqz a2, .Lr_out
 END(memmove)
 /* fall through to __rmemcpy */
 LEAF(__rmemcpy) /* a0=dst a1=src a2=len */
 sltu t0, a1, a0
-beqz t0, r_end_bytes_up # src >= dst
+beqz t0, .Lr_end_bytes_up # src >= dst
 nop
 ADD a0, a2 # dst = dst + len
 ADD a1, a2 # src = src + len
-r_end_bytes:
+.Lr_end_bytes:
 R10KCBARRIER(0(ra))
 lb t0, -1(a1)
 SUB a2, a2, 0x1
@@ -543,14 +543,14 @@ r_end_bytes:
 SUB a1, a1, 0x1
 .set reorder /* DADDI_WAR */
 SUB a0, a0, 0x1
-bnez a2, r_end_bytes
+bnez a2, .Lr_end_bytes
 .set noreorder
-r_out:
+.Lr_out:
 jr ra
 move a2, zero
-r_end_bytes_up:
+.Lr_end_bytes_up:
 R10KCBARRIER(0(ra))
 lb t0, (a1)
 SUB a2, a2, 0x1
@@ -558,7 +558,7 @@ r_end_bytes_up:
 ADD a1, a1, 0x1
 .set reorder /* DADDI_WAR */
 ADD a0, a0, 0x1
-bnez a2, r_end_bytes_up
+bnez a2, .Lr_end_bytes_up
 .set noreorder
 jr ra


@@ -72,7 +72,7 @@ LEAF(memset)
 FEXPORT(__bzero)
 sltiu t0, a2, LONGSIZE /* very small region? */
-bnez t0, small_memset
+bnez t0, .Lsmall_memset
 andi t0, a0, LONGMASK /* aligned? */
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
@@ -88,28 +88,28 @@ FEXPORT(__bzero)
 R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
-EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */
+EX(LONG_S_L, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */
 #endif
 #ifdef __MIPSEL__
-EX(LONG_S_R, a1, (a0), first_fixup) /* make word/dword aligned */
+EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */
 #endif
 PTR_SUBU a0, t0 /* long align ptr */
 PTR_ADDU a2, t0 /* correct size */
 1: ori t1, a2, 0x3f /* # of full blocks */
 xori t1, 0x3f
-beqz t1, memset_partial /* no block to fill */
+beqz t1, .Lmemset_partial /* no block to fill */
 andi t0, a2, 0x40-LONGSIZE
 PTR_ADDU t1, a0 /* end address */
 .set reorder
 1: PTR_ADDIU a0, 64
 R10KCBARRIER(0(ra))
-f_fill64 a0, -64, a1, fwd_fixup
+f_fill64 a0, -64, a1, .Lfwd_fixup
 bne t1, a0, 1b
 .set noreorder
-memset_partial:
+.Lmemset_partial:
 R10KCBARRIER(0(ra))
 PTR_LA t1, 2f /* where to start */
 #if LONGSIZE == 4
@@ -126,7 +126,7 @@ memset_partial:
 .set push
 .set noreorder
 .set nomacro
-f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */
+f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */
 2: .set pop
 andi a2, LONGMASK /* At most one long to go */
@@ -134,15 +134,15 @@ memset_partial:
 PTR_ADDU a0, a2 /* What's left */
 R10KCBARRIER(0(ra))
 #ifdef __MIPSEB__
-EX(LONG_S_R, a1, -1(a0), last_fixup)
+EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
 #endif
 #ifdef __MIPSEL__
-EX(LONG_S_L, a1, -1(a0), last_fixup)
+EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
 #endif
 1: jr ra
 move a2, zero
-small_memset:
+.Lsmall_memset:
 beqz a2, 2f
 PTR_ADDU t1, a0, a2
@@ -155,11 +155,11 @@ small_memset:
 move a2, zero
 END(memset)
-first_fixup:
+.Lfirst_fixup:
 jr ra
 nop
-fwd_fixup:
+.Lfwd_fixup:
 PTR_L t0, TI_TASK($28)
 LONG_L t0, THREAD_BUADDR(t0)
 andi a2, 0x3f
@@ -167,7 +167,7 @@ fwd_fixup:
 jr ra
 LONG_SUBU a2, t0
-partial_fixup:
+.Lpartial_fixup:
 PTR_L t0, TI_TASK($28)
 LONG_L t0, THREAD_BUADDR(t0)
 andi a2, LONGMASK
@@ -175,6 +175,6 @@ partial_fixup:
 jr ra
 LONG_SUBU a2, t0
-last_fixup:
+.Llast_fixup:
 jr ra
 andi v1, a2, LONGMASK


@@ -24,16 +24,16 @@
 LEAF(__strlen_user_asm)
 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
 and v0, a0
-bnez v0, fault
+bnez v0, .Lfault
 FEXPORT(__strlen_user_nocheck_asm)
 move v0, a0
-1: EX(lb, t0, (v0), fault)
+1: EX(lb, t0, (v0), .Lfault)
 PTR_ADDIU v0, 1
 bnez t0, 1b
 PTR_SUBU v0, a0
 jr ra
 END(__strlen_user_asm)
-fault: move v0, zero
+.Lfault: move v0, zero
 jr ra


@@ -30,13 +30,13 @@
 LEAF(__strncpy_from_user_asm)
 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
 and v0, a1
-bnez v0, fault
+bnez v0, .Lfault
 FEXPORT(__strncpy_from_user_nocheck_asm)
 move v0, zero
 move v1, a1
 .set noreorder
-1: EX(lbu, t0, (v1), fault)
+1: EX(lbu, t0, (v1), .Lfault)
 PTR_ADDIU v1, 1
 R10KCBARRIER(0(ra))
 beqz t0, 2f
@@ -47,13 +47,13 @@ FEXPORT(__strncpy_from_user_nocheck_asm)
 bne v0, a2, 1b
 2: PTR_ADDU t0, a1, v0
 xor t0, a1
-bltz t0, fault
+bltz t0, .Lfault
 jr ra # return n
 END(__strncpy_from_user_asm)
-fault: li v0, -EFAULT
+.Lfault: li v0, -EFAULT
 jr ra
 .section __ex_table,"a"
-PTR 1b, fault
+PTR 1b, .Lfault
 .previous


@@ -28,18 +28,19 @@
 LEAF(__strnlen_user_asm)
 LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
 and v0, a0
-bnez v0, fault
+bnez v0, .Lfault
 FEXPORT(__strnlen_user_nocheck_asm)
 move v0, a0
 PTR_ADDU a1, a0 # stop pointer
 1: beq v0, a1, 1f # limit reached?
-EX(lb, t0, (v0), fault)
+EX(lb, t0, (v0), .Lfault)
 PTR_ADDU v0, 1
 bnez t0, 1b
 1: PTR_SUBU v0, a0
 jr ra
 END(__strnlen_user_asm)
-fault: move v0, zero
+.Lfault:
+move v0, zero
 jr ra