arch/sparc: Avoid DCTI Couples

Avoid un-intended DCTI Couples. Use of DCTI couples is deprecated.
Also address the "Programming Note" for optimal performance.

Here is the complete text from Oracle SPARC Architecture Specs.

6.3.4.7 DCTI Couples
"A delayed control transfer instruction (DCTI) in the delay slot of
another DCTI is referred to as a “DCTI couple”. The use of DCTI couples
is deprecated in the Oracle SPARC Architecture; no new software should
place a DCTI in the delay slot of another DCTI, because on future Oracle
SPARC Architecture implementations DCTI couples may execute either
slowly or differently than the programmer assumes it will.

SPARC V8 and SPARC V9 Compatibility Note
The SPARC V8 architecture left behavior undefined for a DCTI couple. The
SPARC V9 architecture defined behavior in that case, but as of
UltraSPARC Architecture 2005, use of DCTI couples was deprecated.
Software should not expect high performance from DCTI couples, and
performance of DCTI couples should be expected to decline further in
future processors.

Programming Note
As noted in TABLE 6-5 on page 115, an annulled branch-always
(branch-always with a = 1) instruction is not architecturally a DCTI.
However, since not all implementations make that distinction, for
optimal performance, a DCTI should not be placed in the instruction word
immediately following an annulled branch-always instruction (BA,A or
BPA,A)."

Signed-off-by: Babu Moger <babu.moger@oracle.com>
Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Babu Moger 2017-03-17 14:52:21 -06:00 committed by David S. Miller
parent adfae8a5d8
commit 0ae2d26ffe
11 changed files with 18 additions and 0 deletions

View File

@ -96,6 +96,7 @@ sparc64_boot:
andn %g1, PSTATE_AM, %g1
wrpr %g1, 0x0, %pstate
ba,a,pt %xcc, 1f
nop
.globl prom_finddev_name, prom_chosen_path, prom_root_node
.globl prom_getprop_name, prom_mmu_name, prom_peer_name
@ -613,6 +614,7 @@ niagara_tlb_fixup:
nop
ba,a,pt %xcc, 80f
nop
niagara4_patch:
call niagara4_patch_copyops
nop
@ -622,6 +624,7 @@ niagara4_patch:
nop
ba,a,pt %xcc, 80f
nop
niagara2_patch:
call niagara2_patch_copyops
@ -632,6 +635,7 @@ niagara2_patch:
nop
ba,a,pt %xcc, 80f
nop
niagara_patch:
call niagara_patch_copyops

View File

@ -82,6 +82,7 @@ do_stdfmna:
call handle_stdfmna
add %sp, PTREGS_OFF, %o0
ba,a,pt %xcc, rtrap
nop
.size do_stdfmna,.-do_stdfmna
.type breakpoint_trap,#function

View File

@ -237,6 +237,7 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
bne,pt %xcc, user_rtt_fill_32bit
wrpr %g1, %cwp
ba,a,pt %xcc, user_rtt_fill_64bit
nop
user_rtt_fill_fixup_dax:
ba,pt %xcc, user_rtt_fill_fixup_common

View File

@ -86,6 +86,7 @@ __spitfire_cee_trap_continue:
rd %pc, %g7
ba,a,pt %xcc, 2f
nop
1: ba,pt %xcc, etrap_irq
rd %pc, %g7

View File

@ -352,6 +352,7 @@ sun4v_mna:
call sun4v_do_mna
add %sp, PTREGS_OFF, %o0
ba,a,pt %xcc, rtrap
nop
/* Privileged Action. */
sun4v_privact:

View File

@ -92,6 +92,7 @@ user_rtt_fill_fixup_common:
call sun4v_data_access_exception
nop
ba,a,pt %xcc, rtrap
nop
1: call spitfire_data_access_exception
nop

View File

@ -152,6 +152,8 @@ fill_fixup_dax:
call sun4v_data_access_exception
nop
ba,a,pt %xcc, rtrap
nop
1: call spitfire_data_access_exception
nop
ba,a,pt %xcc, rtrap
nop

View File

@ -326,11 +326,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
blu 170f
nop
ba,a,pt %xcc, 180f
nop
4: /* 32 <= low bits < 48 */
blu 150f
nop
ba,a,pt %xcc, 160f
nop
5: /* 0 < low bits < 32 */
blu,a 6f
cmp %g2, 8
@ -338,6 +340,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
blu 130f
nop
ba,a,pt %xcc, 140f
nop
6: /* 0 < low bits < 16 */
bgeu 120f
nop
@ -475,6 +478,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
brz,pt %o2, 85f
sub %o0, %o1, GLOBAL_SPARE
ba,a,pt %XCC, 90f
nop
.align 64
75: /* 16 < len <= 64 */

View File

@ -530,4 +530,5 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
bne,pt %icc, 1b
EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
ba,a,pt %icc, .Lexit
nop
.size FUNC_NAME, .-FUNC_NAME

View File

@ -102,4 +102,5 @@ NG4bzero:
bne,pt %icc, 1b
add %o0, 0x30, %o0
ba,a,pt %icc, .Lpostloop
nop
.size NG4bzero,.-NG4bzero

View File

@ -394,6 +394,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
brz,pt %i2, 85f
sub %o0, %i1, %i3
ba,a,pt %XCC, 90f
nop
.align 64
70: /* 16 < len <= 64 */