// cosmopolitan/libc/nexgen32e/djbsort-avx2.S
#include "libc/macros.h"
.source __FILE__
// D.J. Bernstein's outrageously fast integer sorting algorithm.
//
// @param rdi is int32 array
// @param rsi is number of elements in rdi
// @note public domain
// @see en.wikipedia.org/wiki/Sorting_network
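//
// A minimal C-level calling sketch (the prototype below is an
// assumption for illustration, not the declaration shipped with
// this file):
//
//     void djbsort_avx2(int32_t *a, long n);
//     int32_t a[4] = {3, 1, 4, 1};
//     djbsort_avx2(a, 4);            // a becomes {1, 1, 3, 4}
//
// Strategy: counts of eight or fewer fall through the chain of
// branchless compare-exchange passes below; exact powers of two go
// straight to int32_sort_2power; anything else is either padded
// with INT32_MAX sentinels up to a power of two (small inputs) or
// split into a power-of-two head plus a recursively sorted tail
// and merged (large inputs).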
djbsort_avx2:
push %rbp
mov %rsp,%rbp
push %r15
push %r14
push %r13
mov %rsi,%r13
push %r12
mov %rdi,%r12
push %rbx
andq $-32,%rsp
sub $1056,%rsp
cmp $8,%rsi
jg .L148
jne .L149
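// n <= 8: branchless bubble sort; each block below is one pass of
// adjacent compare-exchanges (cmp plus two cmovg = one conditional
// swap), and each pass chains into the next shorter one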
mov (%rdi),%eax
mov 4(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,(%rdi)
mov 8(%rdi),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,4(%rdi)
mov 12(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,8(%rdi)
mov 16(%rdi),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,12(%rdi)
mov 20(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,16(%rdi)
mov 24(%rdi),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,20(%rdi)
mov 28(%rdi),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,24(%rdi)
mov %edx,28(%rdi)
jmp .L150
.L149: cmp $7,%rsi
jne .L151
.L150: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov 8(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,4(%r12)
mov 12(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,8(%r12)
mov 16(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,12(%r12)
mov 20(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,16(%r12)
mov 24(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,20(%r12)
mov %edx,24(%r12)
jmp .L152
.L151: cmp $6,%rsi
jne .L153
.L152: mov (%r12),%eax
mov 4(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,(%r12)
mov 8(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,4(%r12)
mov 12(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,8(%r12)
mov 16(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,12(%r12)
mov 20(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,16(%r12)
mov %edx,20(%r12)
jmp .L154
.L153: cmp $5,%rsi
jne .L155
.L154: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov 8(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,4(%r12)
mov 12(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,8(%r12)
mov 16(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,12(%r12)
mov %edx,16(%r12)
jmp .L156
.L155: cmp $4,%rsi
jne .L157
.L156: mov (%r12),%eax
mov 4(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,(%r12)
mov 8(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,4(%r12)
mov 12(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,8(%r12)
mov %edx,12(%r12)
jmp .L158
.L157: cmp $3,%rsi
jne .L159
.L158: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov 8(%r12),%edx
cmp %edx,%eax
mov %eax,%ecx
cmovg %edx,%eax
cmovg %ecx,%edx
mov %eax,4(%r12)
mov %edx,8(%r12)
jmp .L160
.L159: cmp $2,%rsi
jne .L147
.L160: mov (%r12),%edx
mov 4(%r12),%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r12)
mov %eax,4(%r12)
jmp .L147
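// n > 8: n&(n-1) is zero iff n is a power of two, in which case the
// whole array can be sorted directly as one power-of-two network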
.L148: lea -1(%rsi),%rax
mov $8,%ebx
test %rsi,%rax
jne .L162
xor %edx,%edx
call int32_sort_2power
jmp .L147
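// otherwise find the smallest power of two rbx with n <= 2*rbx, by
// doubling rbx until the remainder n-rbx fits inside rbx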
.L162: mov %r13,%r14
sub %rbx,%r14
cmp %rbx,%r14
jle .L199
add %rbx,%rbx
jmp .L162
.L199: cmp $128,%rbx
jg .L164
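// rbx <= 128: fill elements rbx..2*rbx-1 of the aligned stack
// buffer with INT32_MAX sentinels (.LC4), copy the n inputs over
// it, sort all 2*rbx elements as a power of two, and copy the
// first n back out; the sentinels sort harmlessly to the end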
mov %rbx,%rax
mov %rbx,%rdx
vmovdqa .LC4(%rip),%ymm0
sar $3,%rax
sar $2,%rdx
.L165: cmp %rdx,%rax
jge .L200
mov %rax,%rcx
incq %rax
salq $5,%rcx
vmovdqa %ymm0,32(%rsp,%rcx)
jmp .L165
.L200: xor %eax,%eax
.L167: mov (%r12,%rax,4),%edx
mov %rax,%r14
mov %edx,32(%rsp,%rax,4)
lea 1(%rax),%rax
cmp %rax,%r13
jne .L167
lea (%rbx,%rbx),%rsi
xor %edx,%edx
lea 32(%rsp),%rdi
call int32_sort_2power
xor %eax,%eax
.L168: mov 32(%rsp,%rax,4),%ecx
mov %rax,%rdx
mov %ecx,(%r12,%rax,4)
incq %rax
cmp %rdx,%r14
jne .L168
jmp .L147
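// rbx > 128: sort the power-of-two head with the flag argument set,
// recursively sort the tail, then merge the two runs with the
// three-stage compare-exchange passes below (.L175), shrinking the
// stride 8x per pass until the inlined fixed-stride cleanup takes
// over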
.L164: mov %rbx,%rsi
mov %r12,%rdi
mov $1,%edx
call int32_sort_2power
lea (%r12,%rbx,4),%rdi
mov %r14,%rsi
call djbsort_avx2
.L175: mov %rbx,%r14
mov %r13,%rsi
mov %r12,%rdi
sar $2,%r14
mov %r14,%rdx
call int32_threestages
lea 0(,%r14,4),%r10
mov %r13,%rdx
lea (%r10,%rax),%r11
sub %r10,%rdx
lea (%r12,%rax,4),%rdi
mov %rax,%r9
sub %rax,%rdx
lea (%r12,%r11,4),%rsi
call minmax_vector
lea (%r14,%r14),%rax
mov %rax,24(%rsp)
cmp %r13,%r11
jg .L169
imul $-8,%r14,%rax
lea (%r12,%r10),%rdx
lea (%rdx,%r10),%rcx
lea (%r14,%r9),%r15
lea (%rcx,%r10),%rdi
add %rdi,%rax
lea (%rax,%r10),%rsi
lea (%rsi,%r10),%r8
.L170: cmp %r9,%r15
jle .L201
vmovdqu (%rcx,%r9,4),%ymm7
vmovdqu (%rdi,%r9,4),%ymm6
vpminsd (%r12,%r9,4),%ymm7,%ymm2
vpminsd (%rdx,%r9,4),%ymm6,%ymm3
vpmaxsd (%r12,%r9,4),%ymm7,%ymm0
vpmaxsd (%rdx,%r9,4),%ymm6,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%r12,%r9,4)
vmovdqu %ymm2,(%rax,%r9,4)
vmovdqu %ymm3,(%rsi,%r9,4)
vmovdqu %ymm0,(%r8,%r9,4)
add $8,%r9
jmp .L170
.L201: mov %r11,%r9
.L169: mov 24(%rsp),%rax
lea (%r14,%r14),%r15
mov %r13,%r11
lea (%r12,%r9,4),%rdi
sub %r15,%r11
add %r9,%rax
mov %r11,%rdx
lea (%r12,%rax,4),%rsi
sub %r9,%rdx
call minmax_vector
lea (%r15,%r9),%rax
cmp %r13,%rax
jg .L172
mov %rax,%rdx
add %r12,%r10
sub %r14,%rdx
.L173: cmp %r9,%rdx
jle .L202
vmovdqu (%r10,%r9,4),%ymm6
vpminsd (%r12,%r9,4),%ymm6,%ymm1
vpmaxsd (%r12,%r9,4),%ymm6,%ymm0
vmovdqu %ymm1,(%r12,%r9,4)
vmovdqu %ymm0,(%r10,%r9,4)
add $8,%r9
jmp .L173
.L202: mov %rax,%r9
.L172: lea (%r11,%r14),%rdx
add %r9,%r14
lea (%r12,%r9,4),%rdi
sar $3,%rbx
sub %r9,%rdx
lea (%r12,%r14,4),%rsi
call minmax_vector
cmp $63,%rbx
jg .L175
cmp $32,%rbx
jne .L176
mov %r12,%rax
mov $63,%edx
.L177: cmp %r13,%rdx
jge .L203
vmovdqu (%rax),%ymm6
add $64,%rdx
add $256,%rax
vpminsd -128(%rax),%ymm6,%ymm10
vpmaxsd -128(%rax),%ymm6,%ymm8
vmovdqu -224(%rax),%ymm6
vpminsd -96(%rax),%ymm6,%ymm3
vpmaxsd -96(%rax),%ymm6,%ymm0
vmovdqu -192(%rax),%ymm6
vpminsd -64(%rax),%ymm6,%ymm2
vpmaxsd -64(%rax),%ymm6,%ymm1
vmovdqu -160(%rax),%ymm6
vpmaxsd -32(%rax),%ymm6,%ymm4
vpminsd -32(%rax),%ymm6,%ymm13
vpminsd %ymm2,%ymm10,%ymm15
vpminsd %ymm1,%ymm8,%ymm12
vpminsd %ymm13,%ymm3,%ymm11
vpminsd %ymm4,%ymm0,%ymm5
vpmaxsd %ymm1,%ymm8,%ymm1
vpmaxsd %ymm2,%ymm10,%ymm2
vpmaxsd %ymm13,%ymm3,%ymm13
vpmaxsd %ymm4,%ymm0,%ymm0
vpminsd %ymm13,%ymm2,%ymm10
vpminsd %ymm0,%ymm1,%ymm4
vpminsd %ymm5,%ymm12,%ymm9
vpminsd %ymm11,%ymm15,%ymm14
vpmaxsd %ymm13,%ymm2,%ymm13
vpmaxsd %ymm0,%ymm1,%ymm0
vpmaxsd %ymm11,%ymm15,%ymm15
vpmaxsd %ymm5,%ymm12,%ymm12
vperm2i128 $32,%ymm13,%ymm10,%ymm6
vperm2i128 $32,%ymm12,%ymm9,%ymm5
vperm2i128 $32,%ymm0,%ymm4,%ymm8
vperm2i128 $32,%ymm15,%ymm14,%ymm11
vperm2i128 $49,%ymm0,%ymm4,%ymm0
vperm2i128 $49,%ymm12,%ymm9,%ymm12
vperm2i128 $49,%ymm15,%ymm14,%ymm14
vperm2i128 $49,%ymm13,%ymm10,%ymm13
vpminsd %ymm14,%ymm11,%ymm3
vpminsd %ymm12,%ymm5,%ymm1
vpminsd %ymm13,%ymm6,%ymm2
vpmaxsd %ymm12,%ymm5,%ymm9
vpmaxsd %ymm14,%ymm11,%ymm11
vpminsd %ymm0,%ymm8,%ymm12
vperm2i128 $32,%ymm9,%ymm1,%ymm5
vpmaxsd %ymm0,%ymm8,%ymm8
vpmaxsd %ymm13,%ymm6,%ymm10
vperm2i128 $32,%ymm11,%ymm3,%ymm7
vperm2i128 $32,%ymm10,%ymm2,%ymm6
vperm2i128 $49,%ymm11,%ymm3,%ymm11
vperm2i128 $49,%ymm10,%ymm2,%ymm10
vperm2i128 $49,%ymm9,%ymm1,%ymm9
vperm2i128 $32,%ymm8,%ymm12,%ymm4
vperm2i128 $49,%ymm8,%ymm12,%ymm8
vpunpcklqdq %ymm11,%ymm7,%ymm3
vpunpcklqdq %ymm10,%ymm6,%ymm2
vpunpcklqdq %ymm9,%ymm5,%ymm1
vpunpcklqdq %ymm8,%ymm4,%ymm0
vpunpckhqdq %ymm11,%ymm7,%ymm7
vpunpckhqdq %ymm10,%ymm6,%ymm6
vpunpckhqdq %ymm9,%ymm5,%ymm5
vpunpckhqdq %ymm8,%ymm4,%ymm4
vpminsd %ymm3,%ymm7,%ymm11
vpminsd %ymm2,%ymm6,%ymm10
vpminsd %ymm1,%ymm5,%ymm9
vpminsd %ymm0,%ymm4,%ymm8
vpmaxsd %ymm3,%ymm7,%ymm7
vpmaxsd %ymm2,%ymm6,%ymm6
vpmaxsd %ymm1,%ymm5,%ymm5
vpunpckldq %ymm7,%ymm11,%ymm3
vpmaxsd %ymm0,%ymm4,%ymm4
vpunpckhdq %ymm7,%ymm11,%ymm7
vpunpckldq %ymm6,%ymm10,%ymm2
vpunpckldq %ymm5,%ymm9,%ymm1
vpunpckhdq %ymm6,%ymm10,%ymm6
vpunpckhdq %ymm5,%ymm9,%ymm5
vpunpckldq %ymm4,%ymm8,%ymm0
vpunpckhdq %ymm4,%ymm8,%ymm4
vpunpcklqdq %ymm7,%ymm3,%ymm10
vpunpcklqdq %ymm5,%ymm1,%ymm8
vpunpckhqdq %ymm7,%ymm3,%ymm3
vpunpcklqdq %ymm6,%ymm2,%ymm9
vpunpcklqdq %ymm4,%ymm0,%ymm7
vpunpckhqdq %ymm6,%ymm2,%ymm2
vpunpckhqdq %ymm5,%ymm1,%ymm1
vpunpckhqdq %ymm4,%ymm0,%ymm0
vpminsd %ymm8,%ymm1,%ymm5
vpminsd %ymm9,%ymm2,%ymm6
vpminsd %ymm7,%ymm0,%ymm4
vpminsd %ymm10,%ymm3,%ymm11
vpmaxsd %ymm8,%ymm1,%ymm1
vpmaxsd %ymm7,%ymm0,%ymm0
vpmaxsd %ymm10,%ymm3,%ymm3
vpmaxsd %ymm9,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm6,%ymm7
vpunpckldq %ymm3,%ymm11,%ymm8
vpunpckhdq %ymm2,%ymm6,%ymm2
vpunpckhdq %ymm3,%ymm11,%ymm3
vpunpckldq %ymm1,%ymm5,%ymm6
vpunpckhdq %ymm1,%ymm5,%ymm1
vmovdqu %ymm8,-256(%rax)
vpunpckldq %ymm0,%ymm4,%ymm5
vpunpckhdq %ymm0,%ymm4,%ymm0
vmovdqu %ymm3,-224(%rax)
vmovdqu %ymm7,-192(%rax)
vmovdqu %ymm2,-160(%rax)
vmovdqu %ymm6,-128(%rax)
vmovdqu %ymm1,-96(%rax)
vmovdqu %ymm5,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L177
.L203: mov %r13,%rdi
mov %r13,%r9
lea -32(%r13),%rdx
shr $6,%rdi
andq $-64,%r9
salq $8,%rdi
sub %r9,%rdx
lea 128(%r12,%rdi),%rsi
add %r12,%rdi
call minmax_vector
jmp .L180
.L176: xor %r10d,%r10d
cmp $16,%rbx
jne .L181
xor %r9d,%r9d
.L180: lea 31(%r9),%rax
.L179: cmp %r13,%rax
jge .L204
vmovdqu -124(%r12,%rax,4),%ymm6
vpminsd -60(%r12,%rax,4),%ymm6,%ymm5
vpmaxsd -60(%r12,%rax,4),%ymm6,%ymm0
vmovdqu -92(%r12,%rax,4),%ymm6
vpminsd -28(%r12,%rax,4),%ymm6,%ymm1
vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2
vpminsd %ymm1,%ymm5,%ymm3
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm1,%ymm5,%ymm5
vpmaxsd %ymm2,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm4,%ymm2
vperm2i128 $32,%ymm5,%ymm3,%ymm1
vperm2i128 $49,%ymm0,%ymm4,%ymm0
vperm2i128 $49,%ymm5,%ymm3,%ymm3
vpminsd %ymm0,%ymm2,%ymm4
vpmaxsd %ymm0,%ymm2,%ymm0
vpminsd %ymm3,%ymm1,%ymm5
vpmaxsd %ymm3,%ymm1,%ymm1
vperm2i128 $32,%ymm0,%ymm4,%ymm2
vperm2i128 $32,%ymm1,%ymm5,%ymm3
vperm2i128 $49,%ymm0,%ymm4,%ymm4
vperm2i128 $49,%ymm1,%ymm5,%ymm5
vpunpcklqdq %ymm5,%ymm3,%ymm1
vpunpcklqdq %ymm4,%ymm2,%ymm0
vpunpckhqdq %ymm5,%ymm3,%ymm3
vpunpckhqdq %ymm4,%ymm2,%ymm2
vpminsd %ymm3,%ymm1,%ymm5
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm2,%ymm0,%ymm0
vpunpckldq %ymm1,%ymm5,%ymm3
vpunpckldq %ymm0,%ymm4,%ymm2
vpunpckhdq %ymm1,%ymm5,%ymm5
vpunpckhdq %ymm0,%ymm4,%ymm4
vpunpcklqdq %ymm5,%ymm3,%ymm1
vpunpcklqdq %ymm4,%ymm2,%ymm0
vpunpckhqdq %ymm5,%ymm3,%ymm3
vpunpckhqdq %ymm4,%ymm2,%ymm2
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpunpckldq %ymm1,%ymm4,%ymm5
vpunpckldq %ymm0,%ymm3,%ymm2
vpunpckhdq %ymm1,%ymm4,%ymm1
vpunpckhdq %ymm0,%ymm3,%ymm0
vmovdqu %ymm5,-124(%r12,%rax,4)
vmovdqu %ymm1,-92(%r12,%rax,4)
vmovdqu %ymm2,-60(%r12,%rax,4)
vmovdqu %ymm0,-28(%r12,%rax,4)
add $32,%rax
jmp .L179
.L204: mov %r13,%r10
xor %edx,%edx
lea 0(,%r9,4),%rax
sub %r9,%r10
mov %r10,%rdi
andq $-32,%r10
shr $5,%rdi
cmp %r9,%r13
cmovl %rdx,%r10
salq $7,%rdi
add %r9,%r10
cmp %r9,%r13
cmovl %rdx,%rdi
lea -16(%r13),%rdx
sub %r10,%rdx
lea 64(%rax,%rdi),%rsi
add %rax,%rdi
add %r12,%rsi
add %r12,%rdi
call minmax_vector
.L181: lea 15(%r10),%rax
.L183: cmp %r13,%rax
jge .L205
vmovdqu -60(%r12,%rax,4),%ymm6
vpmaxsd -28(%r12,%rax,4),%ymm6,%ymm2
vpminsd -28(%r12,%rax,4),%ymm6,%ymm1
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-60(%r12,%rax,4)
vmovdqu %ymm0,-28(%r12,%rax,4)
add $16,%rax
jmp .L183
.L205: mov %r13,%r9
xor %edx,%edx
lea 0(,%r10,4),%rcx
sub %r10,%r9
mov %r9,%rax
andq $-16,%r9
shr $4,%rax
cmp %r10,%r13
cmovl %rdx,%r9
salq $6,%rax
add %r10,%r9
cmp %r10,%r13
cmovl %rdx,%rax
lea -8(%r13),%rdx
sub %r9,%rdx
lea (%rax,%rcx),%r10
lea 32(%rcx,%rax),%rsi
add %r12,%r10
add %r12,%rsi
mov %r10,%rdi
call minmax_vector
lea 7(%r9),%rax
cmp %r13,%rax
jge .L185
lea 16(,%r9,4),%rax
mov (%r10),%ecx
add $8,%r9
lea -12(%r12,%rax),%r14
lea (%r12,%rax),%rbx
lea 4(%r12,%rax),%r11
mov (%rbx),%edx
lea 8(%r12,%rax),%r8
cmp %edx,%ecx
mov %ecx,%esi
cmovg %edx,%ecx
cmovg %esi,%edx
mov %ecx,(%r10)
mov %edx,(%rbx)
mov (%r14),%ecx
mov (%r11),%edx
cmp %edx,%ecx
mov %ecx,%esi
cmovg %edx,%ecx
cmovg %esi,%edx
lea -8(%r12,%rax),%rsi
mov %ecx,(%r14)
mov %edx,(%r11)
mov (%rsi),%ecx
mov (%r8),%edx
cmp %edx,%ecx
mov %ecx,%edi
cmovg %edx,%ecx
cmovg %edi,%edx
lea 12(%r12,%rax),%rdi
mov %ecx,(%rsi)
lea -4(%r12,%rax),%rcx
mov %edx,(%r8)
mov (%rcx),%edx
mov (%rdi),%eax
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%rcx)
mov %eax,(%rdi)
mov (%r10),%edx
mov (%rsi),%eax
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%r10)
mov %eax,(%rsi)
mov (%rcx),%eax
mov (%r14),%edx
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%r14)
mov %eax,(%rcx)
mov (%r10),%edx
mov (%r14),%eax
cmp %eax,%edx
mov %edx,%r15d
cmovg %eax,%edx
cmovg %r15d,%eax
mov %edx,(%r10)
mov %eax,(%r14)
mov (%rsi),%edx
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%r10d
cmovg %eax,%edx
cmovg %r10d,%eax
mov %edx,(%rsi)
mov %eax,(%rcx)
mov (%rbx),%edx
mov (%r8),%esi
mov (%rdi),%ecx
cmp %esi,%edx
mov %edx,%eax
cmovg %esi,%edx
cmovg %eax,%esi
mov (%r11),%eax
cmp %ecx,%eax
mov %eax,%r10d
cmovg %ecx,%eax
cmovg %r10d,%ecx
cmp %eax,%edx
mov %edx,%r10d
cmovg %eax,%edx
cmovg %r10d,%eax
mov %edx,(%rbx)
mov %esi,%edx
mov %eax,(%r11)
mov %ecx,%eax
cmp %eax,%edx
mov %edx,%ecx
cmovg %eax,%edx
cmovg %ecx,%eax
mov %edx,(%r8)
mov %eax,(%rdi)
.L185: lea 4(%r9),%r10
lea -4(%r13),%rdx
lea 0(,%r10,4),%rbx
sub %r9,%rdx
lea -16(%r12,%rbx),%r11
lea (%r12,%rbx),%rsi
mov %r11,%rdi
call minmax_vector
lea 3(%r9),%rax
cmp %r13,%rax
jge .L186
lea -8(%r12,%rbx),%rcx
mov (%r11),%edx
lea -12(%r12,%rbx),%rdi
mov %r10,%r9
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%esi
cmovg %eax,%edx
cmovg %esi,%eax
lea -4(%r12,%rbx),%rsi
mov %edx,(%r11)
mov %eax,(%rcx)
mov (%rdi),%edx
mov (%rsi),%eax
cmp %eax,%edx
mov %edx,%r8d
cmovg %eax,%edx
cmovg %r8d,%eax
mov %edx,(%rdi)
mov %eax,(%rsi)
mov (%rdi),%eax
mov (%r11),%edx
cmp %eax,%edx
mov %edx,%r8d
cmovg %eax,%edx
cmovg %r8d,%eax
mov %edx,(%r11)
mov %eax,(%rdi)
mov (%rcx),%edx
mov (%rsi),%eax
cmp %eax,%edx
mov %edx,%edi
cmovg %eax,%edx
cmovg %edi,%eax
mov %edx,(%rcx)
mov %eax,(%rsi)
.L186: lea 2(%r9),%rax
cmp %r13,%rax
jge .L187
lea 0(,%r9,4),%rax
lea (%r12,%rax),%rsi
lea 8(%r12,%rax),%rcx
mov (%rsi),%edx
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%edi
cmovg %eax,%edx
cmovg %edi,%eax
mov %edx,(%rsi)
mov %eax,(%rcx)
.L187: lea 1(%r9),%rax
cmp %r13,%rax
jge .L147
salq $2,%r9
lea (%r12,%r9),%rsi
lea 4(%r12,%r9),%rcx
mov (%rsi),%edx
mov (%rcx),%eax
cmp %eax,%edx
mov %edx,%edi
cmovg %eax,%edx
cmovg %edi,%eax
mov %edx,(%rsi)
mov %eax,(%rcx)
.L147: lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn djbsort_avx2,globl,hidden
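// Elementwise compare-exchange between two int32 arrays.
//
// Stores each pairwise minimum into the first array and each
// maximum into the second, eight lanes at a time, with a scalar
// loop for short inputs; a ragged tail is handled by an overlapping
// final vector, which is harmless because compare-exchange is
// idempotent.
//
// @param rdi is first int32 array
// @param rsi is second int32 array
// @param rdx is number of elements in each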
minmax_vector:
cmp $7,%rdx
jg .L13
.L2: test %rdx,%rdx
jle .L15
mov (%rdi),%ecx
mov (%rsi),%eax
add $4,%rdi
add $4,%rsi
cmp %eax,%ecx
mov %ecx,%r8d
cmovg %eax,%ecx
cmovg %r8d,%eax
decq %rdx
mov %ecx,-4(%rdi)
mov %eax,-4(%rsi)
jmp .L2
.L15: ret
.L13: testb $7,%dl
je .L6
lea -32(,%rdx,4),%rax
andq $-8,%rdx
lea (%rdi,%rax),%rcx
add %rsi,%rax
vmovdqu (%rax),%ymm2
vpminsd (%rcx),%ymm2,%ymm1
vpmaxsd (%rcx),%ymm2,%ymm0
vmovdqu %ymm1,(%rcx)
vmovdqu %ymm0,(%rax)
.L6: xor %eax,%eax
.L7: vmovdqu (%rdi,%rax),%ymm4
vpminsd (%rsi,%rax),%ymm4,%ymm1
vpmaxsd (%rsi,%rax),%ymm4,%ymm0
vmovdqu %ymm1,(%rdi,%rax)
vmovdqu %ymm0,(%rsi,%rax)
add $32,%rax
sub $8,%rdx
jne .L7
ret
.endfn minmax_vector
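// Two compare-exchange stages at distances 64 and 32 int32s.
//
// Consumes the array 128 elements at a time, exchanging each lane
// first with its partner 64 elements away and then with its partner
// 32 elements away.
//
// @param rdi is int32 array
// @param rsi is number of elements, consumed 128 at a time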
int32_twostages_32:
sub $-128,%rdi
.L17: lea -128(%rdi),%rax
test %rsi,%rsi
jle .L21
.L18: vmovdqu (%rax),%ymm5
vmovdqu 128(%rax),%ymm7
add $32,%rax
vpminsd 352(%rax),%ymm7,%ymm3
vpminsd 224(%rax),%ymm5,%ymm2
vpmaxsd 224(%rax),%ymm5,%ymm0
vpmaxsd 352(%rax),%ymm7,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,-32(%rax)
vmovdqu %ymm2,96(%rax)
vmovdqu %ymm3,224(%rax)
vmovdqu %ymm0,352(%rax)
cmp %rax,%rdi
jne .L18
add $-128,%rsi
add $512,%rdi
jmp .L17
.L21: ret
.endfn int32_twostages_32
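// Three compare-exchange stages at distances 4*rdx, 2*rdx, and rdx
// int32s, processed in blocks of 8*rdx elements.
//
// Returns how far it got so the caller can finish any ragged tail
// (interface inferred from the code and its call sites).
//
// @param rdi is int32 array
// @param rsi is number of elements
// @param rdx is one quarter of the largest exchange distance
// @return rax is number of elements fully processed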
int32_threestages:
push %rbp
imul $-24,%rdx,%r8
lea 0(,%rdx,8),%rax
mov %rsp,%rbp
push %r15
push %r14
push %r13
push %r12
push %rbx
andq $-32,%rsp
sub $64,%rsp
mov %rax,56(%rsp)
lea 0(,%rdx,4),%rax
lea (%rdi,%rax),%rcx
mov %rsi,8(%rsp)
lea (%rcx,%rax),%rsi
lea (%rsi,%rax),%r9
lea (%r9,%rax),%r11
lea (%r11,%rax),%r12
lea (%r12,%rax),%r14
lea (%r14,%rax),%r15
lea (%r15,%r8),%rbx
mov %rbx,40(%rsp)
add %rax,%rbx
lea (%rbx,%rax),%r10
mov %rbx,32(%rsp)
lea (%r10,%rax),%rbx
lea (%rbx,%rax),%r13
lea 0(%r13,%rax),%r8
mov %r8,24(%rsp)
add %r8,%rax
mov %rax,16(%rsp)
xor %eax,%eax
.L23: mov 56(%rsp),%r8
add %rax,%r8
mov %r8,48(%rsp)
cmp 8(%rsp),%r8
jg .L28
.L25: cmp %rdx,%rax
jge .L29
vmovdqu (%rdi,%rax,4),%ymm3
vmovdqu (%rsi,%rax,4),%ymm6
vpminsd (%r11,%rax,4),%ymm3,%ymm7
vpmaxsd (%r11,%rax,4),%ymm3,%ymm4
vpmaxsd (%r14,%rax,4),%ymm6,%ymm0
vmovdqu (%rcx,%rax,4),%ymm3
vmovdqu (%rsi,%rax,4),%ymm5
vpminsd (%r12,%rax,4),%ymm3,%ymm2
vpmaxsd (%r12,%rax,4),%ymm3,%ymm1
vpminsd (%r14,%rax,4),%ymm5,%ymm5
vmovdqu (%r9,%rax,4),%ymm3
vpminsd (%r15,%rax,4),%ymm3,%ymm6
vpmaxsd (%r15,%rax,4),%ymm3,%ymm3
vpminsd %ymm5,%ymm7,%ymm8
mov 40(%rsp),%r8
vpmaxsd %ymm5,%ymm7,%ymm5
vpminsd %ymm6,%ymm2,%ymm7
vpminsd %ymm7,%ymm8,%ymm9
vpmaxsd %ymm6,%ymm2,%ymm2
vpminsd %ymm0,%ymm4,%ymm6
vpmaxsd %ymm0,%ymm4,%ymm0
vmovdqu %ymm9,(%rdi,%rax,4)
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpmaxsd %ymm7,%ymm8,%ymm3
vpminsd %ymm2,%ymm5,%ymm7
vmovdqu %ymm3,(%r8,%rax,4)
mov 32(%rsp),%r8
vpmaxsd %ymm2,%ymm5,%ymm2
vpminsd %ymm4,%ymm6,%ymm5
vpmaxsd %ymm4,%ymm6,%ymm6
vpminsd %ymm1,%ymm0,%ymm4
vmovdqu %ymm7,(%r8,%rax,4)
mov 24(%rsp),%r8
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm2,(%r10,%rax,4)
vmovdqu %ymm5,(%rbx,%rax,4)
vmovdqu %ymm6,0(%r13,%rax,4)
vmovdqu %ymm4,(%r8,%rax,4)
mov 16(%rsp),%r8
vmovdqu %ymm0,(%r8,%rax,4)
add $8,%rax
jmp .L25
.L29: mov 48(%rsp),%rax
add 56(%rsp),%rdx
jmp .L23
.L28: lea -40(%rbp),%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
ret
.endfn int32_threestages
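// Finishes merging sixteen int32s held in %ymm0 and %ymm1.
//
// Runs the final compare-exchange and permute rounds of the bitonic
// merge, then stores the result, bitwise-complementing it first
// when the flag is set (the complement flips the sort direction).
//
// @param rdi is int32 output array of 16 elements
// @param esi is nonzero to store the complemented result
// @param ymm0,ymm1 hold the two input vectors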
merge16_finish:
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm3,%ymm2
vperm2i128 $49,%ymm0,%ymm3,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm2
vpunpckhqdq %ymm0,%ymm1,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm2
vpunpckldq %ymm2,%ymm1,%ymm0
vpunpckhdq %ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm3
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm3,%ymm0,%ymm2
vpmaxsd %ymm3,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vperm2i128 $32,%ymm0,%ymm1,%ymm2
vperm2i128 $49,%ymm0,%ymm1,%ymm0
test %esi,%esi
je .L31
vpcmpeqd %ymm1,%ymm1,%ymm1
vpxor %ymm1,%ymm2,%ymm2
vpxor %ymm1,%ymm0,%ymm0
.L31: vmovdqu %ymm2,(%rdi)
vmovdqu %ymm0,32(%rdi)
ret
.endfn merge16_finish
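// Sorts int32s when the count is a power of two.
//
// n == 8 uses a fully unrolled scalar network; n == 16 and n == 32
// use unrolled AVX2 bitonic networks; larger powers of two run
// staged merge passes via int32_threestages and int32_twostages_32.
// The third argument appears to select a complemented (reversed)
// result so a caller can build one half of a larger bitonic merge;
// that reading is inferred from the call sites, not documented here.
//
// @param rdi is int32 array
// @param rsi is number of elements (power of two, at least 8)
// @param edx is direction flag (see note above)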
int32_sort_2power:
push %r13
lea 16(%rsp),%r13
andq $-32,%rsp
push -8(%r13)
push %rbp
mov %rsp,%rbp
push %r15
push %r14
push %r13
push %r12
mov %rdi,%r12
push %rbx
sub $264,%rsp
mov %edx,-116(%rbp)
cmp $8,%rsi
jne .L36
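// n == 8: fully unrolled scalar sorting network of cmp/cmovg swaps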
mov 4(%rdi),%edx
mov (%rdi),%r8d
mov 8(%rdi),%ecx
mov 28(%r12),%r9d
cmp %r8d,%edx
mov %edx,%eax
cmovg %r8d,%edx
cmovg %eax,%r8d
mov 12(%rdi),%eax
cmp %ecx,%eax
mov %eax,%esi
cmovg %ecx,%eax
cmovg %esi,%ecx
cmp %r8d,%ecx
mov %ecx,%esi
cmovg %r8d,%ecx
cmovg %esi,%r8d
cmp %edx,%eax
mov %eax,%esi
cmovg %edx,%eax
cmovg %esi,%edx
mov 20(%rdi),%esi
mov %edx,%r10d
mov 16(%rdi),%edi
cmp %r10d,%ecx
mov %ecx,%edx
cmovg %r10d,%ecx
cmovg %edx,%r10d
cmp %edi,%esi
mov %esi,%edx
cmovg %edi,%esi
cmovg %edx,%edi
mov 24(%r12),%edx
cmp %edx,%r9d
mov %r9d,%r11d
cmovg %edx,%r9d
cmovg %r11d,%edx
cmp %edi,%edx
mov %edx,%r11d
cmovg %edi,%edx
cmovg %r11d,%edi
cmp %esi,%r9d
mov %r9d,%r11d
cmovg %esi,%r9d
cmovg %r11d,%esi
cmp %esi,%edx
mov %edx,%r11d
cmovg %esi,%edx
cmovg %r11d,%esi
cmp %r8d,%edi
mov %edi,%r11d
cmovg %r8d,%edi
cmovg %r11d,%r8d
cmp %ecx,%edx
mov %edx,%r11d
cmovg %ecx,%edx
cmovg %r11d,%ecx
mov %r8d,(%r12)
cmp %ecx,%edi
mov %edi,%r11d
cmovg %ecx,%edi
cmovg %r11d,%ecx
cmp %r10d,%esi
mov %esi,%r11d
cmovg %r10d,%esi
cmovg %r11d,%r10d
cmp %eax,%r9d
mov %r9d,%r11d
cmovg %eax,%r9d
cmovg %r11d,%eax
cmp %eax,%esi
mov %esi,%r11d
cmovg %eax,%esi
cmovg %r11d,%eax
mov %r9d,28(%r12)
cmp %r10d,%ecx
mov %ecx,%r11d
cmovg %r10d,%ecx
cmovg %r11d,%r10d
cmp %eax,%edi
mov %edi,%r11d
cmovg %eax,%edi
cmovg %r11d,%eax
mov %r10d,4(%r12)
cmp %esi,%edx
mov %edx,%r11d
cmovg %esi,%edx
cmovg %r11d,%esi
mov %ecx,8(%r12)
mov %eax,12(%r12)
mov %edi,16(%r12)
mov %esi,20(%r12)
mov %edx,24(%r12)
jmp .L35
.L36: mov %rsi,%r15
cmp $16,%rsi
jne .L38
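// n == 16: unrolled AVX2 bitonic network over two ymm registers;
// the vpxor masks complement selected lanes to set the compare
// direction of each stage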
vmovdqa .LC0(%rip),%ymm0
vpxor 32(%rdi),%ymm0,%ymm2
vpxor (%rdi),%ymm0,%ymm0
vmovdqa .LC1(%rip),%ymm4
cmpl $0,-116(%rbp)
vpunpckldq %ymm2,%ymm0,%ymm1
vpunpckhdq %ymm2,%ymm0,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm3
vpunpckhqdq %ymm0,%ymm1,%ymm1
vpminsd %ymm3,%ymm1,%ymm2
vpmaxsd %ymm3,%ymm1,%ymm1
vpxor %ymm4,%ymm2,%ymm2
vpxor %ymm4,%ymm1,%ymm1
vpunpckldq %ymm1,%ymm2,%ymm0
vpunpckhdq %ymm1,%ymm2,%ymm1
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm1
vpunpcklqdq %ymm1,%ymm3,%ymm2
vpunpckhqdq %ymm1,%ymm3,%ymm3
vpunpckldq %ymm3,%ymm2,%ymm1
vpunpckhdq %ymm3,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm0,%ymm1,%ymm2
vpmaxsd %ymm0,%ymm1,%ymm1
vpunpckldq %ymm1,%ymm2,%ymm0
vpunpckhdq %ymm1,%ymm2,%ymm1
vpxor %ymm4,%ymm1,%ymm1
vpxor %ymm4,%ymm0,%ymm0
vperm2i128 $32,%ymm1,%ymm0,%ymm3
vperm2i128 $49,%ymm1,%ymm0,%ymm0
vpminsd %ymm3,%ymm0,%ymm2
vpmaxsd %ymm3,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm2
vpunpcklqdq %ymm2,%ymm3,%ymm1
vpunpckhqdq %ymm2,%ymm3,%ymm2
vpunpckldq %ymm2,%ymm1,%ymm0
vpunpckhdq %ymm2,%ymm1,%ymm2
vpunpcklqdq %ymm2,%ymm0,%ymm1
vpunpckhqdq %ymm2,%ymm0,%ymm0
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vpunpckldq %ymm0,%ymm2,%ymm1
vpunpckhdq %ymm0,%ymm2,%ymm0
vpunpcklqdq %ymm0,%ymm1,%ymm2
vpunpckhqdq %ymm0,%ymm1,%ymm1
vpcmpeqd %ymm0,%ymm0,%ymm0
je .L39
vpxor %ymm0,%ymm1,%ymm1
jmp .L40
.L39: vpxor %ymm0,%ymm2,%ymm2
.L40: mov -116(%rbp),%esi
vmovdqa %ymm2,%ymm0
mov %r12,%rdi
jmp .L134
.L38: cmp $32,%rsi
jne .L41
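// n == 32: sort the two 16-element halves with opposite direction
// flags, combine them with vpminsd/vpmaxsd, and finish each half
// through merge16_finish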
mov $1,%edx
mov $16,%esi
lea 64(%r12),%r13
call int32_sort_2power
xor %edx,%edx
mov $16,%esi
mov %r13,%rdi
call int32_sort_2power
cmpl $0,-116(%rbp)
vmovdqu (%r12),%ymm4
vmovdqu 32(%r12),%ymm1
vmovdqu 64(%r12),%ymm2
vmovdqu 96(%r12),%ymm3
je .L42
vpcmpeqd %ymm0,%ymm0,%ymm0
vpxor %ymm0,%ymm4,%ymm4
vpxor %ymm0,%ymm1,%ymm1
vpxor %ymm0,%ymm2,%ymm2
vpxor %ymm0,%ymm3,%ymm3
.L42: mov -116(%rbp),%esi
vpmaxsd %ymm1,%ymm3,%ymm5
vpminsd %ymm4,%ymm2,%ymm0
mov %r12,%rdi
vpmaxsd %ymm4,%ymm2,%ymm4
vpminsd %ymm1,%ymm3,%ymm1
vmovdqa %ymm5,-80(%rbp)
vmovdqa %ymm4,-112(%rbp)
call merge16_finish
vmovdqa -80(%rbp),%ymm5
mov -116(%rbp),%esi
mov %r13,%rdi
vmovdqa -112(%rbp),%ymm4
vmovdqa %ymm5,%ymm1
vmovdqa %ymm4,%ymm0
.L134: add $264,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
lea -16(%r13),%rsp
pop %r13
jmp merge16_finish
.L41: mov %rsi,%rax
sar $3,%rax
mov %rax,-80(%rbp)
lea 0(,%rax,4),%r13
salq $3,%rax
imul $-20,-80(%rbp),%rdx
lea (%rdi,%rax),%rdi
lea (%rdi,%rax),%rsi
lea (%rsi,%rax),%rcx
add %rcx,%rdx
lea (%rdx,%rax),%r9
lea (%r9,%rax),%r8
add %r8,%rax
mov %rax,-136(%rbp)
mov %rax,%r10
xor %eax,%eax
.L43: cmp -80(%rbp),%rax
jge .L135
add $32,%rdi
add $32,%rsi
add $32,%rcx
add $32,%rdx
vmovdqu (%r12,%rax,4),%ymm5
add $32,%r9
add $32,%r8
add $32,%r10
vpminsd -32(%rsi),%ymm5,%ymm4
vpmaxsd -32(%rsi),%ymm5,%ymm2
vmovdqu -32(%rdi),%ymm5
vpminsd -32(%rcx),%ymm5,%ymm1
vpmaxsd -32(%rcx),%ymm5,%ymm0
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vpminsd %ymm4,%ymm1,%ymm2
vpmaxsd %ymm4,%ymm1,%ymm1
vmovdqu %ymm0,(%r12,%rax,4)
add $8,%rax
vpminsd %ymm1,%ymm3,%ymm4
vpmaxsd %ymm1,%ymm3,%ymm1
vmovdqu %ymm4,-32(%rdi)
vmovdqu %ymm1,-32(%rsi)
vmovdqu %ymm2,-32(%rcx)
vmovdqu -32(%r8),%ymm5
vmovdqu -32(%r10),%ymm6
vpminsd -32(%rdx),%ymm5,%ymm1
vpminsd -32(%r9),%ymm6,%ymm3
vpmaxsd -32(%r9),%ymm6,%ymm2
vpmaxsd -32(%rdx),%ymm5,%ymm0
vpminsd %ymm3,%ymm1,%ymm4
vpmaxsd %ymm3,%ymm1,%ymm1
vpminsd %ymm2,%ymm0,%ymm3
vpmaxsd %ymm2,%ymm0,%ymm0
vmovdqu %ymm4,-32(%rdx)
vpminsd %ymm1,%ymm3,%ymm2
vpmaxsd %ymm1,%ymm3,%ymm1
vmovdqu %ymm1,-32(%r9)
vmovdqu %ymm2,-32(%r8)
vmovdqu %ymm0,-32(%r10)
jmp .L43
.L135: imul $-24,-80(%rbp),%rax
mov %rax,-128(%rbp)
cmp $127,%r15
jg .L105
.L63: lea (%r12,%r15,4),%rax
vmovdqa .LC1(%rip),%ymm10
movl $3,-272(%rbp)
mov $4,%r14d
mov %rax,-144(%rbp)
mov %r15,%rax
vmovdqa .LC3(%rip),%ymm11
sar $4,%rax
vmovdqa .LC2(%rip),%ymm12
mov %rax,-112(%rbp)
mov -136(%rbp),%rax
add -128(%rbp),%rax
mov %rax,-200(%rbp)
add %r13,%rax
mov %rax,-192(%rbp)
add %r13,%rax
mov %rax,-184(%rbp)
add %r13,%rax
mov %rax,-176(%rbp)
add %r13,%rax
mov %rax,-168(%rbp)
add %r13,%rax
mov %rax,-160(%rbp)
add %r13,%rax
mov %rax,-152(%rbp)
jmp .L46
.L105: xor %eax,%eax
vpcmpeqd %ymm0,%ymm0,%ymm0
.L45: vpxor 64(%r12,%rax,4),%ymm0,%ymm1
vpxor (%r12,%rax,4),%ymm0,%ymm2
vmovdqu %ymm1,64(%r12,%rax,4)
vmovdqu %ymm2,(%r12,%rax,4)
add $32,%rax
cmp %rax,%r15
jg .L45
mov -136(%rbp),%r14
add -128(%rbp),%r14
mov $8,%ebx
vpcmpeqd %ymm10,%ymm10,%ymm10
lea (%r14,%r13),%rax
mov %rax,-296(%rbp)
add %r13,%rax
lea (%rax,%r13),%r11
mov %rax,-176(%rbp)
lea (%r11,%r13),%rax
mov %rax,-288(%rbp)
add %r13,%rax
mov %rax,-144(%rbp)
add %r13,%rax
mov %rax,-112(%rbp)
add -128(%rbp),%rax
mov %rax,-200(%rbp)
add %r13,%rax
mov %rax,-192(%rbp)
add %r13,%rax
mov %rax,-184(%rbp)
add %r13,%rax
mov %rax,-168(%rbp)
add %r13,%rax
mov %rax,-160(%rbp)
add %r13,%rax
mov %rax,-152(%rbp)
add %r13,%rax
mov %rax,-280(%rbp)
.L64: mov %rbx,%rcx
sarq %rcx
.L47: cmp $127,%rcx
jle .L136
mov %rcx,%rdx
mov %r15,%rsi
mov %r12,%rdi
mov %r11,-272(%rbp)
sar $2,%rdx
mov %rcx,-240(%rbp)
call int32_threestages
mov -240(%rbp),%rcx
mov -272(%rbp),%r11
vpcmpeqd %ymm10,%ymm10,%ymm10
sar $3,%rcx
jmp .L47
.L136: cmp $64,%rcx
jne .L49
mov %r15,%rsi
mov %r12,%rdi
mov %r11,-240(%rbp)
call int32_twostages_32
mov -240(%rbp),%r11
vpcmpeqd %ymm10,%ymm10,%ymm10
.L54: xor %eax,%eax
jmp .L50
.L49: cmp $32,%rcx
jne .L51
mov %r12,%rax
xor %edx,%edx
.L52: vmovdqu (%rax),%ymm7
vmovdqu 32(%rax),%ymm5
add $64,%rdx
add $256,%rax
vpminsd -128(%rax),%ymm7,%ymm8
vpmaxsd -128(%rax),%ymm7,%ymm4
vpminsd -96(%rax),%ymm5,%ymm1
vpmaxsd -96(%rax),%ymm5,%ymm0
vmovdqu -192(%rax),%ymm6
vmovdqu -160(%rax),%ymm7
vpminsd -64(%rax),%ymm6,%ymm5
vpmaxsd -32(%rax),%ymm7,%ymm2
vpmaxsd -64(%rax),%ymm6,%ymm3
vmovdqu -160(%rax),%ymm6
vpminsd -32(%rax),%ymm6,%ymm6
vpminsd %ymm5,%ymm8,%ymm7
vpmaxsd %ymm5,%ymm8,%ymm5
vpminsd %ymm6,%ymm1,%ymm8
vpmaxsd %ymm6,%ymm1,%ymm1
vpminsd %ymm3,%ymm4,%ymm6
vpmaxsd %ymm3,%ymm4,%ymm3
vpminsd %ymm2,%ymm0,%ymm4
vpmaxsd %ymm2,%ymm0,%ymm0
vpminsd %ymm8,%ymm7,%ymm9
vpmaxsd %ymm8,%ymm7,%ymm2
vpminsd %ymm1,%ymm5,%ymm7
vpmaxsd %ymm1,%ymm5,%ymm1
vmovdqu %ymm9,-256(%rax)
vpminsd %ymm4,%ymm6,%ymm5
vpmaxsd %ymm4,%ymm6,%ymm6
vmovdqu %ymm2,-224(%rax)
vpminsd %ymm0,%ymm3,%ymm4
vpmaxsd %ymm0,%ymm3,%ymm3
vmovdqu %ymm5,-128(%rax)
vmovdqu %ymm7,-192(%rax)
vmovdqu %ymm1,-160(%rax)
vmovdqu %ymm6,-96(%rax)
vmovdqu %ymm4,-64(%rax)
vmovdqu %ymm3,-32(%rax)
cmp %rdx,%r15
jg .L52
.L56: lea (%rbx,%rbx),%rdx
xor %ecx,%ecx
cmp -80(%rbp),%rdx
setne %al
sete %cl
mov %rdx,%r8
xor %esi,%esi
movzbl %al,%eax
mov %eax,-204(%rbp)
jmp .L53
.L51: cmp $16,%rcx
jne .L131
jmp .L54
.L50: vmovdqu (%r12,%rax,4),%ymm5
vmovdqu 32(%r12,%rax,4),%ymm6
vpminsd 64(%r12,%rax,4),%ymm5,%ymm2
vpminsd 96(%r12,%rax,4),%ymm6,%ymm3
vpmaxsd 64(%r12,%rax,4),%ymm5,%ymm0
vpmaxsd 96(%r12,%rax,4),%ymm6,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%r12,%rax,4)
vmovdqu %ymm2,32(%r12,%rax,4)
vmovdqu %ymm3,64(%r12,%rax,4)
vmovdqu %ymm0,96(%r12,%rax,4)
add $32,%rax
cmp %rax,%r15
jg .L50
jmp .L56
.L131: cmp $8,%rcx
jne .L56
xor %eax,%eax
.L57: vmovdqu 32(%r12,%rax,4),%ymm7
vpmaxsd (%r12,%rax,4),%ymm7,%ymm0
vpminsd (%r12,%rax,4),%ymm7,%ymm1
vmovdqu %ymm0,32(%r12,%rax,4)
vmovdqu %ymm1,(%r12,%rax,4)
add $16,%rax
cmp %rax,%r15
jg .L57
jmp .L56
.L59: mov -176(%rbp),%r10
vmovdqu (%r12,%rax,4),%ymm5
vpminsd (%r14,%rax,4),%ymm5,%ymm6
vpmaxsd (%r14,%rax,4),%ymm5,%ymm15
vmovdqu (%r10,%rax,4),%ymm5
mov -296(%rbp),%r10
vmovdqu (%r10,%rax,4),%ymm7
mov -288(%rbp),%r10
vmovdqa %ymm5,-240(%rbp)
vmovdqa %ymm7,-272(%rbp)
vmovdqu (%r10,%rax,4),%ymm7
mov -112(%rbp),%r10
vmovdqa -272(%rbp),%ymm5
vpminsd -240(%rbp),%ymm5,%ymm1
vpmaxsd -240(%rbp),%ymm5,%ymm5
vmovdqa %ymm7,-240(%rbp)
vmovdqa -240(%rbp),%ymm4
vpmaxsd (%r11,%rax,4),%ymm4,%ymm0
vmovdqu (%r10,%rax,4),%ymm4
vpminsd %ymm1,%ymm6,%ymm8
mov -144(%rbp),%r10
vmovdqa -240(%rbp),%ymm7
vpmaxsd %ymm1,%ymm6,%ymm6
vpminsd %ymm5,%ymm15,%ymm1
vmovdqa %ymm4,-240(%rbp)
vpminsd (%r11,%rax,4),%ymm7,%ymm7
vpmaxsd %ymm5,%ymm15,%ymm15
vmovdqu (%r10,%rax,4),%ymm4
vmovdqa %ymm4,-272(%rbp)
vmovdqa -272(%rbp),%ymm4
vpminsd -240(%rbp),%ymm4,%ymm3
vpmaxsd -240(%rbp),%ymm4,%ymm4
vpminsd %ymm3,%ymm7,%ymm2
vpmaxsd %ymm3,%ymm7,%ymm3
vpminsd %ymm4,%ymm0,%ymm7
vpmaxsd %ymm4,%ymm0,%ymm0
vpminsd %ymm2,%ymm8,%ymm14
vpminsd %ymm7,%ymm1,%ymm13
vpminsd %ymm3,%ymm6,%ymm12
vpminsd %ymm0,%ymm15,%ymm11
vmovdqa %ymm14,%ymm9
vpmaxsd %ymm3,%ymm6,%ymm6
vpmaxsd %ymm2,%ymm8,%ymm2
vmovdqa %ymm13,%ymm8
vpmaxsd %ymm7,%ymm1,%ymm1
vpmaxsd %ymm0,%ymm15,%ymm0
vmovdqa %ymm6,-240(%rbp)
vmovdqa %ymm2,%ymm5
vmovdqa -240(%rbp),%ymm3
vmovdqa %ymm1,%ymm4
vmovdqa %ymm12,%ymm7
vmovdqa %ymm11,%ymm6
vmovdqa %ymm0,%ymm15
test %ecx,%ecx
je .L58
vpxor %ymm14,%ymm10,%ymm9
vpxor %ymm13,%ymm10,%ymm8
vpxor %ymm12,%ymm10,%ymm7
vpxor %ymm11,%ymm10,%ymm6
vpxor %ymm2,%ymm10,%ymm5
vpxor %ymm1,%ymm10,%ymm4
vpxor %ymm3,%ymm10,%ymm3
vpxor %ymm0,%ymm10,%ymm15
.L58: mov -200(%rbp),%r10
vmovdqu %ymm9,(%r12,%rax,4)
vmovdqu %ymm8,(%r10,%rax,4)
mov -192(%rbp),%r10
vmovdqu %ymm7,(%r10,%rax,4)
mov -184(%rbp),%r10
vmovdqu %ymm6,(%r10,%rax,4)
mov -168(%rbp),%r10
vmovdqu %ymm5,(%r10,%rax,4)
mov -160(%rbp),%r10
vmovdqu %ymm4,(%r10,%rax,4)
mov -152(%rbp),%r10
vmovdqu %ymm3,(%r10,%rax,4)
mov -280(%rbp),%r10
vmovdqu %ymm15,(%r10,%rax,4)
add $8,%rax
.L60: cmp %rax,%rdi
jg .L59
xor $1,%ecx
lea (%rdx,%r9),%rdi
.L62: mov %rdi,%r9
sub %rbx,%r9
mov %r9,%rax
cmp %r9,%r8
jg .L60
xor -204(%rbp),%ecx
add %rdx,%rsi
add %rdx,%r8
.L53: cmp -80(%rbp),%rsi
jge .L61
lea (%rsi,%rbx),%rdi
jmp .L62
.L61: salq $4,%rbx
cmp %r15,%rbx
je .L63
mov %rdx,%rbx
jmp .L64
.L46: cmp $4,%r14
jne .L132
mov %r12,%rax
.L65: cmp -144(%rbp),%rax
je .L72
vpxor 32(%rax),%ymm12,%ymm0
vpxor (%rax),%ymm12,%ymm1
add $64,%rax
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L65
.L72: mov -112(%rbp),%rbx
jmp .L68
.L132: mov %r12,%rax
cmp $2,%r14
jne .L70
.L69: cmp -144(%rbp),%rax
je .L72
vpxor 32(%rax),%ymm10,%ymm2
vpxor (%rax),%ymm10,%ymm1
add $64,%rax
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L69
.L70: cmp -144(%rbp),%rax
je .L72
vpxor 32(%rax),%ymm11,%ymm2
vpxor (%rax),%ymm11,%ymm1
add $64,%rax
vperm2i128 $32,%ymm2,%ymm1,%ymm0
vperm2i128 $49,%ymm2,%ymm1,%ymm1
vpunpcklqdq %ymm1,%ymm0,%ymm2
vpunpckhqdq %ymm1,%ymm0,%ymm0
vpminsd %ymm0,%ymm2,%ymm1
vpmaxsd %ymm0,%ymm2,%ymm2
vpunpcklqdq %ymm2,%ymm1,%ymm0
vpunpckhqdq %ymm2,%ymm1,%ymm1
vpminsd %ymm1,%ymm0,%ymm2
vpmaxsd %ymm1,%ymm0,%ymm0
vperm2i128 $32,%ymm0,%ymm2,%ymm1
vperm2i128 $49,%ymm0,%ymm2,%ymm0
vmovdqu %ymm1,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L70
.L137: cmp $32,%rbx
jne .L75
.L74: mov %rbx,%rdx
mov %r15,%rsi
mov %r12,%rdi
sar $3,%rbx
sar $2,%rdx
call int32_threestages
vmovdqa .LC2(%rip),%ymm12
vmovdqa .LC3(%rip),%ymm11
vmovdqa .LC1(%rip),%ymm10
.L68: cmp $127,%rbx
jle .L137
jmp .L74
.L139: sar $2,%rbx
.L75: cmp $15,%rbx
jle .L138
mov %rbx,%rcx
xor %esi,%esi
sarq %rcx
imul $-8,%rcx,%rdi
lea 0(,%rcx,4),%rdx
lea (%r12,%rdx),%r11
lea (%r11,%rdx),%r10
lea (%r10,%rdx),%r8
lea (%rdi,%r8),%rax
lea (%rax,%rdx),%r9
mov %rax,-136(%rbp)
lea (%r9,%rdx),%rax
mov %rax,-240(%rbp)
.L76: cmp %r15,%rsi
jge .L139
mov %rsi,%rax
.L78: cmp %rcx,%rax
jge .L140
vmovdqu (%r12,%rax,4),%ymm6
vmovdqu (%r11,%rax,4),%ymm5
vpminsd (%r10,%rax,4),%ymm6,%ymm2
vpminsd (%r8,%rax,4),%ymm5,%ymm3
mov -136(%rbp),%rdi
vpmaxsd (%r10,%rax,4),%ymm6,%ymm0
vpmaxsd (%r8,%rax,4),%ymm5,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vmovdqu %ymm4,(%r12,%rax,4)
vmovdqu %ymm2,(%rdi,%rax,4)
mov -240(%rbp),%rdi
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm3,(%r9,%rax,4)
vmovdqu %ymm0,(%rdi,%rax,4)
add $8,%rax
jmp .L78
.L140: add %rdx,%rsi
add %rdx,%rcx
jmp .L76
.L138: cmp $8,%rbx
je .L109
.L83: mov -152(%rbp),%rdx
mov -160(%rbp),%rcx
xor %eax,%eax
mov -168(%rbp),%rsi
mov -176(%rbp),%rdi
mov -184(%rbp),%r8
mov -192(%rbp),%r9
mov -200(%rbp),%r10
jmp .L81
.L109: xor %eax,%eax
.L80: cmp %r15,%rax
jge .L83
vmovdqu (%r12,%rax,4),%ymm5
vpminsd 32(%r12,%rax,4),%ymm5,%ymm1
vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0
vmovdqu %ymm1,(%r12,%rax,4)
vmovdqu %ymm0,32(%r12,%rax,4)
add $16,%rax
jmp .L80
.L81: cmp -80(%rbp),%rax
jge .L141
vmovdqu (%rdi),%ymm7
add $32,%r10
add $32,%r9
add $32,%r8
add $32,%rdi
add $32,%rsi
add $32,%rcx
add $32,%rdx
vmovdqu (%r12,%rax,4),%ymm5
vmovdqu -32(%r9),%ymm6
vpminsd -32(%r10),%ymm5,%ymm3
vpmaxsd -32(%r10),%ymm5,%ymm1
vpminsd -32(%r8),%ymm6,%ymm2
vpmaxsd -32(%r8),%ymm6,%ymm0
vpminsd -32(%rsi),%ymm7,%ymm7
vmovdqu -32(%rcx),%ymm5
vmovdqu -32(%rdi),%ymm6
vpmaxsd -32(%rdx),%ymm5,%ymm4
vpminsd %ymm2,%ymm3,%ymm9
vpmaxsd -32(%rsi),%ymm6,%ymm8
vpminsd -32(%rdx),%ymm5,%ymm6
vpminsd %ymm0,%ymm1,%ymm13
vpmaxsd %ymm2,%ymm3,%ymm2
vpminsd %ymm6,%ymm7,%ymm5
vpminsd %ymm4,%ymm8,%ymm3
vpmaxsd %ymm6,%ymm7,%ymm6
vpmaxsd %ymm0,%ymm1,%ymm0
vpmaxsd %ymm4,%ymm8,%ymm4
vpminsd %ymm5,%ymm9,%ymm1
vpminsd %ymm6,%ymm2,%ymm8
vpminsd %ymm3,%ymm13,%ymm7
vmovdqu %ymm1,(%r12,%rax,4)
add $8,%rax
vpmaxsd %ymm6,%ymm2,%ymm2
vpmaxsd %ymm5,%ymm9,%ymm5
vmovdqu %ymm7,-32(%r10)
vpminsd %ymm4,%ymm0,%ymm6
vpmaxsd %ymm3,%ymm13,%ymm3
vmovdqu %ymm8,-32(%r9)
vpmaxsd %ymm4,%ymm0,%ymm0
vmovdqu %ymm6,-32(%r8)
vmovdqu %ymm5,-32(%rdi)
vmovdqu %ymm3,-32(%rsi)
vmovdqu %ymm2,-32(%rcx)
vmovdqu %ymm0,-32(%rdx)
jmp .L81
.L141: sarq %r14
decl -272(%rbp)
jne .L46
mov %r12,%rax
xor %edx,%edx
vpcmpeqd %ymm5,%ymm5,%ymm5
.L85: cmp %r15,%rdx
jge .L89
vmovdqu (%rax),%ymm7
vpunpckldq 32(%rax),%ymm7,%ymm12
vpunpckhdq 32(%rax),%ymm7,%ymm6
vmovdqu 64(%rax),%ymm7
vpunpckldq 96(%rax),%ymm7,%ymm2
vpunpckhdq 96(%rax),%ymm7,%ymm4
vmovdqu 128(%rax),%ymm7
vpunpckldq 160(%rax),%ymm7,%ymm1
vpunpckhdq 160(%rax),%ymm7,%ymm0
vpunpcklqdq %ymm2,%ymm12,%ymm8
vpunpcklqdq %ymm4,%ymm6,%ymm9
cmpl $0,-116(%rbp)
vmovdqu 192(%rax),%ymm7
vpunpckhqdq %ymm2,%ymm12,%ymm12
vpunpckhqdq %ymm4,%ymm6,%ymm4
vpunpckldq 224(%rax),%ymm7,%ymm10
vpunpckhdq 224(%rax),%ymm7,%ymm3
vpunpcklqdq %ymm10,%ymm1,%ymm11
vpunpckhqdq %ymm10,%ymm1,%ymm1
vpunpcklqdq %ymm3,%ymm0,%ymm7
vpunpckhqdq %ymm3,%ymm0,%ymm0
je .L86
vpxor %ymm5,%ymm12,%ymm12
vpxor %ymm5,%ymm4,%ymm4
vpxor %ymm5,%ymm1,%ymm1
vpxor %ymm5,%ymm0,%ymm0
jmp .L87
.L86: vpxor %ymm5,%ymm8,%ymm8
vpxor %ymm5,%ymm9,%ymm9
vpxor %ymm5,%ymm11,%ymm11
vpxor %ymm5,%ymm7,%ymm7
.L87: vperm2i128 $32,%ymm11,%ymm8,%ymm3
vperm2i128 $32,%ymm1,%ymm12,%ymm6
vperm2i128 $32,%ymm7,%ymm9,%ymm10
add $64,%rdx
vperm2i128 $32,%ymm0,%ymm4,%ymm13
vperm2i128 $49,%ymm11,%ymm8,%ymm11
vperm2i128 $49,%ymm7,%ymm9,%ymm9
add $256,%rax
vperm2i128 $49,%ymm1,%ymm12,%ymm1
vperm2i128 $49,%ymm0,%ymm4,%ymm0
vpmaxsd %ymm6,%ymm3,%ymm2
vpminsd %ymm6,%ymm3,%ymm4
vpminsd %ymm1,%ymm11,%ymm7
vpmaxsd %ymm13,%ymm10,%ymm3
vpminsd %ymm13,%ymm10,%ymm8
vpmaxsd %ymm1,%ymm11,%ymm1
vpminsd %ymm0,%ymm9,%ymm10
vpmaxsd %ymm0,%ymm9,%ymm0
vpminsd %ymm8,%ymm4,%ymm11
vpminsd %ymm3,%ymm2,%ymm9
vpmaxsd %ymm8,%ymm4,%ymm8
vpminsd %ymm10,%ymm7,%ymm6
vpmaxsd %ymm10,%ymm7,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm0,%ymm1,%ymm3
vpmaxsd %ymm0,%ymm1,%ymm1
vpminsd %ymm6,%ymm11,%ymm10
vpmaxsd %ymm6,%ymm11,%ymm0
vpminsd %ymm3,%ymm9,%ymm7
vpmaxsd %ymm3,%ymm9,%ymm6
vpminsd %ymm4,%ymm8,%ymm3
vpminsd %ymm1,%ymm2,%ymm9
vpmaxsd %ymm4,%ymm8,%ymm4
vpunpckldq %ymm7,%ymm10,%ymm8
vpmaxsd %ymm1,%ymm2,%ymm2
vpunpckhdq %ymm7,%ymm10,%ymm7
vpunpckldq %ymm9,%ymm3,%ymm1
vpunpckhdq %ymm9,%ymm3,%ymm3
vpunpckldq %ymm6,%ymm0,%ymm9
vpunpckhdq %ymm6,%ymm0,%ymm6
vpunpckldq %ymm2,%ymm4,%ymm0
vpunpckhdq %ymm2,%ymm4,%ymm2
vpunpcklqdq %ymm3,%ymm7,%ymm10
vpunpcklqdq %ymm1,%ymm8,%ymm4
vpunpcklqdq %ymm0,%ymm9,%ymm13
vpunpckhqdq %ymm1,%ymm8,%ymm8
vpunpckhqdq %ymm3,%ymm7,%ymm3
vpunpckhqdq %ymm0,%ymm9,%ymm1
vpunpcklqdq %ymm2,%ymm6,%ymm7
vpunpckhqdq %ymm2,%ymm6,%ymm0
vperm2i128 $32,%ymm13,%ymm4,%ymm12
vperm2i128 $32,%ymm1,%ymm8,%ymm11
vperm2i128 $32,%ymm0,%ymm3,%ymm6
vperm2i128 $32,%ymm7,%ymm10,%ymm9
vperm2i128 $49,%ymm13,%ymm4,%ymm4
vmovdqu %ymm12,-256(%rax)
vperm2i128 $49,%ymm1,%ymm8,%ymm1
vperm2i128 $49,%ymm7,%ymm10,%ymm2
vperm2i128 $49,%ymm0,%ymm3,%ymm0
vmovdqu %ymm11,-224(%rax)
vmovdqu %ymm9,-192(%rax)
vmovdqu %ymm6,-160(%rax)
vmovdqu %ymm4,-128(%rax)
vmovdqu %ymm1,-96(%rax)
vmovdqu %ymm2,-64(%rax)
vmovdqu %ymm0,-32(%rax)
jmp .L85
.L142: cmpq $32,-112(%rbp)
jne .L94
.L93: mov -112(%rbp),%rcx
sar $2,%rcx
lea 0(,%rcx,4),%rdx
lea 0(,%rcx,8),%rax
mov %rcx,-136(%rbp)
lea (%r12,%rdx),%r9
mov %rax,-184(%rbp)
imul $-24,%rcx,%rax
lea (%r9,%rdx),%r14
lea (%r14,%rdx),%rsi
lea (%rsi,%rdx),%rbx
lea (%rbx,%rdx),%r10
lea (%r10,%rdx),%r8
lea (%r8,%rdx),%rdi
add %rdi,%rax
mov %rax,-176(%rbp)
add %rdx,%rax
mov %rax,-168(%rbp)
add %rdx,%rax
lea (%rax,%rdx),%r11
mov %rax,-160(%rbp)
lea (%r11,%rdx),%rax
mov %rax,-200(%rbp)
add %rdx,%rax
add %rax,%rdx
mov %rax,-144(%rbp)
mov %rdx,-192(%rbp)
.L90: mov -136(%rbp),%rax
sub %rcx,%rax
cmp %rax,%r15
jg .L92
sarq $3,-112(%rbp)
.L89: cmpq $127,-112(%rbp)
jle .L142
jmp .L93
.L92: cmp -136(%rbp),%rax
jge .L143
vmovdqu (%r12,%rax,4),%ymm6
vpminsd (%rbx,%rax,4),%ymm6,%ymm7
vpmaxsd (%rbx,%rax,4),%ymm6,%ymm4
vmovdqu (%r9,%rax,4),%ymm6
vpminsd (%r10,%rax,4),%ymm6,%ymm1
vpmaxsd (%r10,%rax,4),%ymm6,%ymm0
vmovdqu (%r14,%rax,4),%ymm6
vpminsd (%r8,%rax,4),%ymm6,%ymm5
vpmaxsd (%r8,%rax,4),%ymm6,%ymm3
vmovdqu (%rsi,%rax,4),%ymm6
vpminsd (%rdi,%rax,4),%ymm6,%ymm6
vpminsd %ymm5,%ymm7,%ymm9
vmovdqu (%rsi,%rax,4),%ymm2
vpmaxsd %ymm5,%ymm7,%ymm5
mov -176(%rbp),%rdx
vpminsd %ymm3,%ymm4,%ymm8
vpminsd %ymm6,%ymm1,%ymm7
vpmaxsd %ymm3,%ymm4,%ymm3
vpminsd %ymm7,%ymm9,%ymm10
vpmaxsd %ymm7,%ymm9,%ymm4
vpmaxsd (%rdi,%rax,4),%ymm2,%ymm2
vpmaxsd %ymm6,%ymm1,%ymm1
vmovdqu %ymm10,(%r12,%rax,4)
vmovdqu %ymm4,(%rdx,%rax,4)
mov -168(%rbp),%rdx
vpminsd %ymm1,%ymm5,%ymm9
vpmaxsd %ymm1,%ymm5,%ymm1
vpminsd %ymm2,%ymm0,%ymm6
vpmaxsd %ymm2,%ymm0,%ymm0
vmovdqu %ymm9,(%rdx,%rax,4)
vpminsd %ymm6,%ymm8,%ymm7
vpmaxsd %ymm6,%ymm8,%ymm2
mov -160(%rbp),%rdx
vpminsd %ymm0,%ymm3,%ymm5
vpmaxsd %ymm0,%ymm3,%ymm3
vmovdqu %ymm1,(%rdx,%rax,4)
mov -200(%rbp),%rdx
vmovdqu %ymm7,(%r11,%rax,4)
vmovdqu %ymm2,(%rdx,%rax,4)
mov -144(%rbp),%rdx
vmovdqu %ymm5,(%rdx,%rax,4)
mov -192(%rbp),%rdx
vmovdqu %ymm3,(%rdx,%rax,4)
add $8,%rax
jmp .L92
.L143: mov -184(%rbp),%rdx
add %rdx,-136(%rbp)
jmp .L90
.L145: sarq $2,-112(%rbp)
.L94: cmpq $15,-112(%rbp)
jle .L144
mov -112(%rbp),%rcx
xor %esi,%esi
sarq %rcx
imul $-8,%rcx,%rdi
lea 0(,%rcx,4),%rdx
lea (%r12,%rdx),%r11
lea (%r11,%rdx),%r10
lea (%r10,%rdx),%r8
add %r8,%rdi
lea (%rdi,%rdx),%r9
lea (%r9,%rdx),%rbx
.L95: cmp %r15,%rsi
jge .L145
mov %rsi,%rax
.L97: cmp %rcx,%rax
jge .L146
vmovdqu (%r12,%rax,4),%ymm5
vpminsd (%r10,%rax,4),%ymm5,%ymm2
vpmaxsd (%r10,%rax,4),%ymm5,%ymm0
vmovdqu (%r11,%rax,4),%ymm5
vpminsd (%r8,%rax,4),%ymm5,%ymm3
vpmaxsd (%r8,%rax,4),%ymm5,%ymm1
vpminsd %ymm3,%ymm2,%ymm4
vpmaxsd %ymm3,%ymm2,%ymm2
vpminsd %ymm1,%ymm0,%ymm3
vpmaxsd %ymm1,%ymm0,%ymm0
vmovdqu %ymm4,(%r12,%rax,4)
vmovdqu %ymm2,(%rdi,%rax,4)
vmovdqu %ymm3,(%r9,%rax,4)
vmovdqu %ymm0,(%rbx,%rax,4)
add $8,%rax
jmp .L97
.L146: add %rdx,%rsi
add %rdx,%rcx
jmp .L95
.L144: cmpq $8,-112(%rbp)
je .L111
.L102: mov -152(%rbp),%rdx
add -128(%rbp),%rdx
xor %ecx,%ecx
vpcmpeqd %ymm6,%ymm6,%ymm6
lea (%rdx,%r13),%r10
lea (%r10,%r13),%r9
lea (%r9,%r13),%r8
lea (%r8,%r13),%rdi
lea (%rdi,%r13),%rsi
lea (%rsi,%r13),%rax
jmp .L100
.L111: xor %eax,%eax
.L99: cmp %r15,%rax
jge .L102
vmovdqu (%r12,%rax,4),%ymm5
vpminsd 32(%r12,%rax,4),%ymm5,%ymm1
vpmaxsd 32(%r12,%rax,4),%ymm5,%ymm0
vmovdqu %ymm1,(%r12,%rax,4)
vmovdqu %ymm0,32(%r12,%rax,4)
add $16,%rax
jmp .L99
.L104: vmovdqu (%r10),%ymm7
vmovdqu (%r12,%rcx,4),%ymm4
vpminsd (%r9),%ymm7,%ymm3
vpminsd (%rdx),%ymm4,%ymm5
vpmaxsd (%r9),%ymm7,%ymm2
vpmaxsd (%rdx),%ymm4,%ymm4
vmovdqu (%r8),%ymm7
vmovdqu (%rsi),%ymm14
vpminsd %ymm3,%ymm5,%ymm11
vpmaxsd %ymm3,%ymm5,%ymm3
vpminsd (%rdi),%ymm7,%ymm1
vpminsd %ymm2,%ymm4,%ymm10
cmpl $0,-116(%rbp)
vpmaxsd (%rdi),%ymm7,%ymm0
vmovdqu (%rsi),%ymm7
vpmaxsd %ymm2,%ymm4,%ymm2
vpminsd (%rax),%ymm7,%ymm7
vpmaxsd (%rax),%ymm14,%ymm9
vpminsd %ymm7,%ymm1,%ymm8
vpmaxsd %ymm7,%ymm1,%ymm1
vpminsd %ymm9,%ymm0,%ymm7
vpmaxsd %ymm9,%ymm0,%ymm0
vpminsd %ymm8,%ymm11,%ymm5
vpminsd %ymm1,%ymm3,%ymm9
vpminsd %ymm7,%ymm10,%ymm12
vpmaxsd %ymm1,%ymm3,%ymm3
vpminsd %ymm0,%ymm2,%ymm4
vpmaxsd %ymm8,%ymm11,%ymm8
vpmaxsd %ymm0,%ymm2,%ymm2
vpmaxsd %ymm7,%ymm10,%ymm7
vpunpckldq %ymm8,%ymm5,%ymm11
vpunpckldq %ymm7,%ymm12,%ymm10
vpunpckhdq %ymm8,%ymm5,%ymm8
vpunpckhdq %ymm7,%ymm12,%ymm7
vpunpckhdq %ymm3,%ymm9,%ymm5
vpunpckldq %ymm2,%ymm4,%ymm1
vpunpckldq %ymm3,%ymm9,%ymm0
vpunpckhdq %ymm2,%ymm4,%ymm4
vpunpcklqdq %ymm0,%ymm11,%ymm3
vpunpckhqdq %ymm0,%ymm11,%ymm9
vpunpcklqdq %ymm5,%ymm8,%ymm2
vpunpcklqdq %ymm4,%ymm7,%ymm11
vpunpckhqdq %ymm5,%ymm8,%ymm5
vpunpcklqdq %ymm1,%ymm10,%ymm12
vpunpckhqdq %ymm4,%ymm7,%ymm0
vpunpckhqdq %ymm1,%ymm10,%ymm1
vperm2i128 $32,%ymm11,%ymm2,%ymm8
vperm2i128 $32,%ymm12,%ymm3,%ymm10
vperm2i128 $32,%ymm1,%ymm9,%ymm7
vperm2i128 $32,%ymm0,%ymm5,%ymm4
vperm2i128 $49,%ymm12,%ymm3,%ymm3
vperm2i128 $49,%ymm11,%ymm2,%ymm2
vperm2i128 $49,%ymm1,%ymm9,%ymm1
vperm2i128 $49,%ymm0,%ymm5,%ymm0
je .L103
vpxor %ymm6,%ymm10,%ymm10
vpxor %ymm6,%ymm8,%ymm8
vpxor %ymm6,%ymm7,%ymm7
vpxor %ymm6,%ymm4,%ymm4
vpxor %ymm6,%ymm3,%ymm3
vpxor %ymm6,%ymm2,%ymm2
vpxor %ymm6,%ymm1,%ymm1
vpxor %ymm6,%ymm0,%ymm0
.L103: add $32,%rdx
add $32,%r10
add $32,%r9
add $32,%r8
vmovdqu %ymm10,(%r12,%rcx,4)
add $32,%rdi
add $8,%rcx
add $32,%rsi
vmovdqu %ymm3,-32(%rdx)
add $32,%rax
vmovdqu %ymm8,-32(%r10)
vmovdqu %ymm2,-32(%r9)
vmovdqu %ymm7,-32(%r8)
vmovdqu %ymm1,-32(%rdi)
vmovdqu %ymm4,-32(%rsi)
vmovdqu %ymm0,-32(%rax)
.L100: cmp -80(%rbp),%rcx
jl .L104
.L35: add $264,%rsp
pop %rbx
pop %r12
pop %r13
pop %r14
pop %r15
pop %rbp
lea -16(%r13),%rsp
pop %r13
ret
.endfn int32_sort_2power
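// Lane masks and padding constants: .LC0 through .LC3 appear to
// drive the vpxor steps that complement selected 32- and 64-bit
// lanes, flipping compare directions inside the bitonic network;
// .LC4 is the INT32_MAX sentinel used to pad non-power-of-two
// inputs.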
.rodata.cst32
.LC0: .quad -1,0,-1,0
.LC1: .quad 0,-1,-1,0
.LC2: .quad -1,-1,0,0
.LC3: .quad -4294967296,4294967295,-4294967296,4294967295
.LC4: .quad 0x7fffffff7fffffff
.quad 0x7fffffff7fffffff
.quad 0x7fffffff7fffffff
.quad 0x7fffffff7fffffff