mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-12 05:59:10 +00:00
Initial import
This commit is contained in:
commit
c91b3c5006
14915 changed files with 590219 additions and 0 deletions
427
third_party/stb/idct-sse.S
vendored
Normal file
427
third_party/stb/idct-sse.S
vendored
Normal file
|
@ -0,0 +1,427 @@
|
|||
/*-*- mode:asm; indent-tabs-mode:t; tab-width:8; coding:utf-8 -*-│
|
||||
│vi: set et ft=asm ts=8 tw=8 fenc=utf-8 :vi│
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2020 Justine Alexandra Roberts Tunney │
|
||||
│ │
|
||||
│ This program is free software; you can redistribute it and/or modify │
|
||||
│ it under the terms of the GNU General Public License as published by │
|
||||
│ the Free Software Foundation; version 2 of the License. │
|
||||
│ │
|
||||
│ This program is distributed in the hope that it will be useful, but │
|
||||
│ WITHOUT ANY WARRANTY; without even the implied warranty of │
|
||||
│ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU │
|
||||
│ General Public License for more details. │
|
||||
│ │
|
||||
│ You should have received a copy of the GNU General Public License │
|
||||
│ along with this program; if not, write to the Free Software │
|
||||
│ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA │
|
||||
│ 02110-1301 USA │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/macros.h"
|
||||
|
||||
/ Computes inverse discrete cosine transform.
|
||||
/
|
||||
/ @note used to decode jpeg
|
||||
// Register usage, as established by the instructions below:
//   %rdi  destination base; eight rows of 8 bytes each are stored
//   %esi  signed 32-bit byte distance between consecutive output rows
//   %rdx  source; eight 16-byte vectors at offsets 0..112 are read with
//         aligned accesses (movdqa / memory-operand punpck), so the
//         pointer has to be 16-byte aligned
// Uses a 96-byte frame for spills and writes %rax, %rsi, %xmm0-%xmm15.
// NOTE(review): presumably the C prototype is
//   void stbi__idct_simd(unsigned char *out, int out_stride, short data[64])
// as in stb_image -- confirm against the caller.
.p2align 4
|
||||
stbi__idct_simd$sse:
|
||||
push %rbp
|
||||
mov %rsp,%rbp
|
||||
// Sign-extend the stride, then %rax = address of the second output row.
movslq %esi,%rsi
|
||||
lea (%rdi,%rsi),%rax
|
||||
// Six 16-byte spill slots at -16..-96(%rbp). They are accessed with
// movaps/movdqa, which relies on %rbp being 16-byte aligned here
// (true under the usual SysV stack alignment at function entry).
sub $96,%rsp
|
||||
// ---- Pass 1: load input vectors 2, 7, 3 and 5.
movdqa 32(%rdx),%xmm0
|
||||
movdqa 112(%rdx),%xmm9
|
||||
movdqa 48(%rdx),%xmm1
|
||||
movdqa 80(%rdx),%xmm7
|
||||
movdqa %xmm0,%xmm2
|
||||
// Interleave the words of vectors 2 and 6 so that each pmaddwd below
// applies one constant pair to a (vector 2, vector 6) word pair.
punpcklwd 96(%rdx),%xmm2
|
||||
punpckhwd 96(%rdx),%xmm0
|
||||
movdqa %xmm9,%xmm8
|
||||
movdqa 16(%rdx),%xmm5
|
||||
movdqa %xmm2,%xmm3
|
||||
movdqa %xmm2,%xmm6
|
||||
movdqa %xmm0,%xmm2
|
||||
pmaddwd .LC1(%rip),%xmm3
|
||||
movdqa %xmm0,%xmm4
|
||||
pmaddwd .LC1(%rip),%xmm2
|
||||
pmaddwd .LC0(%rip),%xmm4
|
||||
punpckhwd %xmm1,%xmm8
|
||||
pmaddwd .LC0(%rip),%xmm6
|
||||
movaps %xmm3,-48(%rbp)
|
||||
movdqa (%rdx),%xmm3
|
||||
movaps %xmm2,-64(%rbp)
|
||||
movdqa 64(%rdx),%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
movaps %xmm4,-32(%rbp)
|
||||
// 16-bit sum (%xmm0) and difference (%xmm3) of vectors 0 and 4.
paddw %xmm2,%xmm0
|
||||
psubw %xmm2,%xmm3
|
||||
movaps %xmm6,-16(%rbp)
|
||||
movdqa %xmm0,%xmm4
|
||||
// %xmm0 = 0 and is kept as the zero register for the rest of the function.
// Unpacking a word above a zero word and then doing psrad $4 yields
// that word shifted left by 12 as a signed dword.
pxor %xmm0,%xmm0
|
||||
movdqa %xmm0,%xmm11
|
||||
movdqa %xmm0,%xmm12
|
||||
movdqa %xmm0,%xmm2
|
||||
punpcklwd %xmm4,%xmm11
|
||||
punpckhwd %xmm3,%xmm12
|
||||
punpcklwd %xmm3,%xmm2
|
||||
movdqa %xmm11,%xmm13
|
||||
movdqa %xmm0,%xmm11
|
||||
movdqa %xmm12,%xmm3
|
||||
punpckhwd %xmm4,%xmm11
|
||||
movdqa %xmm8,%xmm12
|
||||
movdqa %xmm8,%xmm4
|
||||
movdqa %xmm11,%xmm14
|
||||
movdqa %xmm7,%xmm8
|
||||
movdqa %xmm9,%xmm11
|
||||
punpckhwd %xmm5,%xmm8
|
||||
psrad $4,%xmm3
|
||||
punpcklwd %xmm1,%xmm11
|
||||
psrad $4,%xmm13
|
||||
psrad $4,%xmm14
|
||||
movdqa %xmm11,%xmm15
|
||||
movaps %xmm13,-80(%rbp)
|
||||
movdqa %xmm8,%xmm6
|
||||
// %xmm1 = vector 3 + vector 5 (16-bit lanes).
paddw %xmm7,%xmm1
|
||||
pmaddwd .LC3(%rip),%xmm15
|
||||
movaps %xmm14,-96(%rbp)
|
||||
movdqa %xmm8,%xmm14
|
||||
movdqa %xmm5,%xmm8
|
||||
pmaddwd .LC2(%rip),%xmm11
|
||||
pmaddwd .LC2(%rip),%xmm12
|
||||
// %xmm8 = vector 1 + vector 7 (16-bit lanes).
paddw %xmm9,%xmm8
|
||||
psrad $4,%xmm2
|
||||
pmaddwd .LC3(%rip),%xmm4
|
||||
pmaddwd .LC5(%rip),%xmm6
|
||||
pmaddwd .LC4(%rip),%xmm14
|
||||
movdqa %xmm4,%xmm10
|
||||
movdqa %xmm7,%xmm4
|
||||
movdqa %xmm8,%xmm7
|
||||
punpcklwd %xmm5,%xmm4
|
||||
punpcklwd %xmm1,%xmm7
|
||||
punpckhwd %xmm1,%xmm8
|
||||
movdqa %xmm4,%xmm13
|
||||
movdqa %xmm7,%xmm9
|
||||
pmaddwd .LC5(%rip),%xmm4
|
||||
pmaddwd .LC6(%rip),%xmm9
|
||||
movdqa %xmm8,%xmm5
|
||||
movdqa %xmm7,%xmm1
|
||||
pmaddwd .LC7(%rip),%xmm8
|
||||
pmaddwd .LC6(%rip),%xmm5
|
||||
movdqa %xmm15,%xmm7
|
||||
paddd %xmm9,%xmm11
|
||||
paddd %xmm9,%xmm4
|
||||
// %xmm9 = pass-1 bias (.LC8); it is added before every psrad $10 below.
movdqa .LC8(%rip),%xmm9
|
||||
paddd %xmm8,%xmm14
|
||||
paddd %xmm10,%xmm8
|
||||
movdqa -96(%rbp),%xmm10
|
||||
paddd -64(%rbp),%xmm10
|
||||
pmaddwd .LC7(%rip),%xmm1
|
||||
pmaddwd .LC4(%rip),%xmm13
|
||||
paddd %xmm5,%xmm12
|
||||
paddd %xmm5,%xmm6
|
||||
paddd %xmm9,%xmm10
|
||||
movdqa -80(%rbp),%xmm5
|
||||
paddd -48(%rbp),%xmm5
|
||||
paddd %xmm1,%xmm13
|
||||
paddd %xmm1,%xmm7
|
||||
movdqa %xmm10,%xmm1
|
||||
psubd %xmm6,%xmm10
|
||||
paddd %xmm9,%xmm5
|
||||
paddd %xmm6,%xmm1
|
||||
psrad $10,%xmm10
|
||||
movdqa -16(%rbp),%xmm6
|
||||
movdqa %xmm1,%xmm15
|
||||
movdqa %xmm5,%xmm1
|
||||
psubd %xmm4,%xmm5
|
||||
psrad $10,%xmm5
|
||||
paddd %xmm4,%xmm1
|
||||
paddd %xmm2,%xmm6
|
||||
// Each packssdw narrows a low/high pair of dword results back to eight
// signed 16-bit lanes (with signed saturation).
packssdw %xmm10,%xmm5
|
||||
movdqa -32(%rbp),%xmm10
|
||||
paddd %xmm9,%xmm6
|
||||
paddd %xmm9,%xmm2
|
||||
psrad $10,%xmm15
|
||||
psrad $10,%xmm1
|
||||
psubd -16(%rbp),%xmm2
|
||||
paddd %xmm3,%xmm10
|
||||
paddd %xmm9,%xmm3
|
||||
packssdw %xmm15,%xmm1
|
||||
paddd %xmm9,%xmm10
|
||||
psubd -32(%rbp),%xmm3
|
||||
movdqa %xmm10,%xmm4
|
||||
psubd %xmm8,%xmm10
|
||||
paddd %xmm8,%xmm4
|
||||
psrad $10,%xmm10
|
||||
movdqa %xmm4,%xmm15
|
||||
movdqa %xmm6,%xmm4
|
||||
psubd %xmm7,%xmm6
|
||||
psrad $10,%xmm6
|
||||
psrad $10,%xmm15
|
||||
paddd %xmm7,%xmm4
|
||||
movdqa %xmm3,%xmm7
|
||||
psubd %xmm14,%xmm3
|
||||
packssdw %xmm10,%xmm6
|
||||
psrad $10,%xmm3
|
||||
psrad $10,%xmm4
|
||||
paddd %xmm14,%xmm7
|
||||
movdqa %xmm7,%xmm8
|
||||
movdqa %xmm2,%xmm7
|
||||
psubd %xmm13,%xmm2
|
||||
paddd %xmm13,%xmm7
|
||||
psrad $10,%xmm8
|
||||
packssdw %xmm15,%xmm4
|
||||
psrad $10,%xmm7
|
||||
psrad $10,%xmm2
|
||||
packssdw %xmm8,%xmm7
|
||||
movdqa -80(%rbp),%xmm8
|
||||
packssdw %xmm3,%xmm2
|
||||
paddd %xmm9,%xmm8
|
||||
paddd -96(%rbp),%xmm9
|
||||
psubd -48(%rbp),%xmm8
|
||||
psubd -64(%rbp),%xmm9
|
||||
movdqa %xmm8,%xmm3
|
||||
movdqa %xmm9,%xmm10
|
||||
psubd %xmm11,%xmm8
|
||||
paddd %xmm12,%xmm10
|
||||
paddd %xmm11,%xmm3
|
||||
psrad $10,%xmm8
|
||||
psrad $10,%xmm10
|
||||
psrad $10,%xmm3
|
||||
psubd %xmm12,%xmm9
|
||||
psrad $10,%xmm9
|
||||
packssdw %xmm10,%xmm3
|
||||
// ---- Reorder: three rounds of punpcklwd/punpckhwd interleave the
// eight pass-1 result vectors (the last pass-1 pack and the first
// pass-2 operations are scheduled in between).
// NOTE(review): the interleave pattern looks like an 8x8 transpose of
// 16-bit elements -- confirm.
movdqa %xmm1,%xmm10
|
||||
packssdw %xmm9,%xmm8
|
||||
movdqa %xmm7,%xmm9
|
||||
punpckhwd %xmm6,%xmm7
|
||||
punpcklwd %xmm6,%xmm9
|
||||
punpcklwd %xmm8,%xmm10
|
||||
punpckhwd %xmm8,%xmm1
|
||||
movdqa %xmm3,%xmm6
|
||||
movdqa %xmm4,%xmm8
|
||||
punpckhwd %xmm5,%xmm3
|
||||
punpcklwd %xmm5,%xmm6
|
||||
punpcklwd %xmm2,%xmm8
|
||||
movdqa %xmm3,%xmm5
|
||||
punpckhwd %xmm2,%xmm4
|
||||
movdqa %xmm8,%xmm3
|
||||
movdqa %xmm10,%xmm2
|
||||
punpckhwd %xmm6,%xmm8
|
||||
punpcklwd %xmm6,%xmm3
|
||||
punpcklwd %xmm9,%xmm2
|
||||
movdqa %xmm8,%xmm6
|
||||
movdqa %xmm4,%xmm8
|
||||
punpckhwd %xmm9,%xmm10
|
||||
punpcklwd %xmm5,%xmm8
|
||||
punpckhwd %xmm5,%xmm4
|
||||
movdqa %xmm2,%xmm5
|
||||
punpcklwd %xmm3,%xmm5
|
||||
punpckhwd %xmm3,%xmm2
|
||||
movdqa %xmm1,%xmm15
|
||||
movdqa %xmm10,%xmm3
|
||||
punpckhwd %xmm7,%xmm1
|
||||
punpckhwd %xmm6,%xmm10
|
||||
punpcklwd %xmm6,%xmm3
|
||||
movdqa %xmm1,%xmm6
|
||||
punpckhwd %xmm4,%xmm1
|
||||
punpcklwd %xmm4,%xmm6
|
||||
movdqa %xmm3,%xmm4
|
||||
punpcklwd %xmm7,%xmm15
|
||||
punpcklwd %xmm6,%xmm4
|
||||
punpckhwd %xmm6,%xmm3
|
||||
movdqa %xmm15,%xmm7
|
||||
movdqa %xmm4,%xmm6
|
||||
punpcklwd %xmm8,%xmm7
|
||||
movdqa %xmm3,%xmm11
|
||||
movdqa %xmm4,%xmm12
|
||||
movdqa %xmm3,%xmm4
|
||||
movdqa %xmm5,%xmm3
|
||||
paddw %xmm7,%xmm3
|
||||
movdqa %xmm1,%xmm9
|
||||
punpckhwd %xmm8,%xmm15
|
||||
punpcklwd %xmm10,%xmm9
|
||||
psubw %xmm7,%xmm5
|
||||
movdqa %xmm15,%xmm7
|
||||
movdqa %xmm9,%xmm14
|
||||
punpcklwd %xmm2,%xmm7
|
||||
movdqa %xmm1,%xmm8
|
||||
// ---- Pass 2: the same multiply/add network as pass 1 (constants
// .LC0-.LC7), now applied to the reordered vectors held in registers.
pmaddwd .LC0(%rip),%xmm6
|
||||
punpckhwd %xmm10,%xmm8
|
||||
paddw %xmm15,%xmm10
|
||||
movaps %xmm6,-16(%rbp)
|
||||
pmaddwd .LC1(%rip),%xmm4
|
||||
// %xmm0 still holds zero from pass 1; it is used again for the
// unpack-with-zero / psrad $4 widening.
movdqa %xmm0,%xmm6
|
||||
pmaddwd .LC0(%rip),%xmm11
|
||||
pmaddwd .LC2(%rip),%xmm14
|
||||
pmaddwd .LC1(%rip),%xmm12
|
||||
pmaddwd .LC3(%rip),%xmm9
|
||||
movaps %xmm4,-64(%rbp)
|
||||
movdqa %xmm3,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
punpckhwd %xmm4,%xmm6
|
||||
punpcklwd %xmm4,%xmm3
|
||||
movdqa %xmm0,%xmm4
|
||||
movaps %xmm11,-32(%rbp)
|
||||
movdqa %xmm6,%xmm13
|
||||
movdqa %xmm15,%xmm6
|
||||
punpcklwd %xmm5,%xmm4
|
||||
movaps %xmm12,-48(%rbp)
|
||||
punpckhwd %xmm2,%xmm6
|
||||
paddw %xmm1,%xmm2
|
||||
punpckhwd %xmm5,%xmm0
|
||||
movdqa %xmm14,%xmm11
|
||||
movdqa %xmm2,%xmm5
|
||||
movdqa %xmm7,%xmm14
|
||||
punpckhwd %xmm10,%xmm2
|
||||
psrad $4,%xmm13
|
||||
punpcklwd %xmm10,%xmm5
|
||||
movaps %xmm13,-80(%rbp)
|
||||
movdqa %xmm8,%xmm12
|
||||
movdqa %xmm5,%xmm10
|
||||
pmaddwd .LC4(%rip),%xmm14
|
||||
pmaddwd .LC6(%rip),%xmm10
|
||||
movdqa %xmm2,%xmm15
|
||||
pmaddwd .LC7(%rip),%xmm5
|
||||
pmaddwd .LC3(%rip),%xmm8
|
||||
pmaddwd .LC5(%rip),%xmm7
|
||||
movdqa %xmm14,%xmm13
|
||||
movdqa %xmm6,%xmm14
|
||||
paddd %xmm5,%xmm13
|
||||
paddd %xmm5,%xmm9
|
||||
pmaddwd .LC5(%rip),%xmm6
|
||||
psrad $4,%xmm3
|
||||
pmaddwd .LC6(%rip),%xmm15
|
||||
paddd %xmm10,%xmm7
|
||||
paddd %xmm10,%xmm11
|
||||
psrad $4,%xmm4
|
||||
pmaddwd .LC2(%rip),%xmm12
|
||||
psrad $4,%xmm0
|
||||
pmaddwd .LC4(%rip),%xmm14
|
||||
pmaddwd .LC7(%rip),%xmm2
|
||||
movdqa -80(%rbp),%xmm5
|
||||
paddd %xmm15,%xmm12
|
||||
paddd -64(%rbp),%xmm5
|
||||
paddd %xmm2,%xmm14
|
||||
paddd %xmm8,%xmm2
|
||||
movdqa -48(%rbp),%xmm8
|
||||
paddd %xmm6,%xmm15
|
||||
// %xmm6 = pass-2 bias (.LC9); it is added before every psrad $17 below.
movdqa .LC9(%rip),%xmm6
|
||||
paddd %xmm3,%xmm8
|
||||
paddd %xmm6,%xmm8
|
||||
paddd %xmm6,%xmm5
|
||||
movdqa %xmm5,%xmm10
|
||||
movdqa %xmm8,%xmm1
|
||||
psubd %xmm15,%xmm5
|
||||
psubd %xmm7,%xmm8
|
||||
psrad $17,%xmm5
|
||||
paddd %xmm7,%xmm1
|
||||
movdqa -32(%rbp),%xmm7
|
||||
psrad $17,%xmm8
|
||||
paddd %xmm15,%xmm10
|
||||
paddd %xmm6,%xmm3
|
||||
packssdw %xmm5,%xmm8
|
||||
movdqa -16(%rbp),%xmm5
|
||||
paddd %xmm0,%xmm7
|
||||
paddd %xmm6,%xmm0
|
||||
paddd %xmm6,%xmm7
|
||||
psrad $17,%xmm10
|
||||
psubd -32(%rbp),%xmm0
|
||||
paddd %xmm4,%xmm5
|
||||
psrad $17,%xmm1
|
||||
movdqa %xmm7,%xmm15
|
||||
paddd %xmm6,%xmm5
|
||||
packssdw %xmm10,%xmm1
|
||||
psubd %xmm2,%xmm7
|
||||
movdqa %xmm5,%xmm10
|
||||
paddd %xmm6,%xmm4
|
||||
psubd %xmm9,%xmm5
|
||||
psubd -16(%rbp),%xmm4
|
||||
psrad $17,%xmm7
|
||||
paddd %xmm2,%xmm15
|
||||
psrad $17,%xmm5
|
||||
psubd -48(%rbp),%xmm3
|
||||
paddd -80(%rbp),%xmm6
|
||||
packssdw %xmm7,%xmm5
|
||||
movdqa %xmm4,%xmm2
|
||||
movdqa %xmm0,%xmm7
|
||||
psubd -64(%rbp),%xmm6
|
||||
paddd %xmm14,%xmm7
|
||||
psrad $17,%xmm15
|
||||
paddd %xmm13,%xmm2
|
||||
psubd %xmm14,%xmm0
|
||||
psrad $17,%xmm7
|
||||
psubd %xmm13,%xmm4
|
||||
psrad $17,%xmm0
|
||||
paddd %xmm9,%xmm10
|
||||
psrad $17,%xmm2
|
||||
psrad $17,%xmm4
|
||||
// packuswb narrows two vectors of signed words to sixteen bytes with
// unsigned saturation, i.e. results are clamped to 0..255.
packuswb %xmm8,%xmm5
|
||||
packssdw %xmm0,%xmm4
|
||||
packssdw %xmm7,%xmm2
|
||||
movdqa %xmm3,%xmm0
|
||||
movdqa %xmm6,%xmm7
|
||||
psrad $17,%xmm10
|
||||
paddd %xmm11,%xmm0
|
||||
paddd %xmm12,%xmm7
|
||||
psubd %xmm12,%xmm6
|
||||
packssdw %xmm15,%xmm10
|
||||
psubd %xmm11,%xmm3
|
||||
psrad $17,%xmm7
|
||||
packuswb %xmm10,%xmm1
|
||||
psrad $17,%xmm0
|
||||
psrad $17,%xmm6
|
||||
psrad $17,%xmm3
|
||||
packssdw %xmm7,%xmm0
|
||||
packssdw %xmm6,%xmm3
|
||||
packuswb %xmm0,%xmm2
|
||||
movdqa %xmm1,%xmm0
|
||||
packuswb %xmm4,%xmm3
|
||||
movdqa %xmm2,%xmm4
|
||||
// ---- Three rounds of byte interleaves rearrange the four packed
// vectors so that each 64-bit half is one 8-byte output row.
punpckhbw %xmm5,%xmm2
|
||||
punpcklbw %xmm3,%xmm0
|
||||
punpcklbw %xmm5,%xmm4
|
||||
punpckhbw %xmm3,%xmm1
|
||||
movdqa %xmm2,%xmm3
|
||||
movdqa %xmm0,%xmm2
|
||||
movdqa %xmm1,%xmm5
|
||||
punpcklbw %xmm4,%xmm2
|
||||
punpckhbw %xmm4,%xmm0
|
||||
punpcklbw %xmm3,%xmm5
|
||||
movdqa %xmm2,%xmm4
|
||||
punpckhbw %xmm5,%xmm2
|
||||
punpckhbw %xmm3,%xmm1
|
||||
punpcklbw %xmm5,%xmm4
|
||||
movdqa %xmm0,%xmm3
|
||||
punpckhbw %xmm1,%xmm0
|
||||
// ---- Store eight rows of 8 bytes. movq writes the low 64 bits;
// pshufd $78 swaps the two 64-bit halves so the former high half can
// be written with movq as well. %rax advances by the stride per row.
movq %xmm4,(%rdi)
|
||||
pshufd $78,%xmm4,%xmm4
|
||||
punpcklbw %xmm1,%xmm3
|
||||
movq %xmm4,(%rax)
|
||||
add %rsi,%rax
|
||||
movq %xmm2,(%rax)
|
||||
add %rsi,%rax
|
||||
pshufd $78,%xmm2,%xmm2
|
||||
movq %xmm2,(%rax)
|
||||
add %rsi,%rax
|
||||
movq %xmm3,(%rax)
|
||||
add %rsi,%rax
|
||||
pshufd $78,%xmm3,%xmm3
|
||||
movq %xmm3,(%rax)
|
||||
// %rax is at row 5 here; rows 6 and 7 are addressed relative to it.
movq %xmm0,(%rax,%rsi)
|
||||
pshufd $78,%xmm0,%xmm0
|
||||
movq %xmm0,(%rax,%rsi,2)
|
||||
// leave releases the 96-byte frame and restores %rbp.
leave
|
||||
ret
|
||||
.endfn stbi__idct_simd$sse,globl
|
||||
|
||||
// 16-byte constants referenced RIP-relative by stbi__idct_simd$sse.
// They are used as memory operands of pmaddwd/movdqa, which require
// 16-byte alignment.
// NOTE(review): .rodata.cst16 is not defined in this file; presumably
// it is a section macro from libc/macros.h that provides that
// alignment -- confirm.
.rodata.cst16
|
||||
// .LC0-.LC7: one pair of signed 16-bit multipliers repeated four times.
// pmaddwd multiplies each pair with a pair of interleaved input words
// and sums the two products into one dword.
// NOTE(review): the values match stb_image's 12-bit fixed-point
// (x*4096, rounded) IDCT rotation constants -- confirm.
.LC0: .value 2217,-5350,2217,-5350,2217,-5350,2217,-5350
|
||||
.LC1: .value 5352,2217,5352,2217,5352,2217,5352,2217
|
||||
.LC2: .value -6811,-8034,-6811,-8034,-6811,-8034,-6811,-8034
|
||||
.LC3: .value -8034,4552,-8034,4552,-8034,4552,-8034,4552
|
||||
.LC4: .value 6813,-1597,6813,-1597,6813,-1597,6813,-1597
|
||||
.LC5: .value -1597,4552,-1597,4552,-1597,4552,-1597,4552
|
||||
.LC6: .value 1131,4816,1131,4816,1131,4816,1131,4816
|
||||
.LC7: .value 4816,-5681,4816,-5681,4816,-5681,4816,-5681
|
||||
// Pass-1 bias: 0x200 = 1<<9, i.e. half of the 1<<10 removed by the
// psrad $10 that follows it (round to nearest).
.LC8: .long 0x200,0x200,0x200,0x200
|
||||
// Pass-2 bias: 0x1010000 = (1<<16) + (128<<17). With the psrad $17
// that follows, this is a half-unit rounding term plus an offset of
// 128 on every result.
.LC9: .long 0x1010000,0x1010000,0x1010000,0x1010000
|
Loading…
Add table
Add a link
Reference in a new issue