mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-12 14:09:12 +00:00
Upgrade to Cosmopolitan GCC 11.2.0 for x86_64
This commit is contained in:
parent
682b74ed88
commit
39f20dbb13
137 changed files with 48523 additions and 34001 deletions
238
third_party/gcc/README.cosmo
vendored
238
third_party/gcc/README.cosmo
vendored
|
@ -1,232 +1,28 @@
|
|||
This is a modern statically-linked GNU C2X toolchain.
|
||||
DESCRIPTION
|
||||
|
||||
You have the freedom to obtain the original sources to these binaries,
|
||||
and build ones just like them, by visiting:
|
||||
Cosmopolitan GCC
|
||||
Prebuilt x86_64-linux binaries
|
||||
An APE-friendly C/C++ compiler
|
||||
|
||||
https://www.gnu.org/
|
||||
https://github.com/richfelker/musl-cross-make
|
||||
LICENSE
|
||||
|
||||
The musl-cross-make tool also produces libraries and header files. We've
|
||||
only vendored the statically-linked executable files, since Cosmopolitan
|
||||
won't depend on GPL-licensed headers / runtime libraries.
|
||||
GPLv3 and other licenses (see LICENSE.txt)
|
||||
|
||||
We haven't made any modifications to the original software. The versions
|
||||
we chose are documented in $PKG/LICENSE.txt. Here's our Musl
|
||||
build config for maximum transparency:
|
||||
ORIGIN
|
||||
|
||||
commit 38e52db8358c043ae82b346a2e6e66bc86a53bc1
|
||||
Author: Rich Felker <dalias@aerifal.cx>
|
||||
Date: Wed Dec 18 14:29:07 2019 -0500
|
||||
@ahgamut's musl-cross-make fork
|
||||
https://github.com/ahgamut/musl-cross-make/
|
||||
d0f33e2162cf5e5b30cdf3b3accc0d0f7756830c
|
||||
|
||||
switch linux kernel headers to 4.19.88 by default
|
||||
|
||||
using slim headers-only version. this change is needed to support all
|
||||
future versions of musl on 32-bit archs, since prior to 4.16 the
|
||||
kernel headers had incompatibility with userspace time_t not matching
|
||||
the kernel's old (32-bit) time_t. support for older headers will be
|
||||
dropped entirely soon.
|
||||
MODIFICATIONS
|
||||
|
||||
TARGET = x86_64-linux-musl
|
||||
OUTPUT = /opt/cross9
|
||||
GCC_VER = 9.2.0
|
||||
export LANG=en_US.UTF-8
|
||||
export LC_CTYPE=en_US.UTF-8
|
||||
COMMON_CONFIG += CC="/opt/cross9/bin/x86_64-linux-musl-cc -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
|
||||
COMMON_CONFIG += CXX="/opt/cross9/bin/x86_64-linux-musl-c++ -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
|
||||
COMMON_CONFIG += LD="/opt/cross9/bin/x86_64-linux-musl-ld --build-id=none"
|
||||
COMMON_CONFIG += NM="/opt/cross9/bin/x86_64-linux-musl-nm"
|
||||
COMMON_CONFIG += LDFLAGS="-Wl,--build-id=none"
|
||||
COMMON_CONFIG += OBJCOPY="/opt/cross9/bin/x86_64-linux-musl-objcopy"
|
||||
COMMON_CONFIG += --disable-nls --disable-lto
|
||||
GCC_CONFIG += --enable-languages=c,c++
|
||||
GCC_CONFIG += --disable-multilib
|
||||
GCC_CONFIG += --with-gnu-as
|
||||
GCC_CONFIG += --with-gnu-ld
|
||||
GCC_CONFIG += --disable-multilib
|
||||
GCC_CONFIG += --enable-sjlj-exceptions
|
||||
GCC_CONFIG += --disable-threads
|
||||
GCC_CONFIG += --disable-tls
|
||||
COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=
|
||||
ahgamut's musl-cross-make fork includes a 2kLOC patch that modifies
|
||||
GCC so it'll compile C code like `switch(errno){case EINVAL: etc.}`
|
||||
|
||||
#!/bin/sh
|
||||
set -e
|
||||
export LC_ALL=C
|
||||
export GUNZ="/bin/gzip --rsyncable -9 -c"
|
||||
BASE=/opt/cross9
|
||||
PKG=third_party/gcc
|
||||
VERS=9.2.0
|
||||
SEE ALSO
|
||||
|
||||
if [ ! -d $BASE ]; then
|
||||
echo error: run make install >&2
|
||||
exit 1
|
||||
fi
|
||||
third_party/gcc/portcosmo.patch
|
||||
|
||||
if [ -d $BASE/$PKG ]; then
|
||||
rm -rf $BASE/$PKG
|
||||
fi
|
||||
NOTES
|
||||
|
||||
mkdir -p $BASE/$PKG/bin
|
||||
mkdir -p $BASE/$PKG/libexec/gcc/x86_64-linux-musl/$VERS
|
||||
mkdir -p $BASE/$PKG/x86_64-linux-musl/bin
|
||||
|
||||
cp $BASE/bin/x86_64-linux-musl-gcov-dump $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
||||
cp $BASE/bin/x86_64-linux-musl-cc $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
||||
cp $BASE/bin/x86_64-linux-musl-addr2line $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
||||
cp $BASE/bin/x86_64-linux-musl-ar $BASE/$PKG/bin/x86_64-linux-musl-ar
|
||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
||||
cp $BASE/bin/x86_64-linux-musl-c++ $BASE/$PKG/bin/x86_64-linux-musl-g++
|
||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
||||
cp $BASE/bin/x86_64-linux-musl-gcc-nm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
||||
cp $BASE/bin/x86_64-linux-musl-c++filt $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
||||
cp $BASE/bin/x86_64-linux-musl-elfedit $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
||||
cp $BASE/bin/x86_64-linux-musl-ld $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
||||
cp $BASE/bin/x86_64-linux-musl-size $BASE/$PKG/bin/x86_64-linux-musl-size
|
||||
cp $BASE/bin/x86_64-linux-musl-strings $BASE/$PKG/bin/x86_64-linux-musl-strings
|
||||
cp $BASE/bin/x86_64-linux-musl-objcopy $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
||||
cp $BASE/bin/x86_64-linux-musl-nm $BASE/$PKG/bin/x86_64-linux-musl-nm
|
||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
||||
cp $BASE/bin/x86_64-linux-musl-readelf $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
||||
cp $BASE/bin/x86_64-linux-musl-objdump $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
||||
cp $BASE/bin/x86_64-linux-musl-gcc-ar $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
||||
cp $BASE/bin/x86_64-linux-musl-gcov $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
||||
cp $BASE/bin/x86_64-linux-musl-ranlib $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
||||
cp $BASE/bin/x86_64-linux-musl-as $BASE/$PKG/bin/x86_64-linux-musl-as
|
||||
cp $BASE/bin/x86_64-linux-musl-gcc-ranlib $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
||||
cp $BASE/bin/x86_64-linux-musl-cpp $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
||||
cp $BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
|
||||
cp $BASE/bin/x86_64-linux-musl-gprof $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
||||
cp $BASE/bin/x86_64-linux-musl-gcov-tool $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
||||
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ar
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-g++
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-size
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strings
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-nm
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-as
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
||||
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump >$BASE/$PKG/bin/x86_64-linux-musl-gcov-dump.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc >$BASE/$PKG/bin/x86_64-linux-musl-gcc.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-addr2line >$BASE/$PKG/bin/x86_64-linux-musl-addr2line.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ar >$BASE/$PKG/bin/x86_64-linux-musl-ar.gz
|
||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-g++ >$BASE/$PKG/bin/x86_64-linux-musl-g++.gz
|
||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm >$BASE/$PKG/bin/x86_64-linux-musl-gcc-nm.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-c++filt >$BASE/$PKG/bin/x86_64-linux-musl-c++filt.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-elfedit >$BASE/$PKG/bin/x86_64-linux-musl-elfedit.gz
|
||||
$GUNZ $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd >$BASE/$PKG/x86_64-linux-musl/bin/ld.bfd.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-size >$BASE/$PKG/bin/x86_64-linux-musl-size.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strings >$BASE/$PKG/bin/x86_64-linux-musl-strings.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objcopy >$BASE/$PKG/bin/x86_64-linux-musl-objcopy.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-nm >$BASE/$PKG/bin/x86_64-linux-musl-nm.gz
|
||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-readelf >$BASE/$PKG/bin/x86_64-linux-musl-readelf.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objdump >$BASE/$PKG/bin/x86_64-linux-musl-objdump.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ar.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov >$BASE/$PKG/bin/x86_64-linux-musl-gcov.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-ranlib.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-as >$BASE/$PKG/bin/x86_64-linux-musl-as.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-cpp >$BASE/$PKG/bin/x86_64-linux-musl-cpp.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strip >$BASE/$PKG/bin/x86_64-linux-musl-strip.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gprof >$BASE/$PKG/bin/x86_64-linux-musl-gprof.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool >$BASE/$PKG/bin/x86_64-linux-musl-gcov-tool.gz
|
||||
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-ar
|
||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-g++
|
||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
||||
rm $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-size
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-strings
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-nm
|
||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-as
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-strip
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
||||
|
||||
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-cc
|
||||
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-gcc-9.2.0
|
||||
ln -s ../../bin/x86_64-linux-musl-ar $BASE/$PKG/x86_64-linux-musl/bin/ar
|
||||
ln -s x86_64-linux-musl-g++ $BASE/$PKG/bin/x86_64-linux-musl-c++
|
||||
ln -s ld.bfd $BASE/$PKG/x86_64-linux-musl/bin/ld
|
||||
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld.bfd
|
||||
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld
|
||||
ln -s ../../bin/x86_64-linux-musl-objcopy $BASE/$PKG/x86_64-linux-musl/bin/objcopy
|
||||
ln -s ../../bin/x86_64-linux-musl-nm $BASE/$PKG/x86_64-linux-musl/bin/nm
|
||||
ln -s ../../bin/x86_64-linux-musl-readelf $BASE/$PKG/x86_64-linux-musl/bin/readelf
|
||||
ln -s ../../bin/x86_64-linux-musl-objdump $BASE/$PKG/x86_64-linux-musl/bin/objdump
|
||||
ln -s ../../bin/x86_64-linux-musl-ranlib $BASE/$PKG/x86_64-linux-musl/bin/ranlib
|
||||
ln -s ../../bin/x86_64-linux-musl-as $BASE/$PKG/x86_64-linux-musl/bin/as
|
||||
ln -s ../../bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/strip
|
||||
|
||||
{
|
||||
cat <<'EOF'
|
||||
This is a modern statically-linked GNU C2X toolchain.
|
||||
|
||||
You have the freedom to obtain the original sources to these binaries,
|
||||
and build ones just like them, by visiting:
|
||||
|
||||
https://www.gnu.org/
|
||||
https://github.com/richfelker/musl-cross-make
|
||||
|
||||
The musl-cross-make tool also produces libraries and header files. We've
|
||||
only vendored the statically-linked executable files, since Cosmopolitan
|
||||
won't depend on GPL-licensed headers / runtime libraries.
|
||||
|
||||
We haven't made any modifications to the original software. The versions
|
||||
we chose are documented in $PKG/LICENSE.txt. Here's our Musl
|
||||
build config for maximum transparency:
|
||||
|
||||
EOF
|
||||
git show --quiet
|
||||
echo
|
||||
cat config.mak
|
||||
echo
|
||||
cat bundle.sh
|
||||
} >$BASE/$PKG/README.cosmo
|
||||
|
||||
{
|
||||
for f in $(find . -iname \*copying\* -or -iname \*license\* | sort); do
|
||||
printf '\n'
|
||||
printf '%s\n' "$f"
|
||||
printf '========================================================================\n'
|
||||
cat "$f"
|
||||
done
|
||||
} >$BASE/$PKG/LICENSE.txt
|
||||
My name is Justine Tunney and I approve of these binaries.
|
||||
|
|
BIN
third_party/gcc/bin/x86_64-linux-musl-addr2line.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-addr2line.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-ar.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-ar.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-as.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-as.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-c++filt.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-c++filt.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-cpp.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-cpp.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-elfedit.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-elfedit.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-g++.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-g++.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ar.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ar.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-nm.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-nm.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ranlib.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ranlib.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-dump.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-dump.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-tool.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-tool.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gprof.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gprof.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-nm.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-nm.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-objcopy.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-objcopy.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-objdump.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-objdump.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-ranlib.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-ranlib.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-readelf.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-readelf.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-size.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-size.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-strings.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-strings.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-strip.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-strip.gz
vendored
Binary file not shown.
97
third_party/gcc/config.mak
vendored
Normal file
97
third_party/gcc/config.mak
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
#
|
||||
# config.mak.dist - sample musl-cross-make configuration
|
||||
#
|
||||
# Copy to config.mak and edit as desired.
|
||||
#
|
||||
|
||||
# There is no default TARGET; you must select one here or on the make
|
||||
# command line. Some examples:
|
||||
|
||||
# TARGET = i486-linux-musl
|
||||
TARGET = aarch64-linux-musl
|
||||
# TARGET = arm-linux-musleabi
|
||||
# TARGET = arm-linux-musleabihf
|
||||
# TARGET = sh2eb-linux-muslfdpic
|
||||
# TARGET = powerpc64le-linux-musl
|
||||
# TARGET = aarch64-linux-musl
|
||||
|
||||
# By default, cross compilers are installed to ./output under the top-level
|
||||
# musl-cross-make directory and can later be moved wherever you want them.
|
||||
# To install directly to a specific location, set it here. Multiple targets
|
||||
# can safely be installed in the same location. Some examples:
|
||||
|
||||
OUTPUT = /opt/cross11portcosmo
|
||||
# OUTPUT = /usr/local
|
||||
|
||||
# By default, latest supported release versions of musl and the toolchain
|
||||
# components are used. You can override those here, but the version selected
|
||||
# must be supported (under hashes/ and patches/) to work. For musl, you
|
||||
# can use "git-refname" (e.g. git-master) instead of a release. Setting a
|
||||
# blank version for gmp, mpc, mpfr and isl will suppress download and
|
||||
# in-tree build of these libraries and instead depend on pre-installed
|
||||
# libraries when available (isl is optional and not set by default).
|
||||
# Setting a blank version for linux will suppress installation of kernel
|
||||
# headers, which are not needed unless compiling programs that use them.
|
||||
|
||||
# BINUTILS_VER = 2.25.1
|
||||
GCC_VER = 11.2.0
|
||||
# MUSL_VER = git-master
|
||||
# GMP_VER =
|
||||
# MPC_VER =
|
||||
# MPFR_VER =
|
||||
# ISL_VER =
|
||||
# LINUX_VER =
|
||||
|
||||
# By default source archives are downloaded with wget. curl is also an option.
|
||||
|
||||
# DL_CMD = wget -c -O
|
||||
# DL_CMD = curl -C - -L -o
|
||||
|
||||
# Check sha-1 hashes of downloaded source archives. On gnu systems this is
|
||||
# usually done with sha1sum.
|
||||
|
||||
# SHA1_CMD = sha1sum -c
|
||||
# SHA1_CMD = sha1 -c
|
||||
# SHA1_CMD = shasum -a 1 -c
|
||||
|
||||
# Something like the following can be used to produce a static-linked
|
||||
# toolchain that's deployable to any system with matching arch, using
|
||||
# an existing musl-targeted cross compiler. This only works if the
|
||||
# system you build on can natively (or via binfmt_misc and qemu) run
|
||||
# binaries produced by the existing toolchain (in this example, x86_64).
|
||||
|
||||
# MUSL_CONFIG += --enable-debug
|
||||
# MUSL_CONFIG += CFLAGS="-Os -fno-omit-frame-pointer -fno-optimize-sibling-calls -mno-omit-leaf-frame-pointer"
|
||||
MUSL_CONFIG += CFLAGS="-Os"
|
||||
|
||||
COMMON_CONFIG += CC="/opt/cross/bin/x86_64-linux-musl-gcc -static --static"
|
||||
COMMON_CONFIG += CXX="/opt/cross/bin/x86_64-linux-musl-g++ -static --static"
|
||||
# COMMON_CONFIG += CC="gcc -static --static"
|
||||
# COMMON_CONFIG += CXX="g++ -static --static"
|
||||
|
||||
# Recommended options for smaller build for deploying binaries:
|
||||
|
||||
COMMON_CONFIG += CFLAGS="-Os -g0"
|
||||
COMMON_CONFIG += CXXFLAGS="-Os -g0"
|
||||
COMMON_CONFIG += LDFLAGS="-s"
|
||||
|
||||
# Options you can add for faster/simpler build at the expense of features:
|
||||
|
||||
COMMON_CONFIG += --disable-nls
|
||||
GCC_CONFIG += --disable-libquadmath --disable-decimal-float
|
||||
GCC_CONFIG += --disable-libitm
|
||||
GCC_CONFIG += --disable-fixed-point
|
||||
GCC_CONFIG += --disable-lto
|
||||
|
||||
# By default C and C++ are the only languages enabled, and these are
|
||||
# the only ones tested and known to be supported. You can uncomment the
|
||||
# following and add other languages if you want to try getting them to
|
||||
# work too.
|
||||
|
||||
GCC_CONFIG += --enable-languages=c,c++ #--enable-plugin
|
||||
|
||||
# You can keep the local build path out of your toolchain binaries and
|
||||
# target libraries with the following, but then gdb needs to be told
|
||||
# where to look for source files.
|
||||
|
||||
# COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=
|
0
third_party/gcc/lib/gcc/x86_64-linux-musl/11.2.0/specs
vendored
Normal file
0
third_party/gcc/lib/gcc/x86_64-linux-musl/11.2.0/specs
vendored
Normal file
|
@ -1,141 +0,0 @@
|
|||
*asm:
|
||||
%{m16|m32:--32} %{m16|m32:;:--64} %{msse2avx:%{!mavx:-msse2avx}}
|
||||
|
||||
*asm_debug:
|
||||
%{%:debug-level-gt(0):%{gstabs*:--gstabs}%{!gstabs*:%{g*:--gdwarf2}}} %{fdebug-prefix-map=*:--debug-prefix-map %*}
|
||||
|
||||
*asm_final:
|
||||
%{gsplit-dwarf:
|
||||
objcopy --extract-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} %{c:%{o*:%:replace-extension(%{o*:%*} .dwo)}%{!o*:%b.dwo}}%{!c:%b.dwo}
|
||||
objcopy --strip-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} }
|
||||
|
||||
*asm_options:
|
||||
%{-target-help:%:print-asm-header()} %{v} %{w:-W} %{I*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %a %Y %{c:%W{o*}%{!o*:-o %w%b%O}}%{!c:-o %d%w%u%O}
|
||||
|
||||
*invoke_as:
|
||||
%{!fwpa*: %{fcompare-debug=*|fdump-final-insns=*:%:compare-debug-dump-opt()} %{!S:-o %|.s |
|
||||
as %(asm_options) %m.s %A } }
|
||||
|
||||
*cpp:
|
||||
%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}
|
||||
|
||||
*cpp_options:
|
||||
%(cpp_unique_options) %1 %{m*} %{std*&ansi&trigraphs} %{W*&pedantic*} %{w} %{f*} %{g*:%{%:debug-level-gt(0):%{g*} %{!fno-working-directory:-fworking-directory}}} %{O*} %{undef} %{save-temps*:-fpch-preprocess}
|
||||
|
||||
*cpp_debug_options:
|
||||
%{d*}
|
||||
|
||||
*cpp_unique_options:
|
||||
%{!Q:-quiet} %{nostdinc*} %{C} %{CC} %{v} %@{I*&F*} %{P} %I %{MD:-MD %{!o:%b.d}%{o*:%.d%*}} %{MMD:-MMD %{!o:%b.d}%{o*:%.d%*}} %{M} %{MM} %{MF*} %{MG} %{MP} %{MQ*} %{MT*} %{!E:%{!M:%{!MM:%{!MT:%{!MQ:%{MD|MMD:%{o*:-MQ %*}}}}}}} %{remap} %{g3|ggdb3|gstabs3|gxcoff3|gvms3:-dD} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %{H} %C %{D*&U*&A*} %{i*} %Z %i %{E|M|MM:%W{o*}}
|
||||
|
||||
*trad_capable_cpp:
|
||||
cc1 -E %{traditional|traditional-cpp:-traditional-cpp}
|
||||
|
||||
*cc1:
|
||||
%{!mandroid|tno-android-cc:%(cc1_cpu) %{profile:-p};:%(cc1_cpu) %{profile:-p} %{!fno-pic:%{!fno-PIC:%{!fpic:%{!fPIC: -fPIC}}}}}
|
||||
|
||||
*cc1_options:
|
||||
%{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %1 %{!Q:-quiet} %{!dumpbase:-dumpbase %B} %{d*} %{m*} %{aux-info*} %{fcompare-debug-second:%:compare-debug-auxbase-opt(%b)} %{!fcompare-debug-second:%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}}%{!c:%{!S:-auxbase %b}} %{g*} %{O*} %{W*&pedantic*} %{w} %{std*&ansi&trigraphs} %{v:-version} %{pg:-p} %{p} %{f*} %{undef} %{Qn:-fno-ident} %{Qy:} %{-help:--help} %{-target-help:--target-help} %{-version:--version} %{-help=*:--help=%*} %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}} %{fsyntax-only:-o %j} %{-param*} %{coverage:-fprofile-arcs -ftest-coverage} %{fprofile-arcs|fprofile-generate*|coverage: %{!fprofile-update=single: %{pthread:-fprofile-update=prefer-atomic}}}
|
||||
|
||||
*cc1plus:
|
||||
|
||||
|
||||
*link_gcc_c_sequence:
|
||||
%{static|static-pie:--start-group} %G %{!nolibc:%L} %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}
|
||||
|
||||
*link_ssp:
|
||||
%{fstack-protector|fstack-protector-all|fstack-protector-strong|fstack-protector-explicit:-lssp_nonshared}
|
||||
|
||||
*endfile:
|
||||
--push-state --pop-state
|
||||
|
||||
*link:
|
||||
%{!mandroid|tno-android-ld:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}};:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{mx32:-m elf32_x86_64} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} %{m16|m32:-dynamic-linker } %{m16|m32:;:-dynamic-linker} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}} %{shared: -Bsymbolic}}
|
||||
|
||||
*lib:
|
||||
--push-state --pop-state
|
||||
|
||||
*link_gomp:
|
||||
|
||||
|
||||
*libgcc:
|
||||
--push-state --pop-state
|
||||
|
||||
*startfile:
|
||||
--push-state --pop-state
|
||||
|
||||
*cross_compile:
|
||||
1
|
||||
|
||||
*version:
|
||||
9.2.0
|
||||
|
||||
*multilib:
|
||||
. ;
|
||||
|
||||
*multilib_defaults:
|
||||
m64
|
||||
|
||||
*multilib_extra:
|
||||
|
||||
|
||||
*multilib_matches:
|
||||
|
||||
|
||||
*multilib_exclusions:
|
||||
|
||||
|
||||
*multilib_options:
|
||||
|
||||
|
||||
*multilib_reuse:
|
||||
|
||||
|
||||
*linker:
|
||||
collect2
|
||||
|
||||
*linker_plugin_file:
|
||||
|
||||
|
||||
*lto_wrapper:
|
||||
|
||||
|
||||
*lto_gcc:
|
||||
|
||||
|
||||
*post_link:
|
||||
|
||||
|
||||
*link_libgcc:
|
||||
%D
|
||||
|
||||
*md_exec_prefix:
|
||||
|
||||
|
||||
*md_startfile_prefix:
|
||||
|
||||
|
||||
*md_startfile_prefix_1:
|
||||
|
||||
|
||||
*startfile_prefix_spec:
|
||||
|
||||
|
||||
*sysroot_spec:
|
||||
--sysroot=%R
|
||||
|
||||
*sysroot_suffix_spec:
|
||||
|
||||
|
||||
*sysroot_hdrs_suffix_spec:
|
||||
|
||||
|
||||
*self_spec:
|
||||
|
||||
|
||||
*cc1_cpu:
|
||||
%{march=native:%>march=native %:local_cpu_detect(arch) %{!mtune=*:%>mtune=native %:local_cpu_detect(tune)}} %{mtune=native:%>mtune=native %:local_cpu_detect(tune)}
|
||||
|
||||
*link_command:
|
||||
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S: %(linker) %{fuse-linker-plugin: %e-fuse-linker-plugin is not supported in this configuration}%{flto|flto=*:%<fcompare-debug*} %{flto} %{fno-lto} %{flto=*} %l %{shared|r:;pie|static-pie:-pie %{static|static-pie:--no-dynamic-linker -z text -Bsymbolic}} %{fuse-ld=*:-fuse-ld=%*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %X %{o*} %{e*} %{N} %{n} %{r} %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!r:%{!nostartfiles:%S}}} %{static|no-pie|static-pie:} %@{L*} %(mfwrap) %(link_libgcc) %{fvtable-verify=none:} %{fvtable-verify=std: %e-fvtable-verify=std is not supported in this configuration} %{fvtable-verify=preinit: %e-fvtable-verify=preinit is not supported in this configuration} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address):%{!shared:libasan_preinit%O%s} %{static-libasan:%{!shared:-Bstatic --whole-archive -lasan --no-whole-archive -Bdynamic}}%{!static-libasan:-lasan}} %{%:sanitize(thread):%{!shared:libtsan_preinit%O%s} %{static-libtsan:%{!shared:-Bstatic --whole-archive -ltsan --no-whole-archive -Bdynamic}}%{!static-libtsan:-ltsan}} %{%:sanitize(leak):%{!shared:liblsan_preinit%O%s} %{static-liblsan:%{!shared:-Bstatic --whole-archive -llsan --no-whole-archive -Bdynamic}}%{!static-liblsan:-llsan}}}}} %o %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*:%*} 1): %:include(libgomp.spec)%(link_gomp)} %{fgnu-tm:%:include(libitm.spec)%(link_itm)} %(mflib) %{fsplit-stack: --wrap=pthread_create} %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address): %{static-libasan|static:%:include(libsanitizer.spec)%(link_libasan)} %{static:%ecannot specify -static with -fsanitize=address}} %{%:sanitize(thread): %{static-libtsan|static:%:include(libsanitizer.spec)%(link_libtsan)} %{static:%ecannot specify -static with -fsanitize=thread}} %{%:sanitize(undefined):%{static-libubsan:-Bstatic} -lubsan 
%{static-libubsan:-Bdynamic} %{static-libubsan|static:%:include(libsanitizer.spec)%(link_libubsan)}} %{%:sanitize(leak): %{static-liblsan|static:%:include(libsanitizer.spec)%(link_liblsan)}}}}} %{!nostdlib:%{!r:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}} %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*}
|
||||
%(post_link) }}}}}}
|
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/cc1.gz
vendored
Normal file
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/cc1.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/collect2.gz
vendored
Normal file
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/collect2.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
1869
third_party/gcc/portcosmo.patch
vendored
Normal file
1869
third_party/gcc/portcosmo.patch
vendored
Normal file
File diff suppressed because it is too large
Load diff
46
third_party/gcc/upgrade-cosmo-gcc.sh
vendored
Executable file
46
third_party/gcc/upgrade-cosmo-gcc.sh
vendored
Executable file
|
@ -0,0 +1,46 @@
|
|||
#!/bin/sh
# Re-vendors the prebuilt Cosmopolitan GCC binaries under third_party/gcc.
#
# usage: third_party/gcc/upgrade-cosmo-gcc.sh [ARCH] [IMPORT]
#   ARCH    architecture prefix of the toolchain (default: x86_64)
#   IMPORT  install root of the newly built toolchain to copy from
#           (default: /opt/cross11portcosmo)
#
# All paths are relative, so run this from the root of the repository.

ARCH=${1:-x86_64}
IMPORT=${2:-/opt/cross11portcosmo}
PREFIX=third_party/gcc
OLDVERSION=9.2.0
NEWVERSION=11.2.0

# Throw away build outputs produced with the previous toolchain.
rm -rf o/third_party/gcc

# Rename the versioned directories. A missing old directory is skipped so
# the script can be re-run after the rename has already happened; a rename
# that is attempted and fails aborts the upgrade.
for d in libexec/gcc lib/gcc; do
  old=$PREFIX/$d/$ARCH-linux-musl/$OLDVERSION
  new=$PREFIX/$d/$ARCH-linux-musl/$NEWVERSION
  if [ -d "$old" ]; then
    mv "$old" "$new" || exit
  fi
done

# Rewrite the version string inside the .sym files for this architecture.
# The dots of the old version are escaped so sed matches them literally
# instead of treating each one as "any character".
old_re=$(printf '%s\n' "$OLDVERSION" | sed 's/\./\\./g')
syms=$(find "$PREFIX" -name '*.sym' | grep -- "$ARCH")
if [ -n "$syms" ]; then
  # $syms is deliberately unquoted: one argument per path.
  sed -i -e "s/$old_re/$NEWVERSION/g" $syms
fi

# Executables to import, relative to both $IMPORT and $PREFIX.
FILES="
$ARCH-linux-musl/bin/ld.bfd
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/collect2
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1plus
bin/$ARCH-linux-musl-elfedit
bin/$ARCH-linux-musl-nm
bin/$ARCH-linux-musl-objcopy
bin/$ARCH-linux-musl-gcc
bin/$ARCH-linux-musl-c++filt
bin/$ARCH-linux-musl-gcc-ranlib
bin/$ARCH-linux-musl-addr2line
bin/$ARCH-linux-musl-objdump
bin/$ARCH-linux-musl-gcov
bin/$ARCH-linux-musl-ranlib
bin/$ARCH-linux-musl-gcc-nm
bin/$ARCH-linux-musl-strip
bin/$ARCH-linux-musl-gcov-tool
bin/$ARCH-linux-musl-gprof
bin/$ARCH-linux-musl-strings
bin/$ARCH-linux-musl-gcov-dump
bin/$ARCH-linux-musl-cpp
bin/$ARCH-linux-musl-ar
bin/$ARCH-linux-musl-readelf
bin/$ARCH-linux-musl-size
bin/$ARCH-linux-musl-as
bin/$ARCH-linux-musl-g++
bin/$ARCH-linux-musl-gcc-ar
"

# Store each executable gzip-compressed next to its original name; stop at
# the first file that cannot be read or written.
for f in $FILES; do
  gzip -9 <"$IMPORT/$f" >"$PREFIX/$f.gz" || exit
done
|
BIN
third_party/gcc/x86_64-linux-musl/bin/ld.bfd.gz
vendored
BIN
third_party/gcc/x86_64-linux-musl/bin/ld.bfd.gz
vendored
Binary file not shown.
74
third_party/intel/adxintrin.internal.h
vendored
74
third_party/intel/adxintrin.internal.h
vendored
|
@ -1,43 +1,53 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <adxintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <adxintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _ADXINTRIN_H_INCLUDED
|
||||
#define _ADXINTRIN_H_INCLUDED
|
||||
|
||||
__funline unsigned char _subborrow_u32(unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P) {
|
||||
return __builtin_ia32_sbb_u32(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_subborrow_u32 (unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
return __builtin_ia32_sbb_u32 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarry_u32(unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P) {
|
||||
return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarry_u32 (unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarryx_u32(unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P) {
|
||||
return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarryx_u32 (unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned char _subborrow_u64(unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
return __builtin_ia32_sbb_u64(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_subborrow_u64 (unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y, unsigned long long *__P)
|
||||
{
|
||||
return __builtin_ia32_sbb_u64 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarry_u64(unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarry_u64 (unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y, unsigned long long *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarryx_u64(unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarryx_u64 (unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y, unsigned long long *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ADXINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
|
|
68
third_party/intel/ammintrin.internal.h
vendored
68
third_party/intel/ammintrin.internal.h
vendored
|
@ -1,58 +1,54 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _AMMINTRIN_H_INCLUDED
|
||||
#define _AMMINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "third_party/intel/pmmintrin.internal.h"
|
||||
|
||||
#ifndef __SSE4A__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4a")
|
||||
#define __DISABLE_SSE4A__
|
||||
#endif /* __SSE4A__ */
|
||||
|
||||
__funline void _mm_stream_sd(double* __P, __m128d __Y) {
|
||||
__builtin_ia32_movntsd(__P, (__v2df)__Y);
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_stream_sd (double * __P, __m128d __Y)
|
||||
{
|
||||
__builtin_ia32_movntsd (__P, (__v2df) __Y);
|
||||
}
|
||||
|
||||
__funline void _mm_stream_ss(float* __P, __m128 __Y) {
|
||||
__builtin_ia32_movntss(__P, (__v4sf)__Y);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_stream_ss (float * __P, __m128 __Y)
|
||||
{
|
||||
__builtin_ia32_movntss (__P, (__v4sf) __Y);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_extract_si64(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_extrq((__v2di)__X, (__v16qi)__Y);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extract_si64 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_extracti_si64(__m128i __X, unsigned const int __I,
|
||||
unsigned const int __L) {
|
||||
return (__m128i)__builtin_ia32_extrqi((__v2di)__X, __I, __L);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
|
||||
}
|
||||
#else
|
||||
#define _mm_extracti_si64(X, I, L) \
|
||||
((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(X), (unsigned int)(I), \
|
||||
(unsigned int)(L)))
|
||||
#define _mm_extracti_si64(X, I, L) ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), (unsigned int)(I), (unsigned int)(L)))
|
||||
#endif
|
||||
|
||||
__funline __m128i _mm_insert_si64(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_insertq((__v2di)__X, (__v2di)__Y);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_insert_si64 (__m128i __X,__m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_inserti_si64(__m128i __X, __m128i __Y,
|
||||
unsigned const int __I,
|
||||
unsigned const int __L) {
|
||||
return (__m128i)__builtin_ia32_insertqi((__v2di)__X, (__v2di)__Y, __I, __L);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
|
||||
}
|
||||
#else
|
||||
#define _mm_inserti_si64(X, Y, I, L) \
|
||||
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(X), \
|
||||
(__v2di)(__m128i)(Y), (unsigned int)(I), \
|
||||
(unsigned int)(L)))
|
||||
#define _mm_inserti_si64(X, Y, I, L) ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), (__v2di)(__m128i)(Y), (unsigned int)(I), (unsigned int)(L)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_SSE4A__
|
||||
#undef __DISABLE_SSE4A__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE4A__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _AMMINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
22
third_party/intel/amxbf16intrin.internal.h
vendored
Normal file
22
third_party/intel/amxbf16intrin.internal.h
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AMXBF16INTRIN_H_INCLUDED
|
||||
#define _AMXBF16INTRIN_H_INCLUDED
|
||||
#if !defined(__AMX_BF16__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-bf16")
|
||||
#define __DISABLE_AMX_BF16__
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__AMX_BF16__)
|
||||
/* Emits a single tdpbf16ps instruction. dst, src1 and src2 are stringized
   straight into the assembly text as %tmm<N> register names, so each must
   expand to a literal tile number. The {...|...} form supplies the two
   assembler-dialect operand orders. The statement declares no operands and
   no clobbers. */
#define _tile_dpbf16ps_internal(dst,src1,src2) __asm__ volatile ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
/* Public spelling. The extra macro level lets arguments that are themselves
   macros expand before _tile_dpbf16ps_internal stringizes them. */
#define _tile_dpbf16ps(dst,src1,src2) _tile_dpbf16ps_internal (dst, src1, src2)
|
||||
#endif
|
||||
#ifdef __DISABLE_AMX_BF16__
|
||||
#undef __DISABLE_AMX_BF16__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
25
third_party/intel/amxint8intrin.internal.h
vendored
Normal file
25
third_party/intel/amxint8intrin.internal.h
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AMXINT8INTRIN_H_INCLUDED
|
||||
#define _AMXINT8INTRIN_H_INCLUDED
|
||||
#if !defined(__AMX_INT8__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-int8")
|
||||
#define __DISABLE_AMX_INT8__
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__AMX_INT8__)
|
||||
/* Emits one instruction whose mnemonic is `name`. dst, src1 and src2 are
   stringized into %tmm<N> register names, so each must expand to a literal
   tile number. The {...|...} form supplies the two assembler-dialect operand
   orders. The statement declares no operands and no clobbers. */
#define _tile_int8_dp_internal(name,dst,src1,src2) __asm__ volatile ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
/* The four public wrappers below differ only in the mnemonic they pass on. */
#define _tile_dpbssd(dst,src1,src2) _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
|
||||
#define _tile_dpbsud(dst,src1,src2) _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
|
||||
#define _tile_dpbusd(dst,src1,src2) _tile_int8_dp_internal (tdpbusd, dst, src1, src2)
|
||||
#define _tile_dpbuud(dst,src1,src2) _tile_int8_dp_internal (tdpbuud, dst, src1, src2)
|
||||
#endif
|
||||
#ifdef __DISABLE_AMX_INT8__
|
||||
#undef __DISABLE_AMX_INT8__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
46
third_party/intel/amxtileintrin.internal.h
vendored
Normal file
46
third_party/intel/amxtileintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AMXTILEINTRIN_H_INCLUDED
|
||||
#define _AMXTILEINTRIN_H_INCLUDED
|
||||
#if !defined(__AMX_TILE__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-tile")
|
||||
#define __DISABLE_AMX_TILE__
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__AMX_TILE__)
|
||||
/* Issues ldtilecfg with the memory at __config as its input operand.
   NOTE(review): the "m" operand has type `const void *`, so the compiler is
   only told about a pointer-sized read at __config -- confirm this is
   sufficient for the full configuration block the instruction consumes. */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_loadconfig (const void *__config)
|
||||
{
|
||||
__asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
|
||||
}
|
||||
/* Issues sttilecfg with the memory at __config as its output operand.
   NOTE(review): the "=m" operand has type `void *`, so the compiler is only
   told about a pointer-sized write at __config -- confirm against the size
   the instruction actually stores. */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_storeconfig (void *__config)
|
||||
{
|
||||
__asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
|
||||
}
|
||||
/* Issues the tilerelease instruction; takes no arguments and declares no
   operands or clobbers. */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_release (void)
|
||||
{
|
||||
__asm__ volatile ("tilerelease" ::);
|
||||
}
|
||||
/* Tile load / store / zero helpers.

   Each public macro forwards to an *_internal twin so that arguments which
   are themselves macros expand before the tile number is stringized into a
   %tmm<N> register name; dst/src must therefore expand to a literal tile
   number. base and stride are passed in general registers ("r").

   base and stride are parenthesized before being cast, so expression
   arguments such as `p + 1` or `a + b` are cast as a whole rather than
   having the cast bind to their first operand only. */
#define _tile_loadd(dst,base,stride) _tile_loadd_internal (dst, base, stride)
#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) (base)), "r" ((long) (stride)))
#define _tile_stream_loadd(dst,base,stride) _tile_stream_loadd_internal (dst, base, stride)
#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) (base)), "r" ((long) (stride)))
/* The store variant additionally declares a "memory" clobber. */
#define _tile_stored(dst,base,stride) _tile_stored_internal (dst, base, stride)
#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) (base)), "r" ((long) (stride)) : "memory")
#define _tile_zero(dst) _tile_zero_internal (dst)
#define _tile_zero_internal(dst) __asm__ volatile ("tilezero\t%%tmm"#dst ::)
|
||||
#endif
|
||||
#ifdef __DISABLE_AMX_TILE__
|
||||
#undef __DISABLE_AMX_TILE__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
2100
third_party/intel/avx2intrin.internal.h
vendored
2100
third_party/intel/avx2intrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
246
third_party/intel/avx5124fmapsintrin.internal.h
vendored
246
third_party/intel/avx5124fmapsintrin.internal.h
vendored
|
@ -1,112 +1,180 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
#define _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124FMAPS__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124fmaps")
|
||||
#define __DISABLE_AVX5124FMAPS__
|
||||
#endif /* __AVX5124FMAPS__ */
|
||||
|
||||
__funline __m512 _mm512_4fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
|
||||
__m512 __E, __m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fmaddps((__v16sf)__B, (__v16sf)__C,
|
||||
(__v16sf)__D, (__v16sf)__E,
|
||||
(__v16sf)__A, (const __v4sf *)__F);
|
||||
#endif
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_4fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_4fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_4fmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
|
||||
__m128 __E, __m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
|
||||
(__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_mask_4fmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_maskz_4fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_4fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
|
||||
__m512 __E, __m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fnmaddps((__v16sf)__B, (__v16sf)__C,
|
||||
(__v16sf)__D, (__v16sf)__E,
|
||||
(__v16sf)__A, (const __v4sf *)__F);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fnmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_4fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fnmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_4fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fnmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fnmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_4fnmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
|
||||
__m128 __E, __m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fnmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
|
||||
(__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fnmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_mask_4fnmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fnmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fnmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_maskz_4fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fnmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124FMAPS__
|
||||
#undef __DISABLE_AVX5124FMAPS__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124FMAPS__ */
|
||||
|
||||
#endif /* _AVX5124FMAPSINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
137
third_party/intel/avx5124vnniwintrin.internal.h
vendored
137
third_party/intel/avx5124vnniwintrin.internal.h
vendored
|
@ -1,69 +1,102 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
#define _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124VNNIW__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124vnniw")
|
||||
#define __DISABLE_AVX5124VNNIW__
|
||||
#endif /* __AVX5124VNNIW__ */
|
||||
|
||||
__funline __m512i _mm512_4dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssd((__v16si)__B, (__v16si)__C,
|
||||
(__v16si)__D, (__v16si)__E,
|
||||
(__v16si)__A, (const __v4si *)__F);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_4dpwssd_epi32(__m512i __A, __mmask16 __U,
|
||||
__m512i __B, __m512i __C, __m512i __D,
|
||||
__m512i __E, __m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssd_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssd_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_4dpwssd_epi32(__mmask16 __U, __m512i __A,
|
||||
__m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E,
|
||||
__m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssd_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssd_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_4dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssds((__v16si)__B, (__v16si)__C,
|
||||
(__v16si)__D, (__v16si)__E,
|
||||
(__v16si)__A, (const __v4si *)__F);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_4dpwssds_epi32(__m512i __A, __mmask16 __U,
|
||||
__m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E,
|
||||
__m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssds_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssds_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_4dpwssds_epi32(__mmask16 __U, __m512i __A,
|
||||
__m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E,
|
||||
__m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssds_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssds_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124VNNIW__
|
||||
#undef __DISABLE_AVX5124VNNIW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124VNNIW__ */
|
||||
|
||||
#endif /* _AVX5124VNNIWINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
74
third_party/intel/avx512bf16intrin.internal.h
vendored
Normal file
74
third_party/intel/avx512bf16intrin.internal.h
vendored
Normal file
|
@ -0,0 +1,74 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512BF16INTRIN_H_INCLUDED
|
||||
#define _AVX512BF16INTRIN_H_INCLUDED
|
||||
#ifndef __AVX512BF16__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bf16")
|
||||
#define __DISABLE_AVX512BF16__
|
||||
#endif
|
||||
typedef short __v32bh __attribute__ ((__vector_size__ (64)));
|
||||
typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
|
||||
}
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
|
||||
}
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_cvtneps_pbh (__m512 __A)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
|
||||
}
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
|
||||
}
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512BF16__
|
||||
#undef __DISABLE_AVX512BF16__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
130
third_party/intel/avx512bf16vlintrin.internal.h
vendored
Normal file
130
third_party/intel/avx512bf16vlintrin.internal.h
vendored
Normal file
|
@@ -0,0 +1,130 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
|
||||
#define _AVX512BF16VLINTRIN_H_INCLUDED
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bf16,avx512vl")
|
||||
#define __DISABLE_AVX512BF16VL__
|
||||
#endif
|
||||
typedef short __v16bh __attribute__ ((__vector_size__ (32)));
|
||||
typedef short __v8bh __attribute__ ((__vector_size__ (16)));
|
||||
typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
|
||||
typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtneps_pbh (__m256 __A)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtneps_pbh (__m128 __A)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512BF16VL__
|
||||
#undef __DISABLE_AVX512BF16VL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
293
third_party/intel/avx512bitalgintrin.internal.h
vendored
293
third_party/intel/avx512bitalgintrin.internal.h
vendored
|
@@ -1,172 +1,231 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512BITALGINTRIN_H_INCLUDED
|
||||
#define _AVX512BITALGINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512BITALG__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg")
|
||||
#define __DISABLE_AVX512BITALG__
|
||||
#endif /* __AVX512BITALG__ */
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi8(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountb_v64qi((__v64qi)__A);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi8 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi16(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountw_v32hi((__v32hi)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi16 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALG__
|
||||
#undef __DISABLE_AVX512BITALG__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALG__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512bw")
|
||||
#define __DISABLE_AVX512BITALGBW__
|
||||
#endif /* __AVX512BITALG__ && __AVX512BW__ */
|
||||
|
||||
__funline __m512i _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
|
||||
(__v64qi)__A, (__v64qi)_mm512_setzero_si512(), (__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask64) __U);
|
||||
}
|
||||
__funline __m512i _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)__B, (__mmask32)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
|
||||
(__v32hi) __W,
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)_mm512_setzero_si512(), (__mmask32)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
|
||||
(__v32hi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __mmask64 _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
|
||||
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
|
||||
extern __inline __mmask64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __mmask64 _mm512_mask_bitshuffle_epi64_mask(__mmask64 __M, __m512i __A,
|
||||
__m512i __B) {
|
||||
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__M);
|
||||
extern __inline __mmask64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALGBW__
|
||||
#undef __DISABLE_AVX512BITALGBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGBW__ */
|
||||
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || \
|
||||
!defined(__AVX512BW__)
|
||||
#endif
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512vl,avx512bw")
|
||||
#define __DISABLE_AVX512BITALGVLBW__
|
||||
#endif /* __AVX512BITALG__ && __AVX512VL__ && __AVX512BW__ */
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
|
||||
(__v32qi)__A, (__v32qi)_mm256_setzero_si256(), (__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __mmask32 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
|
||||
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
|
||||
extern __inline __mmask32
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __mmask32 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __M, __m256i __A,
|
||||
__m256i __B) {
|
||||
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__M);
|
||||
extern __inline __mmask32
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALGVLBW__
|
||||
#undef __DISABLE_AVX512BITALGVLBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGVLBW__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512vl")
|
||||
#define __DISABLE_AVX512BITALGVL__
|
||||
#endif /* __AVX512BITALG__ && __AVX512VL__ */
|
||||
|
||||
__funline __mmask16 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
|
||||
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
|
||||
#endif
|
||||
extern __inline __mmask16
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A,
|
||||
__m128i __B) {
|
||||
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__M);
|
||||
extern __inline __mmask16
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi8(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountb_v32qi((__v32qi)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi8 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi16(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi((__v16hi)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi16 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_popcnt_epi8(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi((__v16qi)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi8 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_popcnt_epi16(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi((__v8hi)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi16 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
|
||||
(__v16hi)__A, (__v16hi)__B, (__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||
(__v16hi) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
|
||||
(__v16hi)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||
(__v16hi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
|
||||
(__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
__funline __m128i _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask((__v8hi)__A, (__v8hi)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||
(__v8hi) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask(
|
||||
(__v8hi)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||
(__v8hi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512BITALGVL__
|
||||
#undef __DISABLE_AVX512BITALGVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGVL__ */
|
||||
|
||||
#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
4770
third_party/intel/avx512bwintrin.internal.h
vendored
4770
third_party/intel/avx512bwintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
184
third_party/intel/avx512cdintrin.internal.h
vendored
184
third_party/intel/avx512cdintrin.internal.h
vendored
|
@@ -1,100 +1,140 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512CDINTRIN_H_INCLUDED
|
||||
#define _AVX512CDINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512CD__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512cd")
|
||||
#define __DISABLE_AVX512CD__
|
||||
#endif /* __AVX512CD__ */
|
||||
|
||||
typedef long long __v8di __attribute__((__vector_size__(64)));
|
||||
typedef int __v16si __attribute__((__vector_size__(64)));
|
||||
|
||||
typedef long long __m512i __attribute__((__vector_size__(64), __may_alias__));
|
||||
typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
|
||||
|
||||
#endif
|
||||
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
|
||||
typedef int __v16si __attribute__ ((__vector_size__ (64)));
|
||||
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
__funline __m512i _mm512_conflict_epi32(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_conflict_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
|
||||
(__v16si)__A, (__v16si)__W, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_conflict_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_conflict_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictdi_512_mask((__v8di)__A, (__v8di)__W,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_lzcnt_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_lzcnt_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntq_512_mask((__v8di)__A, (__v8di)__W,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_lzcnt_epi32(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntd_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_lzcnt_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntd_512_mask((__v16si)__A, (__v16si)__W,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntd_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_broadcastmb_epi64(__mmask8 __A) {
|
||||
return (__m512i)__builtin_ia32_broadcastmb512(__A);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmb512 (__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_broadcastmw_epi32(__mmask16 __A) {
|
||||
return (__m512i)__builtin_ia32_broadcastmw512(__A);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmw512 (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512CD__
|
||||
#undef __DISABLE_AVX512CD__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512CD__ */
|
||||
|
||||
#endif /* _AVX512CDINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
3894
third_party/intel/avx512dqintrin.internal.h
vendored
3894
third_party/intel/avx512dqintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
536
third_party/intel/avx512erintrin.internal.h
vendored
536
third_party/intel/avx512erintrin.internal.h
vendored
|
@ -1,281 +1,357 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512ERINTRIN_H_INCLUDED
|
||||
#define _AVX512ERINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512ER__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512er")
|
||||
#define __DISABLE_AVX512ER__
|
||||
#endif /* __AVX512ER__ */
|
||||
|
||||
typedef double __v8df __attribute__((__vector_size__(64)));
|
||||
typedef float __v16sf __attribute__((__vector_size__(64)));
|
||||
|
||||
typedef float __m512 __attribute__((__vector_size__(64), __may_alias__));
|
||||
typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
|
||||
|
||||
#endif
|
||||
typedef double __v8df __attribute__ ((__vector_size__ (64)));
|
||||
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
|
||||
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512d _mm512_exp2a23_round_pd(__m512d __A, int __R) {
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
__m512d __W;
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)-1, __R);
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_mask_exp2a23_round_pd(__m512d __W, __mmask8 __U,
|
||||
__m512d __A, int __R) {
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_maskz_exp2a23_round_pd(__mmask8 __U, __m512d __A,
|
||||
int __R) {
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask(
|
||||
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_exp2a23_round_ps(__m512 __A, int __R) {
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
__m512 __W;
|
||||
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)-1, __R);
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_exp2a23_round_ps(__m512 __W, __mmask16 __U,
|
||||
__m512 __A, int __R) {
|
||||
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_exp2a23_round_ps(__mmask16 __U, __m512 __A,
|
||||
int __R) {
|
||||
return (__m512)__builtin_ia32_exp2ps_mask(
|
||||
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_rcp28_round_pd(__m512d __A, int __R) {
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
__m512d __W;
|
||||
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)-1, __R);
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_mask_rcp28_round_pd(__m512d __W, __mmask8 __U,
|
||||
__m512d __A, int __R) {
|
||||
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_maskz_rcp28_round_pd(__mmask8 __U, __m512d __A,
|
||||
int __R) {
|
||||
return (__m512d)__builtin_ia32_rcp28pd_mask(
|
||||
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_rcp28_round_ps(__m512 __A, int __R) {
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
__m512 __W;
|
||||
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)-1, __R);
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_rcp28_round_ps(__m512 __W, __mmask16 __U, __m512 __A,
|
||||
int __R) {
|
||||
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_rcp28_round_ps(__mmask16 __U, __m512 __A, int __R) {
|
||||
return (__m512)__builtin_ia32_rcp28ps_mask(
|
||||
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_rcp28_round_sd(__m128d __A, __m128d __B, int __R) {
|
||||
return (__m128d)__builtin_ia32_rcp28sd_round((__v2df)__B, (__v2df)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_rcp28_round_ss(__m128 __A, __m128 __B, int __R) {
|
||||
return (__m128)__builtin_ia32_rcp28ss_round((__v4sf)__B, (__v4sf)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_rsqrt28_round_pd(__m512d __A, int __R) {
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
__m512d __W;
|
||||
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)-1, __R);
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_mask_rsqrt28_round_pd(__m512d __W, __mmask8 __U,
|
||||
__m512d __A, int __R) {
|
||||
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_maskz_rsqrt28_round_pd(__mmask8 __U, __m512d __A,
|
||||
int __R) {
|
||||
return (__m512d)__builtin_ia32_rsqrt28pd_mask(
|
||||
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_rsqrt28_round_ps(__m512 __A, int __R) {
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
__m512 __W;
|
||||
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)-1, __R);
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_rsqrt28_round_ps(__m512 __W, __mmask16 __U,
|
||||
__m512 __A, int __R) {
|
||||
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_rsqrt28_round_ps(__mmask16 __U, __m512 __A,
|
||||
int __R) {
|
||||
return (__m512)__builtin_ia32_rsqrt28ps_mask(
|
||||
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_rsqrt28_round_sd(__m128d __A, __m128d __B, int __R) {
|
||||
return (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)__B, (__v2df)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) {
|
||||
return (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)__B, (__v4sf)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
#else
|
||||
#define _mm512_exp2a23_round_pd(A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_exp2a23_round_ps(A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_rcp28_round_pd(A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rcp28_round_pd(U, A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_rcp28_round_ps(A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rcp28_round_ps(U, A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_rsqrt28_round_pd(A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_rsqrt28_round_ps(A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_exp2a23_round_pd(A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
#define _mm512_mask_exp2a23_round_pd(W, U, A, C) __builtin_ia32_exp2pd_mask(A, W, U, C)
|
||||
#define _mm512_maskz_exp2a23_round_pd(U, A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
#define _mm512_exp2a23_round_ps(A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
#define _mm512_mask_exp2a23_round_ps(W, U, A, C) __builtin_ia32_exp2ps_mask(A, W, U, C)
|
||||
#define _mm512_maskz_exp2a23_round_ps(U, A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
#define _mm512_rcp28_round_pd(A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
#define _mm512_mask_rcp28_round_pd(W, U, A, C) __builtin_ia32_rcp28pd_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rcp28_round_pd(U, A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
#define _mm512_rcp28_round_ps(A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
#define _mm512_mask_rcp28_round_ps(W, U, A, C) __builtin_ia32_rcp28ps_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rcp28_round_ps(U, A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
#define _mm512_rsqrt28_round_pd(A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
#define _mm512_rsqrt28_round_ps(A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
#define _mm_rcp28_round_sd(A, B, R) __builtin_ia32_rcp28sd_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rcp28_round_sd(W, U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rcp28_round_sd(U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
|
||||
#define _mm_rcp28_round_ss(A, B, R) __builtin_ia32_rcp28ss_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rcp28_round_ss(W, U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rcp28_round_ss(U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
|
||||
#define _mm_rsqrt28_round_sd(A, B, R) __builtin_ia32_rsqrt28sd_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
|
||||
#define _mm_rsqrt28_round_ss(A, B, R) __builtin_ia32_rsqrt28ss_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
|
||||
#endif
|
||||
|
||||
#define _mm512_exp2a23_pd(A) \
|
||||
_mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_pd(W, U, A) \
|
||||
_mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_pd(U, A) \
|
||||
_mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_exp2a23_ps(A) \
|
||||
_mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_ps(W, U, A) \
|
||||
_mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_ps(U, A) \
|
||||
_mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rcp28_sd(W, U, A, B) _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rcp28_sd(U, A, B) _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_mask_rcp28_ss(W, U, A, B) _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rcp28_ss(U, A, B) _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_mask_rsqrt28_sd(W, U, A, B) _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rsqrt28_sd(U, A, B) _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_mask_rsqrt28_ss(W, U, A, B) _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rsqrt28_ss(U, A, B) _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_exp2a23_pd(A) _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_exp2a23_pd(W, U, A) _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_exp2a23_pd(U, A) _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_exp2a23_ps(A) _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_exp2a23_ps(W, U, A) _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_exp2a23_ps(U, A) _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rcp28_pd(A) _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_pd(W, U, A) \
|
||||
_mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_pd(U, A) \
|
||||
_mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_pd(W, U, A) _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rcp28_pd(U, A) _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rcp28_ps(A) _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_ps(W, U, A) \
|
||||
_mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_ps(U, A) \
|
||||
_mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_pd(A) \
|
||||
_mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_pd(W, U, A) \
|
||||
_mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_pd(U, A) \
|
||||
_mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_ps(A) \
|
||||
_mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_ps(W, U, A) \
|
||||
_mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_ps(U, A) \
|
||||
_mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_sd(A, B) \
|
||||
__builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_ss(A, B) \
|
||||
__builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_sd(A, B) \
|
||||
__builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_ss(A, B) \
|
||||
__builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_ps(W, U, A) _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rcp28_ps(U, A) _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rsqrt28_pd(A) _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_rsqrt28_pd(W, U, A) _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rsqrt28_pd(U, A) _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rsqrt28_ps(A) _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_rsqrt28_ps(W, U, A) _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rsqrt28_ps(U, A) _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rcp28_sd(A, B) __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rcp28_ss(A, B) __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rsqrt28_sd(A, B) __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rsqrt28_ss(A, B) __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#ifdef __DISABLE_AVX512ER__
|
||||
#undef __DISABLE_AVX512ER__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512ER__ */
|
||||
|
||||
#endif /* _AVX512ERINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
21582
third_party/intel/avx512fintrin.internal.h
vendored
21582
third_party/intel/avx512fintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
91
third_party/intel/avx512ifmaintrin.internal.h
vendored
91
third_party/intel/avx512ifmaintrin.internal.h
vendored
|
@ -1,53 +1,74 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
|
||||
#define _AVX512IFMAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512IFMA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512ifma")
|
||||
#define __DISABLE_AVX512IFMA__
|
||||
#endif /* __AVX512IFMA__ */
|
||||
|
||||
__funline __m512i _mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52luq512_mask((__v8di)__X, (__v8di)__Y,
|
||||
(__v8di)__Z, (__mmask8)-1);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52huq512_mask((__v8di)__X, (__v8di)__Y,
|
||||
(__v8di)__Z, (__mmask8)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M,
|
||||
__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52luq512_mask(
|
||||
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M,
|
||||
__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52huq512_mask(
|
||||
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X,
|
||||
__m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52luq512_maskz(
|
||||
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X,
|
||||
__m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52huq512_maskz(
|
||||
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512IFMA__
|
||||
#undef __DISABLE_AVX512IFMA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512IFMA__ */
|
||||
|
||||
#endif /* _AVX512IFMAINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
170
third_party/intel/avx512ifmavlintrin.internal.h
vendored
170
third_party/intel/avx512ifmavlintrin.internal.h
vendored
|
@ -1,88 +1,128 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
|
||||
#define _AVX512IFMAVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512ifma,avx512vl")
|
||||
#define __DISABLE_AVX512IFMAVL__
|
||||
#endif /* __AVX512IFMAVL__ */
|
||||
|
||||
__funline __m128i _mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52luq128_mask((__v2di)__X, (__v2di)__Y,
|
||||
(__v2di)__Z, (__mmask8)-1);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52huq128_mask((__v2di)__X, (__v2di)__Y,
|
||||
(__v2di)__Z, (__mmask8)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52luq256_mask((__v4di)__X, (__v4di)__Y,
|
||||
(__v4di)__Z, (__mmask8)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52huq256_mask((__v4di)__X, (__v4di)__Y,
|
||||
(__v4di)__Z, (__mmask8)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X,
|
||||
__m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52luq128_mask(
|
||||
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X,
|
||||
__m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52huq128_mask(
|
||||
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M,
|
||||
__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52luq256_mask(
|
||||
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M,
|
||||
__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52huq256_mask(
|
||||
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
|
||||
__m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52luq128_maskz(
|
||||
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
|
||||
__m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52huq128_maskz(
|
||||
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X,
|
||||
__m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52luq256_maskz(
|
||||
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X,
|
||||
__m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52huq256_maskz(
|
||||
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512IFMAVL__
|
||||
#undef __DISABLE_AVX512IFMAVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512IFMAVL__ */
|
||||
|
||||
#endif /* _AVX512IFMAVLINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
294
third_party/intel/avx512pfintrin.internal.h
vendored
294
third_party/intel/avx512pfintrin.internal.h
vendored
|
@ -1,190 +1,170 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512PFINTRIN_H_INCLUDED
|
||||
#define _AVX512PFINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512PF__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512pf")
|
||||
#define __DISABLE_AVX512PF__
|
||||
#endif /* __AVX512PF__ */
|
||||
|
||||
typedef long long __v8di __attribute__((__vector_size__(64)));
|
||||
typedef int __v16si __attribute__((__vector_size__(64)));
|
||||
typedef long long __m512i __attribute__((__vector_size__(64), __may_alias__));
|
||||
#endif
|
||||
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
|
||||
typedef int __v16si __attribute__ ((__vector_size__ (64)));
|
||||
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline void _mm512_prefetch_i32gather_pd(__m256i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
|
||||
__hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i32gather_ps(__m512i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
|
||||
__scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32gather_pd(__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32gather_ps(__m512i __index,
|
||||
__mmask16 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfdps(__mask, (__v16si)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64gather_pd(__m512i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
__hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64gather_ps(__m512i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
__hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64gather_pd(__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64gather_ps(__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i32scatter_pd(void *__addr, __m256i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
|
||||
__hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i32scatter_ps(void *__addr, __m512i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
|
||||
__scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32scatter_pd(void *__addr, __mmask8 __mask,
|
||||
__m256i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m256i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32scatter_ps(void *__addr, __mmask16 __mask,
|
||||
__m512i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfdps(__mask, (__v16si)__index, __addr, __scale,
|
||||
__hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64scatter_pd(void *__addr, __m512i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
__hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64scatter_ps(void *__addr, __m512i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
__hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64scatter_pd(void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64scatter_ps(void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
#else
|
||||
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)MASK, (__v8si)(__m256i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)MASK, (__v8si)(__m256i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AVX512PF__
|
||||
#undef __DISABLE_AVX512PF__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512PF__ */
|
||||
|
||||
#endif /* _AVX512PFINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
680
third_party/intel/avx512vbmi2intrin.internal.h
vendored
680
third_party/intel/avx512vbmi2intrin.internal.h
vendored
|
@ -1,381 +1,407 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
|
||||
#define __AVX512VBMI2INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VBMI2__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi2")
|
||||
#define __DISABLE_AVX512VBMI2__
|
||||
#endif /* __AVX512VBMI2__ */
|
||||
|
||||
#endif
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512i _mm512_shrdi_epi16(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi((__v32hi)__A, (__v32hi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdi_epi32(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si((__v16si)__A, (__v16si)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
|
||||
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__C,
|
||||
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdi_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
|
||||
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
|
||||
(__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__B,
|
||||
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdi_epi64(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di((__v8di)__A, (__v8di)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask((__v8di)__C, (__v8di)__D, __E,
|
||||
(__v8di)__A, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||
__E, (__v8di) __A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask(
|
||||
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
|
||||
(__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldi_epi16(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi((__v32hi)__A, (__v32hi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldi_epi32(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si((__v16si)__A, (__v16si)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v16si ((__v16si)__A, (__v16si) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
|
||||
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__C,
|
||||
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldi_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
|
||||
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
|
||||
(__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__B,
|
||||
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldi_epi64(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di((__v8di)__A, (__v8di)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask((__v8di)__C, (__v8di)__D, __E,
|
||||
(__v8di)__A, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||
__E, (__v8di) __A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask(
|
||||
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
|
||||
(__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||
}
|
||||
#else
|
||||
#define _mm512_shrdi_epi16(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), \
|
||||
(__v32hi)(__m512i)(B),(int)(C))
|
||||
#define _mm512_shrdi_epi32(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), \
|
||||
(__v16si)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), \
|
||||
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B))
|
||||
#define _mm512_maskz_shrdi_epi32(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), \
|
||||
(__v16si)(__m512i)(C),(int)(D), \
|
||||
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A))
|
||||
#define _mm512_shrdi_epi64(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), \
|
||||
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
|
||||
#define _mm512_maskz_shrdi_epi64(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), \
|
||||
(__v8di)(__m512i)(C),(int)(D), \
|
||||
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
|
||||
#define _mm512_shldi_epi16(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), \
|
||||
(__v32hi)(__m512i)(B),(int)(C))
|
||||
#define _mm512_shldi_epi32(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), \
|
||||
(__v16si)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shldi_epi32(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), \
|
||||
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B))
|
||||
#define _mm512_maskz_shldi_epi32(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), \
|
||||
(__v16si)(__m512i)(C),(int)(D), \
|
||||
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A))
|
||||
#define _mm512_shldi_epi64(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shldi_epi64(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), \
|
||||
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
|
||||
#define _mm512_maskz_shldi_epi64(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), \
|
||||
(__v8di)(__m512i)(C),(int)(D), \
|
||||
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
|
||||
#define _mm512_shrdi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_shrdi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
|
||||
#define _mm512_maskz_shrdi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
|
||||
#define _mm512_shrdi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
|
||||
#define _mm512_maskz_shrdi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
|
||||
#define _mm512_shldi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_shldi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_mask_shldi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
|
||||
#define _mm512_maskz_shldi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
|
||||
#define _mm512_shldi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_shldi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
|
||||
#define _mm512_maskz_shldi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
|
||||
#endif
|
||||
|
||||
__funline __m512i _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi((__v32hi)__A, (__v32hi)__B,
|
||||
(__v32hi)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
(__v32hi) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdv_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di((__v8di)__A, (__v8di)__B,
|
||||
(__v8di)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B,
|
||||
(__v8di) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask((__v8di)__A, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz((__v8di)__B, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__A);
|
||||
}
|
||||
__funline __m512i _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi((__v32hi)__A, (__v32hi)__B,
|
||||
(__v32hi)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
(__v32hi) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldv_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di((__v8di)__A, (__v8di)__B,
|
||||
(__v8di)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B,
|
||||
(__v8di) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_mask((__v8di)__A, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz((__v8di)__B, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI2__
|
||||
#undef __DISABLE_AVX512VBMI2__
|
||||
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI2__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi2,avx512bw")
|
||||
#define __DISABLE_AVX512VBMI2BW__
|
||||
#endif /* __AVX512VBMI2BW__ */
|
||||
|
||||
__funline __m512i _mm512_mask_compress_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_compressqi512_mask((__v64qi)__C, (__v64qi)__A,
|
||||
(__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_compress_epi8(__mmask64 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_compressqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_compressstoreu_epi8(void *__A, __mmask64 __B,
|
||||
__m512i __C) {
|
||||
__builtin_ia32_compressstoreuqi512_mask((__v64qi *)__A, (__v64qi)__C,
|
||||
(__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_compress_epi16(__m512i __A, __mmask32 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_compresshi512_mask((__v32hi)__C, (__v32hi)__A,
|
||||
(__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_compress_epi16(__mmask32 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_compresshi512_mask(
|
||||
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_compressstoreu_epi16(void *__A, __mmask32 __B,
|
||||
__m512i __C) {
|
||||
__builtin_ia32_compressstoreuhi512_mask((__v32hi *)__A, (__v32hi)__C,
|
||||
(__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expand_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_expandqi512_mask((__v64qi)__C, (__v64qi)__A,
|
||||
(__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expand_epi8(__mmask64 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_expandqi512_maskz(
|
||||
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expandloadu_epi8(__m512i __A, __mmask64 __B,
|
||||
const void *__C) {
|
||||
return (__m512i)__builtin_ia32_expandloadqi512_mask(
|
||||
(const __v64qi *)__C, (__v64qi)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expandloadu_epi8(__mmask64 __A, const void *__B) {
|
||||
return (__m512i)__builtin_ia32_expandloadqi512_maskz(
|
||||
(const __v64qi *)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expand_epi16(__m512i __A, __mmask32 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_expandhi512_mask((__v32hi)__C, (__v32hi)__A,
|
||||
(__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expand_epi16(__mmask32 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_expandhi512_maskz(
|
||||
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expandloadu_epi16(__m512i __A, __mmask32 __B,
|
||||
const void *__C) {
|
||||
return (__m512i)__builtin_ia32_expandloadhi512_mask(
|
||||
(const __v32hi *)__C, (__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expandloadu_epi16(__mmask32 __A, const void *__B) {
|
||||
return (__m512i)__builtin_ia32_expandloadhi512_maskz(
|
||||
(const __v32hi *)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512i _mm512_mask_shrdi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
|
||||
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdi_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
|
||||
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
|
||||
(__mmask32)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
|
||||
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldi_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
|
||||
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
|
||||
(__mmask32)__A);
|
||||
}
|
||||
|
||||
#else
|
||||
#define _mm512_mask_shrdi_epi16(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), \
|
||||
(__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A),(__mmask32)(B))
|
||||
#define _mm512_maskz_shrdi_epi16(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), \
|
||||
(__v32hi)(__m512i)(C),(int)(D), \
|
||||
(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
|
||||
#define _mm512_mask_shldi_epi16(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), \
|
||||
(__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A),(__mmask32)(B))
|
||||
#define _mm512_maskz_shldi_epi16(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), \
|
||||
(__v32hi)(__m512i)(C),(int)(D), \
|
||||
(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
|
||||
#endif
|
||||
|
||||
__funline __m512i _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compress_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__C,
|
||||
(__v64qi)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdv_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz(
|
||||
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_compress_epi8 (__mmask64 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__B,
|
||||
(__v64qi)_mm512_setzero_si512 (), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compressstoreu_epi8 (void * __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
__builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldv_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz(
|
||||
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compress_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__C,
|
||||
(__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_compress_epi16 (__mmask32 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__B,
|
||||
(__v32hi)_mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compressstoreu_epi16 (void * __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
__builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C,
|
||||
(__mmask32) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expand_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __C,
|
||||
(__v64qi) __A,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expand_epi8 (__mmask64 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_maskz ((__v64qi) __B,
|
||||
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expandloadu_epi8 (__m512i __A, __mmask64 __B, const void * __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *) __C,
|
||||
(__v64qi) __A, (__mmask64) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expandloadu_epi8 (__mmask64 __A, const void * __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_maskz ((const __v64qi *) __B,
|
||||
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expand_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __C,
|
||||
(__v32hi) __A,
|
||||
(__mmask32) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expand_epi16 (__mmask32 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_maskz ((__v32hi) __B,
|
||||
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expandloadu_epi16 (__m512i __A, __mmask32 __B, const void * __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *) __C,
|
||||
(__v32hi) __A, (__mmask32) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expandloadu_epi16 (__mmask32 __A, const void * __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_maskz ((const __v32hi *) __B,
|
||||
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||
}
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__C,
|
||||
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__B,
|
||||
(__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__C,
|
||||
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__B,
|
||||
(__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
#else
|
||||
#define _mm512_mask_shrdi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
|
||||
#define _mm512_maskz_shrdi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C),(int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
|
||||
#define _mm512_mask_shldi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
|
||||
#define _mm512_maskz_shldi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C),(int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask ((__v32hi)__A,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz ((__v32hi)__B,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_mask ((__v32hi)__A,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz ((__v32hi)__B,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI2BW__
|
||||
#undef __DISABLE_AVX512VBMI2BW__
|
||||
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI2BW__ */
|
||||
|
||||
#endif /* __AVX512VBMI2INTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
1248
third_party/intel/avx512vbmi2vlintrin.internal.h
vendored
1248
third_party/intel/avx512vbmi2vlintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
170
third_party/intel/avx512vbmiintrin.internal.h
vendored
170
third_party/intel/avx512vbmiintrin.internal.h
vendored
|
@ -1,90 +1,124 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
|
||||
#define _AVX512VBMIINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512VBMI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi")
|
||||
#define __DISABLE_AVX512VBMI__
|
||||
#endif /* __AVX512VBMI__ */
|
||||
|
||||
__funline __m512i _mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M,
|
||||
__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
|
||||
(__v64qi)__X, (__v64qi)__Y, (__v64qi)__W, (__mmask64)__M);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X,
|
||||
__m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
|
||||
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_setzero_si512(),
|
||||
(__mmask64)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
|
||||
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_undefined_epi32(),
|
||||
(__mmask64)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi)
|
||||
_mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_permvarqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_undefined_epi32(),
|
||||
(__mmask64)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_permvarqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_setzero_si512(),
|
||||
(__mmask64)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512(),
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M,
|
||||
__m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_permvarqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)__A, (__v64qi)__W, (__mmask64)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_permutex2var_epi8(__m512i __A, __m512i __I,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U,
|
||||
__m512i __I, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
|
||||
__m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I,
|
||||
__mmask64 __U, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermi2varqi512_mask((__v64qi)__A,
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__B,
|
||||
(__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
|
||||
__mmask64 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
|
||||
(__v64qi) __I
|
||||
,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A,
|
||||
__m512i __I, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermt2varqi512_maskz(
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
|
||||
__m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
|
||||
,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI__
|
||||
#undef __DISABLE_AVX512VBMI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI__ */
|
||||
|
||||
#endif /* _AVX512VBMIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
326
third_party/intel/avx512vbmivlintrin.internal.h
vendored
326
third_party/intel/avx512vbmivlintrin.internal.h
vendored
|
@ -1,159 +1,229 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VBMIVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi,avx512vl")
|
||||
#define __DISABLE_AVX512VBMIVL__
|
||||
#endif /* __AVX512VBMIVL__ */
|
||||
|
||||
__funline __m256i _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M,
|
||||
__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
|
||||
(__v32qi)__X, (__v32qi)__Y, (__v32qi)__W, (__mmask32)__M);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X,
|
||||
__m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
|
||||
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_setzero_si256(),
|
||||
(__mmask32)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
|
||||
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_undefined_si256(),
|
||||
(__mmask32)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M,
|
||||
__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
|
||||
(__v16qi)__X, (__v16qi)__Y, (__v16qi)__W, (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X,
|
||||
__m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
|
||||
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
|
||||
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_undefined_si128(),
|
||||
(__mmask16)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_permvarqi256_mask(
|
||||
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_undefined_si256(),
|
||||
(__mmask32)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_permvarqi256_mask(
|
||||
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_setzero_si256(),
|
||||
(__mmask32)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M,
|
||||
__m256i __A, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_permvarqi256_mask(
|
||||
(__v32qi)__B, (__v32qi)__A, (__v32qi)__W, (__mmask32)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_permutexvar_epi8(__m128i __A, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_permvarqi128_mask(
|
||||
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_undefined_si128(),
|
||||
(__mmask16)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A,
|
||||
__m128i __B) {
|
||||
return (__m128i)__builtin_ia32_permvarqi128_mask(
|
||||
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M,
|
||||
__m128i __A, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_permvarqi128_mask(
|
||||
(__v16qi)__B, (__v16qi)__A, (__v16qi)__W, (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_permutex2var_epi8(__m256i __A, __m256i __I,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U,
|
||||
__m256i __I, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
|
||||
__m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I,
|
||||
__mmask32 __U, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermi2varqi256_mask((__v32qi)__A,
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__B,
|
||||
(__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
|
||||
__mmask32 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
|
||||
(__v32qi) __I
|
||||
,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A,
|
||||
__m256i __I, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermt2varqi256_maskz(
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
|
||||
__m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
|
||||
,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U,
|
||||
__m128i __I, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I,
|
||||
__mmask16 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermi2varqi128_mask((__v16qi)__A,
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__B,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
|
||||
(__v16qi) __I
|
||||
,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A,
|
||||
__m128i __I, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermt2varqi128_maskz(
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
|
||||
,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMIVL__
|
||||
#undef __DISABLE_AVX512VBMIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMIVL__ */
|
||||
|
||||
#endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
6789
third_party/intel/avx512vlbwintrin.internal.h
vendored
6789
third_party/intel/avx512vlbwintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
2791
third_party/intel/avx512vldqintrin.internal.h
vendored
2791
third_party/intel/avx512vldqintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
19470
third_party/intel/avx512vlintrin.internal.h
vendored
19470
third_party/intel/avx512vlintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
147
third_party/intel/avx512vnniintrin.internal.h
vendored
147
third_party/intel/avx512vnniintrin.internal.h
vendored
|
@ -1,87 +1,108 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
|
||||
#define __AVX512VNNIINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vnni")
|
||||
#define __DISABLE_AVX512VNNI__
|
||||
#endif /* __AVX512VNNI__ */
|
||||
|
||||
__funline __m512i _mm512_dpbusd_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbusd_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpbusd_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbusd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpbusd_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbusd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_dpbusds_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbusds_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusds_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpbusds_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbusds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpbusds_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbusds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpwssd_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpwssd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpwssd_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpwssd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssds_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpwssds_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpwssds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpwssds_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpwssds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VNNI__
|
||||
#undef __DISABLE_AVX512VNNI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VNNI__ */
|
||||
|
||||
#endif /* __AVX512VNNIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
240
third_party/intel/avx512vnnivlintrin.internal.h
vendored
240
third_party/intel/avx512vnnivlintrin.internal.h
vendored
|
@ -1,154 +1,140 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VNNIVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vnni,avx512vl")
|
||||
#define __DISABLE_AVX512VNNIVL__
|
||||
#endif /* __AVX512VNNIVL__ */
|
||||
|
||||
__funline __m256i _mm256_dpbusd_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
#endif
|
||||
#define _mm256_dpbusd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C,
|
||||
(__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpbusd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
|
||||
__m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask((__v8si)__A, (__v8si)__C,
|
||||
(__v8si)__D, (__mmask8)__B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpbusd_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
#define _mm_dpbusd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C,
|
||||
(__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpbusd_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpbusd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask((__v4si)__A, (__v4si)__C,
|
||||
(__v4si)__D, (__mmask8)__B);
|
||||
#define _mm256_dpbusds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask ((__v8si)__A,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpbusd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_dpbusds_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
#define _mm_dpbusds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask ((__v4si)__A,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpbusds_epi32(__m256i __A, __mmask8 __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask(
|
||||
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpbusds_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
#define _mm256_dpwssd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C,
|
||||
(__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpbusds_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpbusds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask(
|
||||
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
|
||||
#define _mm_dpwssd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C,
|
||||
(__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpbusds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_dpwssd_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
#define _mm256_dpwssds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask ((__v8si)__A,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpwssd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
|
||||
__m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask((__v8si)__A, (__v8si)__C,
|
||||
(__v8si)__D, (__mmask8)__B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpwssd_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
#define _mm_dpwssds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask ((__v4si)__A,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpwssd_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpwssd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask((__v4si)__A, (__v4si)__C,
|
||||
(__v4si)__D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpwssd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_dpwssds_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpwssds_epi32(__m256i __A, __mmask8 __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask(
|
||||
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpwssds_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpwssds_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpwssds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask(
|
||||
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpwssds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpwssds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512VNNIVL__
|
||||
#undef __DISABLE_AVX512VNNIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VNNIVL__ */
|
||||
#endif /* __DISABLE_AVX512VNNIVL__ */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
32
third_party/intel/avx512vp2intersectintrin.internal.h
vendored
Normal file
32
third_party/intel/avx512vp2intersectintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
|
||||
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
|
||||
#if !defined(__AVX512VP2INTERSECT__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vp2intersect")
|
||||
#define __DISABLE_AVX512VP2INTERSECT__
|
||||
#endif
|
||||
/* Thin always-inline wrapper around __builtin_ia32_2intersectd512.
   Passes the two mask pointers __U and __M first, then __A and __B
   reinterpreted as __v16si.  Returns nothing.
   NOTE(review): results are presumably written through __U and __M as a
   pair of 16-bit lane masks -- confirm against the builtin's docs.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_2intersect_epi32 (__m512i __A, __m512i __B, __mmask16 *__U,
|
||||
__mmask16 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd512 (__U, __M, (__v16si) __A, (__v16si) __B);
|
||||
}
|
||||
/* Thin always-inline wrapper around __builtin_ia32_2intersectq512.
   Passes the two mask pointers __U and __M first, then __A and __B
   reinterpreted as __v8di.  Returns nothing.
   NOTE(review): results are presumably written through __U and __M as a
   pair of 8-bit lane masks -- confirm against the builtin's docs.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_2intersect_epi64 (__m512i __A, __m512i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq512 (__U, __M, (__v8di) __A, (__v8di) __B);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512VP2INTERSECT__
|
||||
#undef __DISABLE_AVX512VP2INTERSECT__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
44
third_party/intel/avx512vp2intersectvlintrin.internal.h
vendored
Normal file
44
third_party/intel/avx512vp2intersectvlintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
|
||||
#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vp2intersect,avx512vl")
|
||||
#define __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#endif
|
||||
/* Thin always-inline wrapper around __builtin_ia32_2intersectd128.
   Passes the two mask pointers __U and __M first, then __A and __B
   reinterpreted as __v4si.  Returns nothing.
   NOTE(review): results are presumably written through __U and __M --
   confirm against the builtin's docs.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_2intersect_epi32 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd128 (__U, __M, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
/* Thin always-inline wrapper around __builtin_ia32_2intersectd256.
   Passes the two mask pointers __U and __M first, then __A and __B
   reinterpreted as __v8si.  Returns nothing.
   NOTE(review): results are presumably written through __U and __M --
   confirm against the builtin's docs.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_2intersect_epi32 (__m256i __A, __m256i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd256 (__U, __M, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
/* Thin always-inline wrapper around __builtin_ia32_2intersectq128.
   Passes the two mask pointers __U and __M first, then __A and __B
   reinterpreted as __v2di.  Returns nothing.
   NOTE(review): results are presumably written through __U and __M --
   confirm against the builtin's docs.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_2intersect_epi64 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq128 (__U, __M, (__v2di) __A, (__v2di) __B);
|
||||
}
|
||||
/* Thin always-inline wrapper around __builtin_ia32_2intersectq256.
   Passes the two mask pointers __U and __M first, then __A and __B
   reinterpreted as __v4di.  Returns nothing.
   NOTE(review): results are presumably written through __U and __M --
   confirm against the builtin's docs.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_2intersect_epi64 (__m256i __A, __m256i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq256 (__U, __M, (__v4di) __A, (__v4di) __B);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#undef __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
|
@ -1,50 +1,64 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
|
||||
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512VPOPCNTDQ__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vpopcntdq")
|
||||
#define __DISABLE_AVX512VPOPCNTDQ__
|
||||
#endif /* __AVX512VPOPCNTDQ__ */
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi32(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si((__v16si)__A);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si ((__v16si) __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_popcnt_epi32(__m512i __A, __mmask16 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__B, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di((__v8di)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di ((__v8di) __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_popcnt_epi64(__m512i __A, __mmask8 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask((__v8di)__A, (__v8di)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VPOPCNTDQ__
|
||||
#undef __DISABLE_AVX512VPOPCNTDQ__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VPOPCNTDQ__ */
|
||||
|
||||
#endif /* _AVX512VPOPCNTDQINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
142
third_party/intel/avx512vpopcntdqvlintrin.internal.h
vendored
142
third_party/intel/avx512vpopcntdqvlintrin.internal.h
vendored
|
@ -1,78 +1,110 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
|
||||
# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vpopcntdq,avx512vl")
|
||||
#define __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#endif /* __AVX512VPOPCNTDQVL__ */
|
||||
|
||||
__funline __m128i _mm_popcnt_epi32(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountd_v4si((__v4si)__A);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi32 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_popcnt_epi32(__m128i __A, __mmask16 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask((__v4si)__A, (__v4si)__B,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi32 (__m128i __W, __mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
|
||||
(__v4si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi32(__mmask16 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask(
|
||||
(__v4si)__A, (__v4si)_mm_setzero_si128(), (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
|
||||
(__v4si)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi32(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountd_v8si((__v8si)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi32 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi32(__m256i __A, __mmask16 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask((__v8si)__A, (__v8si)__B,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi32 (__m256i __W, __mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
|
||||
(__v8si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi32(__mmask16 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask(
|
||||
(__v8si)__A, (__v8si)_mm256_setzero_si256(), (__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_popcnt_epi64(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountq_v2di((__v2di)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi64 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_popcnt_epi64(__m128i __A, __mmask8 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask((__v2di)__A, (__v2di)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||
(__v2di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask(
|
||||
(__v2di)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||
(__v2di)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi64(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountq_v4di((__v4di)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi64 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask((__v4di)__A, (__v4di)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||
(__v4di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask(
|
||||
(__v4di)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||
(__v4di)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#undef __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
|
||||
|
||||
#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
1738
third_party/intel/avxintrin.internal.h
vendored
1738
third_party/intel/avxintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
82
third_party/intel/avxvnniintrin.internal.h
vendored
Normal file
82
third_party/intel/avxvnniintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVXVNNIINTRIN_H_INCLUDED
|
||||
#define _AVXVNNIINTRIN_H_INCLUDED
|
||||
#if !defined(__AVXVNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avxvnni")
|
||||
#define __DISABLE_AVXVNNIVL__
|
||||
#endif
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpbusd_v8si applied
   to __A, __B and __C (each reinterpreted as __v8si), cast back to __m256i.
   NOTE(review): presumably the AVX-VNNI counterpart of _mm256_dpbusd_epi32
   with __A as the accumulator -- confirm against the intrinsics guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbusd_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpbusd_v4si applied
   to __A, __B and __C (each reinterpreted as __v4si), cast back to __m128i.
   NOTE(review): presumably the AVX-VNNI counterpart of _mm_dpbusd_epi32
   with __A as the accumulator -- confirm against the intrinsics guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbusd_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpbusds_v8si applied
   to __A, __B and __C (each reinterpreted as __v8si), cast back to __m256i.
   NOTE(review): the trailing "s" presumably denotes the saturating form of
   the dpbusd operation -- confirm against the intrinsics guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbusds_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpbusds_v4si applied
   to __A, __B and __C (each reinterpreted as __v4si), cast back to __m128i.
   NOTE(review): the trailing "s" presumably denotes the saturating form of
   the dpbusd operation -- confirm against the intrinsics guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbusds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpwssd_v8si applied
   to __A, __B and __C (each reinterpreted as __v8si), cast back to __m256i.
   NOTE(review): presumably the AVX-VNNI counterpart of _mm256_dpwssd_epi32
   with __A as the accumulator -- confirm against the intrinsics guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpwssd_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpwssd_v4si applied
   to __A, __B and __C (each reinterpreted as __v4si), cast back to __m128i.
   NOTE(review): presumably the AVX-VNNI counterpart of _mm_dpwssd_epi32
   with __A as the accumulator -- confirm against the intrinsics guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpwssd_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpwssds_v8si applied
   to __A, __B and __C (each reinterpreted as __v8si), cast back to __m256i.
   NOTE(review): the trailing "s" presumably denotes the saturating form of
   the dpwssd operation -- confirm against the intrinsics guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpwssds_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* Thin always-inline wrapper: returns __builtin_ia32_vpdpwssds_v4si applied
   to __A, __B and __C (each reinterpreted as __v4si), cast back to __m128i.
   NOTE(review): the trailing "s" presumably denotes the saturating form of
   the dpwssd operation -- confirm against the intrinsics guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpwssds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
#ifdef __DISABLE_AVXVNNIVL__
|
||||
#undef __DISABLE_AVXVNNIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
103
third_party/intel/bmi2intrin.internal.h
vendored
103
third_party/intel/bmi2intrin.internal.h
vendored
|
@ -1,67 +1,74 @@
|
|||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _BMI2INTRIN_H_INCLUDED
|
||||
#define _BMI2INTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __BMI2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("bmi2")
|
||||
#define __DISABLE_BMI2__
|
||||
#endif /* __BMI2__ */
|
||||
|
||||
__funline unsigned int _bzhi_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_bzhi_si(__X, __Y);
|
||||
#endif
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bzhi_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_bzhi_si (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned int _pdep_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_pdep_si(__X, __Y);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pdep_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_pdep_si (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned int _pext_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_pext_si(__X, __Y);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pext_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_pext_si (__X, __Y);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
__funline unsigned long long _bzhi_u64(unsigned long long __X,
|
||||
unsigned long long __Y) {
|
||||
return __builtin_ia32_bzhi_di(__X, __Y);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_bzhi_di (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned long long _pdep_u64(unsigned long long __X,
|
||||
unsigned long long __Y) {
|
||||
return __builtin_ia32_pdep_di(__X, __Y);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pdep_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_pdep_di (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned long long _pext_u64(unsigned long long __X,
|
||||
unsigned long long __Y) {
|
||||
return __builtin_ia32_pext_di(__X, __Y);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pext_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_pext_di (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned long long _mulx_u64(unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
unsigned __int128 __res = (unsigned __int128)__X * __Y;
|
||||
*__P = (unsigned long long)(__res >> 64);
|
||||
return (unsigned long long)__res;
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
|
||||
unsigned long long *__P)
|
||||
{
|
||||
unsigned __int128 __res = (unsigned __int128) __X * __Y;
|
||||
*__P = (unsigned long long) (__res >> 64);
|
||||
return (unsigned long long) __res;
|
||||
}
|
||||
|
||||
#else /* !__x86_64__ */
|
||||
|
||||
__funline unsigned int _mulx_u32(unsigned int __X, unsigned int __Y,
|
||||
unsigned int *__P) {
|
||||
unsigned long long __res = (unsigned long long)__X * __Y;
|
||||
*__P = (unsigned int)(__res >> 32);
|
||||
return (unsigned int)__res;
|
||||
#else
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
unsigned long long __res = (unsigned long long) __X * __Y;
|
||||
*__P = (unsigned int) (__res >> 32);
|
||||
return (unsigned int) __res;
|
||||
}
|
||||
|
||||
#endif /* !__x86_64__ */
|
||||
|
||||
#endif
|
||||
#ifdef __DISABLE_BMI2__
|
||||
#undef __DISABLE_BMI2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_BMI2__ */
|
||||
|
||||
#endif /* _BMI2INTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
211
third_party/intel/bmiintrin.internal.h
vendored
211
third_party/intel/bmiintrin.internal.h
vendored
|
@ -1,160 +1,135 @@
|
|||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _BMIINTRIN_H_INCLUDED
|
||||
#define _BMIINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __BMI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("bmi")
|
||||
#define __DISABLE_BMI__
|
||||
#endif /* __BMI__ */
|
||||
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u16(unsigned short __X) {
|
||||
return __builtin_ia32_tzcnt_u16(__X);
|
||||
#endif
|
||||
/* Thin inline wrapper: passes __X to __builtin_ia32_tzcnt_u16 and returns
   the builtin's result.  */
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u16 (unsigned short __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u16 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u32(unsigned int __X, unsigned int __Y) {
|
||||
/* Returns __Y with every bit that is set in __X cleared, i.e. the bitwise
   AND of the complement of __X with __Y.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_bextr_u32(__X, __Y);
|
||||
/* Thin inline wrapper: passes __X and the caller-built control word __Y
   unchanged to __builtin_ia32_bextr_u32 and returns the builtin's result.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_bextr_u32 (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u32(unsigned int __X, unsigned int __Y, unsigned __Z) {
|
||||
return __builtin_ia32_bextr_u32(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
/* Builds a control word from the low 8 bits of __Y (placed in bits 0-7)
   and the low 8 bits of __Z (placed in bits 8-15), then passes __X and
   that word to __builtin_ia32_bextr_u32.
   NOTE(review): __Y / __Z are presumably the BEXTR start position and
   length -- confirm against the instruction reference.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
|
||||
{
|
||||
return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u32(unsigned int __X) {
|
||||
/* Isolates the lowest set bit of __X by ANDing __X with its unsigned
   negation.  Returns 0 when __X is 0.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u32 (unsigned int __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u32(unsigned int __X) {
|
||||
return __blsi_u32(__X);
|
||||
/* Single-underscore spelling of the same operation: returns
   __blsi_u32 (__X).  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u32 (unsigned int __X)
|
||||
{
|
||||
return __blsi_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u32(unsigned int __X) {
|
||||
/* Returns a mask covering bit 0 up to and including the lowest set bit of
   __X (__X XOR (__X - 1)).  When __X is 0 the unsigned subtraction wraps
   and the result has every bit set.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u32 (unsigned int __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u32(unsigned int __X) {
|
||||
return __blsmsk_u32(__X);
|
||||
/* Single-underscore spelling of the same operation: returns
   __blsmsk_u32 (__X).  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u32 (unsigned int __X)
|
||||
{
|
||||
return __blsmsk_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u32(unsigned int __X) {
|
||||
/* Clears the lowest set bit of __X (__X AND (__X - 1)).  Returns 0 when
   __X is 0.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u32 (unsigned int __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u32(unsigned int __X) {
|
||||
return __blsr_u32(__X);
|
||||
/* Single-underscore spelling of the same operation: returns
   __blsr_u32 (__X).  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u32 (unsigned int __X)
|
||||
{
|
||||
return __blsr_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u32(unsigned int __X) {
|
||||
return __builtin_ia32_tzcnt_u32(__X);
|
||||
/* Thin inline wrapper: passes __X to __builtin_ia32_tzcnt_u32 and returns
   the builtin's result.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u32(unsigned int __X) {
|
||||
return __builtin_ia32_tzcnt_u32(__X);
|
||||
/* Single-underscore spelling: calls the same __builtin_ia32_tzcnt_u32
   builtin directly with __X and returns its result.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u64(unsigned long long __X, unsigned long long __Y) {
|
||||
/* 64-bit form: returns __Y with every bit that is set in __X cleared
   (complement of __X ANDed with __Y).  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u64(unsigned long long __X, unsigned long long __Y) {
|
||||
return __builtin_ia32_bextr_u64(__X, __Y);
|
||||
/* Thin inline wrapper: passes __X and the caller-built control word __Y
   unchanged to __builtin_ia32_bextr_u64 and returns the builtin's result.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_bextr_u64 (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) {
|
||||
return __builtin_ia32_bextr_u64(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
/* Builds a control word from the low 8 bits of __Y (placed in bits 0-7)
   and the low 8 bits of __Z (placed in bits 8-15), then passes __X and
   that word to __builtin_ia32_bextr_u64.
   NOTE(review): __Y / __Z are presumably the BEXTR start position and
   length -- confirm against the instruction reference.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
|
||||
{
|
||||
return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u64(unsigned long long __X) {
|
||||
/* 64-bit form: isolates the lowest set bit of __X by ANDing __X with its
   unsigned negation.  Returns 0 when __X is 0.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u64 (unsigned long long __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u64(unsigned long long __X) {
|
||||
return __blsi_u64(__X);
|
||||
/* Single-underscore spelling of the same operation: returns
   __blsi_u64 (__X).  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u64 (unsigned long long __X)
|
||||
{
|
||||
return __blsi_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u64(unsigned long long __X) {
|
||||
/* 64-bit form: returns a mask covering bit 0 up to and including the
   lowest set bit of __X (__X XOR (__X - 1)).  When __X is 0 the unsigned
   subtraction wraps and the result has every bit set.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u64 (unsigned long long __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u64(unsigned long long __X) {
|
||||
return __blsmsk_u64(__X);
|
||||
/* Single-underscore spelling of the same operation: returns
   __blsmsk_u64 (__X).  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u64 (unsigned long long __X)
|
||||
{
|
||||
return __blsmsk_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u64(unsigned long long __X) {
|
||||
/* 64-bit form: clears the lowest set bit of __X (__X AND (__X - 1)).
   Returns 0 when __X is 0.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u64 (unsigned long long __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u64(unsigned long long __X) {
|
||||
return __blsr_u64(__X);
|
||||
/* Single-underscore spelling of the same operation: returns
   __blsr_u64 (__X).  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u64 (unsigned long long __X)
|
||||
{
|
||||
return __blsr_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u64(unsigned long long __X) {
|
||||
return __builtin_ia32_tzcnt_u64(__X);
|
||||
/* Thin inline wrapper: passes __X to __builtin_ia32_tzcnt_u64 and returns
   the builtin's result.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u64(unsigned long long __X) {
|
||||
return __builtin_ia32_tzcnt_u64(__X);
|
||||
/* Single-underscore spelling: calls the same __builtin_ia32_tzcnt_u64
   builtin directly with __X and returns its result.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u64 (__X);
|
||||
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#endif
|
||||
#ifdef __DISABLE_BMI__
|
||||
#undef __DISABLE_BMI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_BMI__ */
|
||||
|
||||
#endif /* _BMIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
108
third_party/intel/cetintrin.internal.h
vendored
108
third_party/intel/cetintrin.internal.h
vendored
|
@ -1,73 +1,95 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <cetintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <cetintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CETINTRIN_H_INCLUDED
|
||||
#define _CETINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SHSTK__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("shstk")
|
||||
#pragma GCC target ("shstk")
|
||||
#define __DISABLE_SHSTK__
|
||||
#endif /* __SHSTK__ */
|
||||
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned long long _get_ssp(void) {
|
||||
return __builtin_ia32_rdsspq();
|
||||
/* Variant selected when __x86_64__ is defined: returns the value produced
   by __builtin_ia32_rdsspq ().  */
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_get_ssp (void)
|
||||
{
|
||||
return __builtin_ia32_rdsspq ();
|
||||
}
|
||||
#else
|
||||
__funline unsigned int _get_ssp(void) {
|
||||
return __builtin_ia32_rdsspd();
|
||||
/* Variant selected when __x86_64__ is not defined: returns the value
   produced by __builtin_ia32_rdsspd ().  */
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_get_ssp (void)
|
||||
{
|
||||
return __builtin_ia32_rdsspd ();
|
||||
}
|
||||
#endif
|
||||
|
||||
__funline void _inc_ssp(unsigned int __B) {
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_inc_ssp (unsigned int __B)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
__builtin_ia32_incsspq((unsigned long long)__B);
|
||||
__builtin_ia32_incsspq ((unsigned long long) __B);
|
||||
#else
|
||||
__builtin_ia32_incsspd(__B);
|
||||
__builtin_ia32_incsspd (__B);
|
||||
#endif
|
||||
}
|
||||
|
||||
__funline void _saveprevssp(void) {
|
||||
__builtin_ia32_saveprevssp();
|
||||
/* Thin inline wrapper: invokes __builtin_ia32_saveprevssp () with no
   arguments and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_saveprevssp (void)
|
||||
{
|
||||
__builtin_ia32_saveprevssp ();
|
||||
}
|
||||
|
||||
__funline void _rstorssp(void *__B) {
|
||||
__builtin_ia32_rstorssp(__B);
|
||||
/* Thin inline wrapper: passes the pointer __B to
   __builtin_ia32_rstorssp and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_rstorssp (void *__B)
|
||||
{
|
||||
__builtin_ia32_rstorssp (__B);
|
||||
}
|
||||
|
||||
__funline void _wrssd(unsigned int __B, void *__C) {
|
||||
__builtin_ia32_wrssd(__B, __C);
|
||||
/* Thin inline wrapper: passes the 32-bit value __B and the pointer __C to
   __builtin_ia32_wrssd and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrssd (unsigned int __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrssd (__B, __C);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _wrssq(unsigned long long __B, void *__C) {
|
||||
__builtin_ia32_wrssq(__B, __C);
|
||||
/* Thin inline wrapper (declared only under #ifdef __x86_64__): passes the
   64-bit value __B and the pointer __C to __builtin_ia32_wrssq.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrssq (unsigned long long __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrssq (__B, __C);
|
||||
}
|
||||
#endif
|
||||
|
||||
__funline void _wrussd(unsigned int __B, void *__C) {
|
||||
__builtin_ia32_wrussd(__B, __C);
|
||||
/* Thin inline wrapper: passes the 32-bit value __B and the pointer __C to
   __builtin_ia32_wrussd and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrussd (unsigned int __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrussd (__B, __C);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _wrussq(unsigned long long __B, void *__C) {
|
||||
__builtin_ia32_wrussq(__B, __C);
|
||||
/* Thin inline wrapper (declared only under #ifdef __x86_64__): passes the
   64-bit value __B and the pointer __C to __builtin_ia32_wrussq.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrussq (unsigned long long __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrussq (__B, __C);
|
||||
}
|
||||
#endif
|
||||
|
||||
__funline void _setssbsy(void) {
|
||||
__builtin_ia32_setssbsy();
|
||||
/* Thin inline wrapper: invokes __builtin_ia32_setssbsy () with no
   arguments and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_setssbsy (void)
|
||||
{
|
||||
__builtin_ia32_setssbsy ();
|
||||
}
|
||||
|
||||
__funline void _clrssbsy(void *__B) {
|
||||
__builtin_ia32_clrssbsy(__B);
|
||||
/* Thin inline wrapper: passes the pointer __B to
   __builtin_ia32_clrssbsy and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_clrssbsy (void *__B)
|
||||
{
|
||||
__builtin_ia32_clrssbsy (__B);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SHSTK__
|
||||
#undef __DISABLE_SHSTK__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SHSTK__ */
|
||||
|
||||
#endif /* _CETINTRIN_H_INCLUDED. */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
23
third_party/intel/cldemoteintrin.internal.h
vendored
23
third_party/intel/cldemoteintrin.internal.h
vendored
|
@ -1,21 +1,24 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <cldemoteintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <cldemoteintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLDEMOTE_H_INCLUDED
|
||||
#define _CLDEMOTE_H_INCLUDED
|
||||
|
||||
#ifndef __CLDEMOTE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("cldemote")
|
||||
#define __DISABLE_CLDEMOTE__
|
||||
#endif /* __CLDEMOTE__ */
|
||||
__funline void _cldemote(void *__A) {
|
||||
__builtin_ia32_cldemote(__A);
|
||||
#endif
|
||||
/* Thin inline wrapper: passes the address __A to
   __builtin_ia32_cldemote and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cldemote (void *__A)
|
||||
{
|
||||
__builtin_ia32_cldemote (__A);
|
||||
}
|
||||
#ifdef __DISABLE_CLDEMOTE__
|
||||
#undef __DISABLE_CLDEMOTE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLDEMOTE__ */
|
||||
|
||||
#endif /* _CLDEMOTE_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
25
third_party/intel/clflushoptintrin.internal.h
vendored
25
third_party/intel/clflushoptintrin.internal.h
vendored
|
@ -1,23 +1,24 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <clflushoptintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLFLUSHOPTINTRIN_H_INCLUDED
|
||||
#define _CLFLUSHOPTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __CLFLUSHOPT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clflushopt")
|
||||
#define __DISABLE_CLFLUSHOPT__
|
||||
#endif /* __CLFLUSHOPT__ */
|
||||
|
||||
__funline void _mm_clflushopt(void *__A) {
|
||||
__builtin_ia32_clflushopt(__A);
|
||||
#endif
|
||||
/* Thin inline wrapper: passes the address __A to
   __builtin_ia32_clflushopt and returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_clflushopt (void *__A)
|
||||
{
|
||||
__builtin_ia32_clflushopt (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_CLFLUSHOPT__
|
||||
#undef __DISABLE_CLFLUSHOPT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLFLUSHOPT__ */
|
||||
|
||||
#endif /* _CLFLUSHOPTINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
25
third_party/intel/clwbintrin.internal.h
vendored
25
third_party/intel/clwbintrin.internal.h
vendored
|
@ -1,23 +1,24 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <clwbintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLWBINTRIN_H_INCLUDED
|
||||
#define _CLWBINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __CLWB__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clwb")
|
||||
#define __DISABLE_CLWB__
|
||||
#endif /* __CLWB__ */
|
||||
|
||||
__funline void _mm_clwb(void *__A) {
|
||||
__builtin_ia32_clwb(__A);
|
||||
#endif
|
||||
/* Thin inline wrapper: passes the address __A to __builtin_ia32_clwb and
   returns nothing.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_clwb (void *__A)
|
||||
{
|
||||
__builtin_ia32_clwb (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_CLWB__
|
||||
#undef __DISABLE_CLWB__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLWB__ */
|
||||
|
||||
#endif /* _CLWBINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
21
third_party/intel/clzerointrin.internal.h
vendored
21
third_party/intel/clzerointrin.internal.h
vendored
|
@ -1,21 +1,20 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _CLZEROINTRIN_H_INCLUDED
|
||||
#define _CLZEROINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
|
||||
#ifndef __CLZERO__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clzero")
|
||||
#define __DISABLE_CLZERO__
|
||||
#endif /* __CLZERO__ */
|
||||
|
||||
__funline void _mm_clzero(void* __I) {
|
||||
__builtin_ia32_clzero(__I);
|
||||
#endif
|
||||
/* Thin inline wrapper: passes the address __I to __builtin_ia32_clzero
   and returns nothing.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_clzero (void * __I)
|
||||
{
|
||||
__builtin_ia32_clzero (__I);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_CLZERO__
|
||||
#undef __DISABLE_CLZERO__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLZERO__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _CLZEROINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
317
third_party/intel/cpuid.internal.h
vendored
317
third_party/intel/cpuid.internal.h
vendored
|
@ -1,237 +1,220 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_
|
||||
#ifdef __x86_64__
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
#define bit_SSE3 (1 << 0)
|
||||
#define bit_PCLMUL (1 << 1)
|
||||
#define bit_LZCNT (1 << 5)
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#define bit_FMA (1 << 12)
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _CPUID_H_INCLUDED
|
||||
#define _CPUID_H_INCLUDED
|
||||
#define bit_AVXVNNI (1 << 4)
|
||||
#define bit_AVX512BF16 (1 << 5)
|
||||
#define bit_HRESET (1 << 22)
|
||||
#define bit_SSE3 (1 << 0)
|
||||
#define bit_PCLMUL (1 << 1)
|
||||
#define bit_LZCNT (1 << 5)
|
||||
#define bit_SSSE3 (1 << 9)
|
||||
#define bit_FMA (1 << 12)
|
||||
#define bit_CMPXCHG16B (1 << 13)
|
||||
#define bit_SSE4_1 (1 << 19)
|
||||
#define bit_SSE4_2 (1 << 20)
|
||||
#define bit_MOVBE (1 << 22)
|
||||
#define bit_POPCNT (1 << 23)
|
||||
#define bit_AES (1 << 25)
|
||||
#define bit_XSAVE (1 << 26)
|
||||
#define bit_OSXSAVE (1 << 27)
|
||||
#define bit_AVX (1 << 28)
|
||||
#define bit_F16C (1 << 29)
|
||||
#define bit_RDRND (1 << 30)
|
||||
|
||||
#define bit_SSE4_1 (1 << 19)
|
||||
#define bit_SSE4_2 (1 << 20)
|
||||
#define bit_MOVBE (1 << 22)
|
||||
#define bit_POPCNT (1 << 23)
|
||||
#define bit_AES (1 << 25)
|
||||
#define bit_XSAVE (1 << 26)
|
||||
#define bit_OSXSAVE (1 << 27)
|
||||
#define bit_AVX (1 << 28)
|
||||
#define bit_F16C (1 << 29)
|
||||
#define bit_RDRND (1 << 30)
|
||||
#define bit_CMPXCHG8B (1 << 8)
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
#define bit_FXSAVE (1 << 24)
|
||||
#define bit_SSE (1 << 25)
|
||||
#define bit_SSE2 (1 << 26)
|
||||
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
#define bit_FXSAVE (1 << 24)
|
||||
#define bit_SSE (1 << 25)
|
||||
#define bit_SSE2 (1 << 26)
|
||||
#define bit_LAHF_LM (1 << 0)
|
||||
#define bit_ABM (1 << 5)
|
||||
#define bit_SSE4a (1 << 6)
|
||||
#define bit_PRFCHW (1 << 8)
|
||||
#define bit_XOP (1 << 11)
|
||||
#define bit_LWP (1 << 15)
|
||||
#define bit_FMA4 (1 << 16)
|
||||
#define bit_TBM (1 << 21)
|
||||
#define bit_MWAITX (1 << 29)
|
||||
|
||||
#define bit_ABM (1 << 5)
|
||||
#define bit_SSE4a (1 << 6)
|
||||
#define bit_PRFCHW (1 << 8)
|
||||
#define bit_XOP (1 << 11)
|
||||
#define bit_LWP (1 << 15)
|
||||
#define bit_FMA4 (1 << 16)
|
||||
#define bit_TBM (1 << 21)
|
||||
#define bit_MWAITX (1 << 29)
|
||||
#define bit_MMXEXT (1 << 22)
|
||||
#define bit_LM (1 << 29)
|
||||
#define bit_LM (1 << 29)
|
||||
#define bit_3DNOWP (1 << 30)
|
||||
#define bit_3DNOW (1u << 31)
|
||||
|
||||
#define bit_CLZERO (1 << 0)
|
||||
#define bit_3DNOW (1u << 31)
|
||||
#define bit_CLZERO (1 << 0)
|
||||
#define bit_WBNOINVD (1 << 9)
|
||||
|
||||
#define bit_FSGSBASE (1 << 0)
|
||||
#define bit_SGX (1 << 2)
|
||||
#define bit_BMI (1 << 3)
|
||||
#define bit_HLE (1 << 4)
|
||||
#define bit_AVX2 (1 << 5)
|
||||
#define bit_BMI2 (1 << 8)
|
||||
#define bit_RTM (1 << 11)
|
||||
#define bit_MPX (1 << 14)
|
||||
#define bit_AVX512F (1 << 16)
|
||||
#define bit_AVX512DQ (1 << 17)
|
||||
#define bit_RDSEED (1 << 18)
|
||||
#define bit_ADX (1 << 19)
|
||||
#define bit_FSGSBASE (1 << 0)
|
||||
#define bit_SGX (1 << 2)
|
||||
#define bit_BMI (1 << 3)
|
||||
#define bit_HLE (1 << 4)
|
||||
#define bit_AVX2 (1 << 5)
|
||||
#define bit_BMI2 (1 << 8)
|
||||
#define bit_RTM (1 << 11)
|
||||
#define bit_MPX (1 << 14)
|
||||
#define bit_AVX512F (1 << 16)
|
||||
#define bit_AVX512DQ (1 << 17)
|
||||
#define bit_RDSEED (1 << 18)
|
||||
#define bit_ADX (1 << 19)
|
||||
#define bit_AVX512IFMA (1 << 21)
|
||||
#define bit_CLFLUSHOPT (1 << 23)
|
||||
#define bit_CLWB (1 << 24)
|
||||
#define bit_AVX512PF (1 << 26)
|
||||
#define bit_AVX512ER (1 << 27)
|
||||
#define bit_AVX512CD (1 << 28)
|
||||
#define bit_SHA (1 << 29)
|
||||
#define bit_AVX512BW (1 << 30)
|
||||
#define bit_AVX512VL (1u << 31)
|
||||
|
||||
#define bit_PREFETCHWT1 (1 << 0)
|
||||
#define bit_AVX512VBMI (1 << 1)
|
||||
#define bit_PKU (1 << 3)
|
||||
#define bit_OSPKE (1 << 4)
|
||||
#define bit_WAITPKG (1 << 5)
|
||||
#define bit_AVX512VBMI2 (1 << 6)
|
||||
#define bit_SHSTK (1 << 7)
|
||||
#define bit_GFNI (1 << 8)
|
||||
#define bit_VAES (1 << 9)
|
||||
#define bit_AVX512VNNI (1 << 11)
|
||||
#define bit_VPCLMULQDQ (1 << 10)
|
||||
#define bit_AVX512BITALG (1 << 12)
|
||||
#define bit_CLWB (1 << 24)
|
||||
#define bit_AVX512PF (1 << 26)
|
||||
#define bit_AVX512ER (1 << 27)
|
||||
#define bit_AVX512CD (1 << 28)
|
||||
#define bit_SHA (1 << 29)
|
||||
#define bit_AVX512BW (1 << 30)
|
||||
#define bit_AVX512VL (1u << 31)
|
||||
#define bit_PREFETCHWT1 (1 << 0)
|
||||
#define bit_AVX512VBMI (1 << 1)
|
||||
#define bit_PKU (1 << 3)
|
||||
#define bit_OSPKE (1 << 4)
|
||||
#define bit_WAITPKG (1 << 5)
|
||||
#define bit_AVX512VBMI2 (1 << 6)
|
||||
#define bit_SHSTK (1 << 7)
|
||||
#define bit_GFNI (1 << 8)
|
||||
#define bit_VAES (1 << 9)
|
||||
#define bit_AVX512VNNI (1 << 11)
|
||||
#define bit_VPCLMULQDQ (1 << 10)
|
||||
#define bit_AVX512BITALG (1 << 12)
|
||||
#define bit_AVX512VPOPCNTDQ (1 << 14)
|
||||
#define bit_RDPID (1 << 22)
|
||||
#define bit_MOVDIRI (1 << 27)
|
||||
#define bit_MOVDIR64B (1 << 28)
|
||||
#define bit_CLDEMOTE (1 << 25)
|
||||
|
||||
#define bit_RDPID (1 << 22)
|
||||
#define bit_MOVDIRI (1 << 27)
|
||||
#define bit_MOVDIR64B (1 << 28)
|
||||
#define bit_ENQCMD (1 << 29)
|
||||
#define bit_CLDEMOTE (1 << 25)
|
||||
#define bit_KL (1 << 23)
|
||||
#define bit_AVX5124VNNIW (1 << 2)
|
||||
#define bit_AVX5124FMAPS (1 << 3)
|
||||
#define bit_IBT (1 << 20)
|
||||
#define bit_PCONFIG (1 << 18)
|
||||
|
||||
#define bit_AVX512VP2INTERSECT (1 << 8)
|
||||
#define bit_IBT (1 << 20)
|
||||
#define bit_UINTR (1 << 5)
|
||||
#define bit_PCONFIG (1 << 18)
|
||||
#define bit_SERIALIZE (1 << 14)
|
||||
#define bit_TSXLDTRK (1 << 16)
|
||||
#define bit_AMX_BF16 (1 << 22)
|
||||
#define bit_AMX_TILE (1 << 24)
|
||||
#define bit_AMX_INT8 (1 << 25)
|
||||
#define bit_BNDREGS (1 << 3)
|
||||
#define bit_BNDCSR (1 << 4)
|
||||
|
||||
#define bit_BNDCSR (1 << 4)
|
||||
#define bit_XSAVEOPT (1 << 0)
|
||||
#define bit_XSAVEC (1 << 1)
|
||||
#define bit_XSAVES (1 << 3)
|
||||
|
||||
#define bit_XSAVEC (1 << 1)
|
||||
#define bit_XSAVES (1 << 3)
|
||||
#define bit_PTWRITE (1 << 4)
|
||||
|
||||
#define bit_AESKLE ( 1<<0 )
|
||||
#define bit_WIDEKL ( 1<<2 )
|
||||
#define signature_AMD_ebx 0x68747541
|
||||
#define signature_AMD_ecx 0x444d4163
|
||||
#define signature_AMD_edx 0x69746e65
|
||||
|
||||
#define signature_CENTAUR_ebx 0x746e6543
|
||||
#define signature_CENTAUR_ecx 0x736c7561
|
||||
#define signature_CENTAUR_edx 0x48727561
|
||||
|
||||
#define signature_CYRIX_ebx 0x69727943
|
||||
#define signature_CYRIX_ecx 0x64616574
|
||||
#define signature_CYRIX_edx 0x736e4978
|
||||
|
||||
#define signature_INTEL_ebx 0x756e6547
|
||||
#define signature_INTEL_ecx 0x6c65746e
|
||||
#define signature_INTEL_edx 0x49656e69
|
||||
|
||||
#define signature_TM1_ebx 0x6e617254
|
||||
#define signature_TM1_ecx 0x55504361
|
||||
#define signature_TM1_edx 0x74656d73
|
||||
|
||||
#define signature_TM2_ebx 0x756e6547
|
||||
#define signature_TM2_ecx 0x3638784d
|
||||
#define signature_TM2_edx 0x54656e69
|
||||
|
||||
#define signature_NSC_ebx 0x646f6547
|
||||
#define signature_NSC_ecx 0x43534e20
|
||||
#define signature_NSC_edx 0x79622065
|
||||
|
||||
#define signature_NEXGEN_ebx 0x4778654e
|
||||
#define signature_NEXGEN_ecx 0x6e657669
|
||||
#define signature_NEXGEN_edx 0x72446e65
|
||||
|
||||
#define signature_RISE_ebx 0x65736952
|
||||
#define signature_RISE_ecx 0x65736952
|
||||
#define signature_RISE_edx 0x65736952
|
||||
|
||||
#define signature_SIS_ebx 0x20536953
|
||||
#define signature_SIS_ecx 0x20536953
|
||||
#define signature_SIS_edx 0x20536953
|
||||
|
||||
#define signature_UMC_ebx 0x20434d55
|
||||
#define signature_UMC_ecx 0x20434d55
|
||||
#define signature_UMC_edx 0x20434d55
|
||||
|
||||
#define signature_VIA_ebx 0x20414956
|
||||
#define signature_VIA_ecx 0x20414956
|
||||
#define signature_VIA_edx 0x20414956
|
||||
|
||||
#define signature_VORTEX_ebx 0x74726f56
|
||||
#define signature_VORTEX_ecx 0x436f5320
|
||||
#define signature_VORTEX_edx 0x36387865
|
||||
|
||||
#ifndef __x86_64__
|
||||
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
do { \
|
||||
if (__builtin_constant_p(level) && (level) != 1) \
|
||||
__asm__("cpuid\n\t" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(level)); \
|
||||
else \
|
||||
__asm__("cpuid\n\t" \
|
||||
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \
|
||||
: "0"(level), "1"(0), "2"(0)); \
|
||||
} while (0)
|
||||
/* Non-x86_64 variant.  Executes CPUID with `level` as the a-register input
   and stores the a/b/c/d register outputs into a, b, c, d.  When `level`
   is a compile-time constant other than 1 only that input is supplied;
   otherwise the b- and c-register inputs are additionally set to 0.  */
#define __cpuid(level, a, b, c, d) do { if (__builtin_constant_p (level) && (level) != 1) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level)); else __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "1" (0), "2" (0)); } while (0)
|
||||
#else
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__("cpuid\n\t" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(level))
|
||||
/* x86_64 variant.  Executes CPUID with `level` as the a-register input and
   stores the a/b/c/d register outputs into a, b, c, d.  The c-register
   input (subleaf) is not set; use __cpuid_count when it matters.  */
#define __cpuid(level, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level))
|
||||
#endif
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__("cpuid\n\t" \
|
||||
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \
|
||||
: "0"(level), "2"(count))
|
||||
|
||||
static __inline unsigned int __get_cpuid_max(unsigned int __ext,
|
||||
unsigned int *__sig) {
|
||||
/* Executes CPUID with `level` as the a-register input and `count` as the
   c-register input, storing the a/b/c/d register outputs into a, b, c, d.  */
#define __cpuid_count(level, count, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "2" (count))
|
||||
static __inline unsigned int
|
||||
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
|
||||
{
|
||||
unsigned int __eax, __ebx, __ecx, __edx;
|
||||
#ifndef __x86_64__
|
||||
#if __GNUC__ >= 3
|
||||
__asm__("pushf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"mov{l}\t{%0, %1|%1, %0}\n\t"
|
||||
"xor{l}\t{%2, %0|%0, %2}\n\t"
|
||||
"push{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
: "=&r"(__eax), "=&r"(__ebx)
|
||||
: "i"(0x00200000));
|
||||
__asm__ ("pushf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"mov{l}\t{%0, %1|%1, %0}\n\t"
|
||||
"xor{l}\t{%2, %0|%0, %2}\n\t"
|
||||
"push{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#else
|
||||
__asm__("pushfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"movl\t%0, %1\n\t"
|
||||
"xorl\t%2, %0\n\t"
|
||||
"pushl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
: "=&r"(__eax), "=&r"(__ebx)
|
||||
: "i"(0x00200000));
|
||||
__asm__ ("pushfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"movl\t%0, %1\n\t"
|
||||
"xorl\t%2, %0\n\t"
|
||||
"pushl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#endif
|
||||
if (!((__eax ^ __ebx) & 0x00200000)) return 0;
|
||||
if (!((__eax ^ __ebx) & 0x00200000))
|
||||
return 0;
|
||||
#endif
|
||||
__cpuid(__ext, __eax, __ebx, __ecx, __edx);
|
||||
if (__sig) *__sig = __ebx;
|
||||
__cpuid (__ext, __eax, __ebx, __ecx, __edx);
|
||||
if (__sig)
|
||||
*__sig = __ebx;
|
||||
return __eax;
|
||||
}
|
||||
|
||||
static __inline int __get_cpuid(unsigned int __leaf, unsigned int *__eax,
|
||||
unsigned int *__ebx, unsigned int *__ecx,
|
||||
unsigned int *__edx) {
|
||||
static __inline int
|
||||
__get_cpuid (unsigned int __leaf,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx, unsigned int *__edx)
|
||||
{
|
||||
unsigned int __ext = __leaf & 0x80000000;
|
||||
unsigned int __maxlevel = __get_cpuid_max(__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf) return 0;
|
||||
__cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf)
|
||||
return 0;
|
||||
__cpuid (__leaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static __inline int __get_cpuid_count(unsigned int __leaf,
|
||||
unsigned int __subleaf,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx,
|
||||
unsigned int *__edx) {
|
||||
static __inline int
|
||||
__get_cpuid_count (unsigned int __leaf, unsigned int __subleaf,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx, unsigned int *__edx)
|
||||
{
|
||||
unsigned int __ext = __leaf & 0x80000000;
|
||||
unsigned int __maxlevel = __get_cpuid_max(__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf) return 0;
|
||||
__cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf)
|
||||
return 0;
|
||||
__cpuid_count (__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_ */
|
||||
static __inline void
|
||||
__cpuidex (int __cpuid_info[4], int __leaf, int __subleaf)
|
||||
{
|
||||
__cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
|
||||
__cpuid_info[2], __cpuid_info[3]);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
1879
third_party/intel/emmintrin.internal.h
vendored
1879
third_party/intel/emmintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
30
third_party/intel/enqcmdintrin.internal.h
vendored
Normal file
30
third_party/intel/enqcmdintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <enqcmdintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _ENQCMDINTRIN_H_INCLUDED
|
||||
#define _ENQCMDINTRIN_H_INCLUDED
|
||||
#ifndef __ENQCMD__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("enqcmd")
|
||||
#define __DISABLE_ENQCMD__
|
||||
#endif
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_enqcmd (void * __P, const void * __Q)
|
||||
{
|
||||
return __builtin_ia32_enqcmd (__P, __Q);
|
||||
}
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_enqcmds (void * __P, const void * __Q)
|
||||
{
|
||||
return __builtin_ia32_enqcmds (__P, __Q);
|
||||
}
|
||||
#ifdef __DISABLE_ENQCMD__
|
||||
#undef __DISABLE_ENQCMD__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
95
third_party/intel/f16cintrin.internal.h
vendored
95
third_party/intel/f16cintrin.internal.h
vendored
|
@ -1,75 +1,58 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
|
||||
# error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _F16CINTRIN_H_INCLUDED
|
||||
#define _F16CINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __F16C__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("f16c")
|
||||
#define __DISABLE_F16C__
|
||||
#endif /* __F16C__ */
|
||||
|
||||
__funline float _cvtsh_ss(unsigned short __S) {
|
||||
__v8hi __H = __extension__(__v8hi){(short)__S, 0, 0, 0, 0, 0, 0, 0};
|
||||
__v4sf __A = __builtin_ia32_vcvtph2ps(__H);
|
||||
return __builtin_ia32_vec_ext_v4sf(__A, 0);
|
||||
#endif
|
||||
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cvtsh_ss (unsigned short __S)
|
||||
{
|
||||
__v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
|
||||
__v4sf __A = __builtin_ia32_vcvtph2ps (__H);
|
||||
return __builtin_ia32_vec_ext_v4sf (__A, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts four half-precision (16-bit) floating point values to
|
||||
* single-precision floating point values.
|
||||
*/
|
||||
__funline __m128 _mm_cvtph_ps(__m128i __A) {
|
||||
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__A);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtph_ps (__m128i __A)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts eight half-precision (16-bit) floating point values to
|
||||
* single-precision floating point values.
|
||||
*/
|
||||
__funline __m256 _mm256_cvtph_ps(__m128i __A) {
|
||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__A);
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtph_ps (__m128i __A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline unsigned short _cvtss_sh(float __F, const int __I) {
|
||||
__v4sf __A = __extension__(__v4sf){__F, 0, 0, 0};
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph(__A, __I);
|
||||
return (unsigned short)__builtin_ia32_vec_ext_v8hi(__H, 0);
|
||||
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cvtss_sh (float __F, const int __I)
|
||||
{
|
||||
__v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
|
||||
return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_cvtps_ph(__m128 __A, const int __I) {
|
||||
return (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__A, __I);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtps_ph (__m128 __A, const int __I)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts eight single-precision floating point values to
|
||||
* half-precision (16-bit) floating point values.
|
||||
*/
|
||||
__funline __m128i _mm256_cvtps_ph(__m256 __A, const int __I) {
|
||||
return (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__A, __I);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtps_ph (__m256 __A, const int __I)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
|
||||
}
|
||||
#else
|
||||
#define _cvtss_sh(__F, __I) \
|
||||
(__extension__({ \
|
||||
__v4sf __A = __extension__(__v4sf){__F, 0, 0, 0}; \
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph(__A, __I); \
|
||||
(unsigned short)__builtin_ia32_vec_ext_v8hi(__H, 0); \
|
||||
}))
|
||||
|
||||
#define _mm_cvtps_ph(A, I) \
|
||||
((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)A, (int)(I)))
|
||||
|
||||
#define _mm256_cvtps_ph(A, I) \
|
||||
((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)A, (int)(I)))
|
||||
#endif /* __OPTIMIZE */
|
||||
|
||||
#define _cvtss_sh(__F, __I) (__extension__ ({ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); }))
|
||||
#define _mm_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) (A), (int) (I)))
|
||||
#define _mm256_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) (A), (int) (I)))
|
||||
#endif
|
||||
#ifdef __DISABLE_F16C__
|
||||
#undef __DISABLE_F16C__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_F16C__ */
|
||||
|
||||
#endif /* _F16CINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
311
third_party/intel/fma4intrin.internal.h
vendored
311
third_party/intel/fma4intrin.internal.h
vendored
|
@ -1,184 +1,179 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FMA4INTRIN_H_INCLUDED
|
||||
#define _FMA4INTRIN_H_INCLUDED
|
||||
|
||||
#include "third_party/intel/ammintrin.internal.h"
|
||||
|
||||
#ifndef __FMA4__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fma4")
|
||||
#define __DISABLE_FMA4__
|
||||
#endif /* __FMA4__ */
|
||||
|
||||
__funline __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
#endif
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
/* 256b Floating point multiply/add type instructions. */
|
||||
__funline __m256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_FMA4__
|
||||
#undef __DISABLE_FMA4__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FMA4__ */
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
365
third_party/intel/fmaintrin.internal.h
vendored
365
third_party/intel/fmaintrin.internal.h
vendored
|
@ -1,177 +1,246 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
|
||||
# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FMAINTRIN_H_INCLUDED
|
||||
#define _FMAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __FMA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fma")
|
||||
#define __DISABLE_FMA__
|
||||
#endif /* __FMA__ */
|
||||
|
||||
__funline __m128d _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B,
|
||||
#endif
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
|
||||
(__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
|
||||
(__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
|
||||
(__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
|
||||
(__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_FMA__
|
||||
#undef __DISABLE_FMA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FMA__ */
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
49
third_party/intel/fxsrintrin.internal.h
vendored
49
third_party/intel/fxsrintrin.internal.h
vendored
|
@ -1,37 +1,44 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <fxsrintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FXSRINTRIN_H_INCLUDED
|
||||
#define _FXSRINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __FXSR__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fxsr")
|
||||
#define __DISABLE_FXSR__
|
||||
#endif /* __FXSR__ */
|
||||
|
||||
__funline void _fxsave(void *__P) {
|
||||
__builtin_ia32_fxsave(__P);
|
||||
#endif
|
||||
/* Hands the caller-supplied pointer __P unchanged to
   __builtin_ia32_fxsave; nothing is returned.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave (void *__P)
{
  void *const __area = __P;
  __builtin_ia32_fxsave (__area);
}
|
||||
|
||||
__funline void _fxrstor(void *__P) {
|
||||
__builtin_ia32_fxrstor(__P);
|
||||
/* Hands the caller-supplied pointer __P unchanged to
   __builtin_ia32_fxrstor; nothing is returned.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor (void *__P)
{
  void *const __area = __P;
  __builtin_ia32_fxrstor (__area);
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _fxsave64(void *__P) {
|
||||
__builtin_ia32_fxsave64(__P);
|
||||
/* Hands the caller-supplied pointer __P unchanged to
   __builtin_ia32_fxsave64; nothing is returned.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave64 (void *__P)
{
  void *const __area = __P;
  __builtin_ia32_fxsave64 (__area);
}
|
||||
|
||||
__funline void _fxrstor64(void *__P) {
|
||||
__builtin_ia32_fxrstor64(__P);
|
||||
/* Hands the caller-supplied pointer __P unchanged to
   __builtin_ia32_fxrstor64; nothing is returned.  */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor64 (void *__P)
{
  void *const __area = __P;
  __builtin_ia32_fxrstor64 (__area);
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_FXSR__
|
||||
#undef __DISABLE_FXSR__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FXSR__ */
|
||||
|
||||
#endif /* _FXSRINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
457
third_party/intel/gfniintrin.internal.h
vendored
457
third_party/intel/gfniintrin.internal.h
vendored
|
@ -1,311 +1,310 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _GFNIINTRIN_H_INCLUDED
|
||||
#define _GFNIINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__GFNI__) || !defined(__SSE2__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,sse2")
|
||||
#define __DISABLE_GFNI__
|
||||
#endif /* __GFNI__ */
|
||||
|
||||
__funline __m128i _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi((__v16qi)__A, (__v16qi)__B);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
|
||||
(__v16qi) __B);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B,
|
||||
const int __C) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)__A,
|
||||
(__v16qi)__B, __C);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_gf2p8affine_epi64_epi8(__m128i __A, __m128i __B,
|
||||
const int __C) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)__A,
|
||||
(__v16qi)__B, __C);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
|
||||
(__v16qi) __B, __C);
|
||||
}
|
||||
#else
|
||||
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi( \
|
||||
(__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
|
||||
#define _mm_gf2p8affine_epi64_epi8(A, B, C) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi( \
|
||||
(__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
|
||||
/* Macro forms of the two 128-bit affine intrinsics.  Each casts A and B
   to __v16qi (via __m128i) and C to int, then expands to the matching
   builtin call.  */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C)                          \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi(                    \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
#define _mm_gf2p8affine_epi64_epi8(A, B, C)                             \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi (                      \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNI__
|
||||
#undef __DISABLE_GFNI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_GFNI__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx")
|
||||
#define __DISABLE_GFNIAVX__
|
||||
#endif /* __GFNIAVX__ */
|
||||
|
||||
__funline __m256i _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi((__v32qi)__A, (__v32qi)__B);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
|
||||
(__v32qi) __B);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m256i _mm256_gf2p8affineinv_epi64_epi8(__m256i __A, __m256i __B,
|
||||
const int __C) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)__A,
|
||||
(__v32qi)__B, __C);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_gf2p8affine_epi64_epi8(__m256i __A, __m256i __B,
|
||||
const int __C) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)__A,
|
||||
(__v32qi)__B, __C);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
|
||||
(__v32qi) __B, __C);
|
||||
}
|
||||
#else
|
||||
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi( \
|
||||
(__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
|
||||
#define _mm256_gf2p8affine_epi64_epi8(A, B, C) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi( \
|
||||
(__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
|
||||
/* Macro forms of the two 256-bit affine intrinsics.  Each casts A and B
   to __v32qi (via __m256i) and C to int, then expands to the matching
   builtin call.  */
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C)                       \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi(                    \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, C)                          \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi (                      \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX__
|
||||
#undef __DISABLE_GFNIAVX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512vl")
|
||||
#define __DISABLE_GFNIAVX512VL__
|
||||
#endif /* __GFNIAVX512VL__ */
|
||||
|
||||
__funline __m128i _mm_mask_gf2p8mul_epi8(__m128i __A, __mmask16 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
|
||||
(__v16qi)__C, (__v16qi)__D, (__v16qi)__A, __B);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
|
||||
(__v16qi) __D,
|
||||
(__v16qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_gf2p8mul_epi8(__mmask16 __A, __m128i __B,
|
||||
__m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
|
||||
(__v16qi)__B, (__v16qi)__C, (__v16qi)_mm_setzero_si128(), __A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
|
||||
(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_mask_gf2p8affineinv_epi64_epi8(__m128i __A, __mmask16 __B,
|
||||
__m128i __C, __m128i __D,
|
||||
const int __E) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
|
||||
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
|
||||
__m128i __D, const int __E)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
|
||||
(__v16qi) __D,
|
||||
__E,
|
||||
(__v16qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 __A, __m128i __B,
|
||||
__m128i __C,
|
||||
const int __D) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
|
||||
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
|
||||
const int __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
|
||||
(__v16qi) __C, __D,
|
||||
(__v16qi) _mm_setzero_si128 (),
|
||||
__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_gf2p8affine_epi64_epi8(__m128i __A, __mmask16 __B,
|
||||
__m128i __C, __m128i __D,
|
||||
const int __E) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
|
||||
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
|
||||
__m128i __D, const int __E)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
|
||||
(__v16qi) __D, __E, (__v16qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_gf2p8affine_epi64_epi8(__mmask16 __A, __m128i __B,
|
||||
__m128i __C, const int __D) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
|
||||
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
|
||||
const int __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
|
||||
(__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
|
||||
}
|
||||
#else
|
||||
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), \
|
||||
(__v16qi)(__m128i)(A), (__mmask16)(B)))
|
||||
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), \
|
||||
(__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)(A)))
|
||||
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), \
|
||||
(__v16qi)(__m128i)(A), (__mmask16)(B)))
|
||||
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), \
|
||||
(__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)(A)))
|
||||
/* Macro forms of the masked 128-bit affine intrinsics.  The mask
   variants pass (C, D, E, A, B) to the builtin; the maskz variants pass
   (B, C, D, _mm_setzero_si128 (), A).  Vector arguments are cast to
   __v16qi via __m128i, the integer argument to int and the mask to
   __mmask16.  */
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)               \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(               \
      (__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E),           \
      (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)                 \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(               \
      (__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D),           \
      (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)                  \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask(                  \
      (__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E),           \
      (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D)                    \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask(                  \
      (__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D),           \
      (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512VL__
|
||||
#undef __DISABLE_GFNIAVX512VL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX512VL__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512vl,avx512bw")
|
||||
#define __DISABLE_GFNIAVX512VLBW__
|
||||
#endif /* __GFNIAVX512VLBW__ */
|
||||
|
||||
__funline __m256i _mm256_mask_gf2p8mul_epi8(__m256i __A, __mmask32 __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
|
||||
(__v32qi)__C, (__v32qi)__D, (__v32qi)__A, __B);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
|
||||
(__v32qi) __D,
|
||||
(__v32qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_gf2p8mul_epi8(__mmask32 __A, __m256i __B,
|
||||
__m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
|
||||
(__v32qi)__B, (__v32qi)__C, (__v32qi)_mm256_setzero_si256(), __A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
|
||||
(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m256i _mm256_mask_gf2p8affineinv_epi64_epi8(__m256i __A,
|
||||
__mmask32 __B,
|
||||
__m256i __C, __m256i __D,
|
||||
const int __E) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
|
||||
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
|
||||
__m256i __C, __m256i __D, const int __E)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
|
||||
(__v32qi) __D,
|
||||
__E,
|
||||
(__v32qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 __A,
|
||||
__m256i __B, __m256i __C,
|
||||
const int __D) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
|
||||
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
|
||||
__m256i __C, const int __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
|
||||
(__v32qi) __C, __D,
|
||||
(__v32qi) _mm256_setzero_si256 (), __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_gf2p8affine_epi64_epi8(__m256i __A, __mmask32 __B,
|
||||
__m256i __C, __m256i __D,
|
||||
const int __E) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
|
||||
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
|
||||
__m256i __D, const int __E)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
|
||||
(__v32qi) __D,
|
||||
__E,
|
||||
(__v32qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 __A, __m256i __B,
|
||||
__m256i __C,
|
||||
const int __D) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
|
||||
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
|
||||
__m256i __C, const int __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
|
||||
(__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
|
||||
}
|
||||
#else
|
||||
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \
|
||||
(__v32qi)(__m256i)(A), (__mmask32)(B)))
|
||||
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
|
||||
(__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)(A)))
|
||||
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \
|
||||
(__v32qi)(__m256i)(A), (__mmask32)(B)))
|
||||
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
|
||||
(__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)(A)))
|
||||
/* Macro forms of the masked 256-bit affine intrinsics.  The mask
   variants pass (C, D, E, A, B) to the builtin; the maskz variants pass
   (B, C, D, _mm256_setzero_si256 (), A).  Vector arguments are cast to
   __v32qi via __m256i, the integer argument to int and the mask to
   __mmask32.  */
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)            \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(               \
      (__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E),           \
      (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)              \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(               \
      (__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),           \
      (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)               \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask(                  \
      (__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E),           \
      (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D)                 \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask(                  \
      (__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),           \
      (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512VLBW__
|
||||
#undef __DISABLE_GFNIAVX512VLBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX512VLBW__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512f,avx512bw")
|
||||
#define __DISABLE_GFNIAVX512FBW__
|
||||
#endif /* __GFNIAVX512FBW__ */
|
||||
|
||||
__funline __m512i _mm512_mask_gf2p8mul_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
|
||||
(__v64qi)__C, (__v64qi)__D, (__v64qi)__A, __B);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
|
||||
(__v64qi) __D, (__v64qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_gf2p8mul_epi8(__mmask64 __A, __m512i __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
|
||||
(__v64qi)__B, (__v64qi)__C, (__v64qi)_mm512_setzero_si512(), __A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
|
||||
(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
|
||||
}
|
||||
__funline __m512i _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi((__v64qi)__A, (__v64qi)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
|
||||
(__v64qi) __B);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512i _mm512_mask_gf2p8affineinv_epi64_epi8(__m512i __A,
|
||||
__mmask64 __B,
|
||||
__m512i __C, __m512i __D,
|
||||
const int __E) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
|
||||
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
|
||||
__m512i __D, const int __E)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
|
||||
(__v64qi) __D,
|
||||
__E,
|
||||
(__v64qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 __A,
|
||||
__m512i __B, __m512i __C,
|
||||
const int __D) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
|
||||
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
|
||||
__m512i __C, const int __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
|
||||
(__v64qi) __C, __D,
|
||||
(__v64qi) _mm512_setzero_si512 (), __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_gf2p8affineinv_epi64_epi8(__m512i __A, __m512i __B,
|
||||
const int __C) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)__A,
|
||||
(__v64qi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
|
||||
(__v64qi) __B, __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_gf2p8affine_epi64_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C, __m512i __D,
|
||||
const int __E) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
|
||||
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
|
||||
__m512i __D, const int __E)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
|
||||
(__v64qi) __D, __E, (__v64qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 __A, __m512i __B,
|
||||
__m512i __C,
|
||||
const int __D) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
|
||||
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
|
||||
const int __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
|
||||
(__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
|
||||
}
|
||||
__funline __m512i _mm512_gf2p8affine_epi64_epi8(__m512i __A, __m512i __B,
|
||||
const int __C) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)__A,
|
||||
(__v64qi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
|
||||
(__v64qi) __B, __C);
|
||||
}
|
||||
#else
|
||||
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
|
||||
(__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
|
||||
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi( \
|
||||
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
|
||||
(__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
|
||||
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi( \
|
||||
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512FBW__
|
||||
#undef __DISABLE_GFNIAVX512FBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX512FBW__ */
|
||||
|
||||
#endif /* _GFNIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
24
third_party/intel/hresetintrin.internal.h
vendored
Normal file
24
third_party/intel/hresetintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <hresetintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _HRESETINTRIN_H_INCLUDED
|
||||
#define _HRESETINTRIN_H_INCLUDED
|
||||
#ifndef __HRESET__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("hreset")
|
||||
#define __DISABLE_HRESET__
|
||||
#endif
|
||||
/* _hreset: always-inline wrapper that hands __EAX unchanged to
   __builtin_ia32_hreset and returns nothing.
   NOTE(review): presumably emits the HRESET instruction with __EAX as the
   feature mask -- inferred from the builtin name only; confirm against the
   GCC / Intel documentation. */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_hreset (unsigned int __EAX)
|
||||
{
|
||||
__builtin_ia32_hreset (__EAX);
|
||||
}
|
||||
#ifdef __DISABLE_HRESET__
|
||||
#undef __DISABLE_HRESET__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
275
third_party/intel/ia32intrin.internal.h
vendored
275
third_party/intel/ia32intrin.internal.h
vendored
|
@ -1,184 +1,217 @@
|
|||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <ia32intrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
__funline int __bsfd(int __X) {
|
||||
return __builtin_ctz(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsfd (int __X)
|
||||
{
|
||||
return __builtin_ctz (__X);
|
||||
}
|
||||
|
||||
__funline int __bsrd(int __X) {
|
||||
return __builtin_ia32_bsrsi(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsrd (int __X)
|
||||
{
|
||||
return __builtin_ia32_bsrsi (__X);
|
||||
}
|
||||
|
||||
__funline int __bswapd(int __X) {
|
||||
return __builtin_bswap32(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bswapd (int __X)
|
||||
{
|
||||
return __builtin_bswap32 (__X);
|
||||
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4.2")
|
||||
#define __DISABLE_SSE4_2__
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
__funline unsigned int __crc32b(unsigned int __C, unsigned char __V) {
|
||||
return __builtin_ia32_crc32qi(__C, __V);
|
||||
#endif
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32b (unsigned int __C, unsigned char __V)
|
||||
{
|
||||
return __builtin_ia32_crc32qi (__C, __V);
|
||||
}
|
||||
|
||||
__funline unsigned int __crc32w(unsigned int __C, unsigned short __V) {
|
||||
return __builtin_ia32_crc32hi(__C, __V);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32w (unsigned int __C, unsigned short __V)
|
||||
{
|
||||
return __builtin_ia32_crc32hi (__C, __V);
|
||||
}
|
||||
|
||||
__funline unsigned int __crc32d(unsigned int __C, unsigned int __V) {
|
||||
return __builtin_ia32_crc32si(__C, __V);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32d (unsigned int __C, unsigned int __V)
|
||||
{
|
||||
return __builtin_ia32_crc32si (__C, __V);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SSE4_2__
|
||||
#undef __DISABLE_SSE4_2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE4_2__ */
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
__funline int __popcntd(unsigned int __X) {
|
||||
return __builtin_popcount(__X);
|
||||
#endif
|
||||
#endif
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__popcntd (unsigned int __X)
|
||||
{
|
||||
return __builtin_popcount (__X);
|
||||
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
__funline unsigned long long __rdpmc(int __S) {
|
||||
return __builtin_ia32_rdpmc(__S);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rdpmc (int __S)
|
||||
{
|
||||
return __builtin_ia32_rdpmc (__S);
|
||||
}
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
__funline unsigned long long __rdtsc(void) {
|
||||
return __builtin_ia32_rdtsc();
|
||||
}
|
||||
|
||||
#endif
|
||||
#define __rdtsc() __builtin_ia32_rdtsc ()
|
||||
#ifndef __iamcu__
|
||||
|
||||
__funline unsigned long long __rdtscp(unsigned int *__A) {
|
||||
return __builtin_ia32_rdtscp(__A);
|
||||
#define __rdtscp(a) __builtin_ia32_rdtscp (a)
|
||||
#endif
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rolb (unsigned char __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rolqi (__X, __C);
|
||||
}
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
__funline unsigned char __rolb(unsigned char __X, int __C) {
|
||||
return __builtin_ia32_rolqi(__X, __C);
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rolw (unsigned short __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rolhi (__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned short __rolw(unsigned short __X, int __C) {
|
||||
return __builtin_ia32_rolhi(__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned int __rold(unsigned int __X, int __C) {
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rold (unsigned int __X, int __C)
|
||||
{
|
||||
__C &= 31;
|
||||
return (__X << __C) | (__X >> (-__C & 31));
|
||||
}
|
||||
|
||||
__funline unsigned char __rorb(unsigned char __X, int __C) {
|
||||
return __builtin_ia32_rorqi(__X, __C);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rorb (unsigned char __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rorqi (__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned short __rorw(unsigned short __X, int __C) {
|
||||
return __builtin_ia32_rorhi(__X, __C);
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rorw (unsigned short __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rorhi (__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned int __rord(unsigned int __X, int __C) {
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rord (unsigned int __X, int __C)
|
||||
{
|
||||
__C &= 31;
|
||||
return (__X >> __C) | (__X << (-__C & 31));
|
||||
}
|
||||
|
||||
__funline void __pause(void) {
|
||||
__builtin_ia32_pause();
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__pause (void)
|
||||
{
|
||||
__builtin_ia32_pause ();
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
__funline int __bsfq(long long __X) {
|
||||
return __builtin_ctzll(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsfq (long long __X)
|
||||
{
|
||||
return __builtin_ctzll (__X);
|
||||
}
|
||||
|
||||
__funline int __bsrq(long long __X) {
|
||||
return __builtin_ia32_bsrdi(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsrq (long long __X)
|
||||
{
|
||||
return __builtin_ia32_bsrdi (__X);
|
||||
}
|
||||
|
||||
__funline long long __bswapq(long long __X) {
|
||||
return __builtin_bswap64(__X);
|
||||
extern __inline long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bswapq (long long __X)
|
||||
{
|
||||
return __builtin_bswap64 (__X);
|
||||
}
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4.2")
|
||||
#define __DISABLE_SSE4_2__
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
__funline unsigned long long __crc32q(unsigned long long __C,
|
||||
unsigned long long __V) {
|
||||
return __builtin_ia32_crc32di(__C, __V);
|
||||
#endif
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32q (unsigned long long __C, unsigned long long __V)
|
||||
{
|
||||
return __builtin_ia32_crc32di (__C, __V);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SSE4_2__
|
||||
#undef __DISABLE_SSE4_2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE4_2__ */
|
||||
|
||||
__funline long long __popcntq(unsigned long long __X) {
|
||||
return __builtin_popcountll(__X);
|
||||
#endif
|
||||
extern __inline long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__popcntq (unsigned long long __X)
|
||||
{
|
||||
return __builtin_popcountll (__X);
|
||||
}
|
||||
|
||||
__funline unsigned long long __rolq(unsigned long long __X, int __C) {
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rolq (unsigned long long __X, int __C)
|
||||
{
|
||||
__C &= 63;
|
||||
return (__X << __C) | (__X >> (-__C & 63));
|
||||
}
|
||||
|
||||
__funline unsigned long long __rorq(unsigned long long __X, int __C) {
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rorq (unsigned long long __X, int __C)
|
||||
{
|
||||
__C &= 63;
|
||||
return (__X >> __C) | (__X << (-__C & 63));
|
||||
}
|
||||
|
||||
__funline unsigned long long __readeflags(void) {
|
||||
return __builtin_ia32_readeflags_u64();
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__readeflags (void)
|
||||
{
|
||||
return __builtin_ia32_readeflags_u64 ();
|
||||
}
|
||||
|
||||
__funline void __writeeflags(unsigned long long __X) {
|
||||
__builtin_ia32_writeeflags_u64(__X);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__writeeflags (unsigned long long __X)
|
||||
{
|
||||
__builtin_ia32_writeeflags_u64 (__X);
|
||||
}
|
||||
|
||||
#define _bswap64(a) __bswapq(a)
|
||||
#define _bswap64(a) __bswapq(a)
|
||||
#define _popcnt64(a) __popcntq(a)
|
||||
#else
|
||||
|
||||
__funline unsigned int __readeflags(void) {
|
||||
return __builtin_ia32_readeflags_u32();
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__readeflags (void)
|
||||
{
|
||||
return __builtin_ia32_readeflags_u32 ();
|
||||
}
|
||||
|
||||
__funline void __writeeflags(unsigned int __X) {
|
||||
__builtin_ia32_writeeflags_u32(__X);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__writeeflags (unsigned int __X)
|
||||
{
|
||||
__builtin_ia32_writeeflags_u32 (__X);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __LP64__
|
||||
#define _lrotl(a, b) __rolq((a), (b))
|
||||
#define _lrotr(a, b) __rorq((a), (b))
|
||||
#define _lrotl(a,b) __rolq((a), (b))
|
||||
#define _lrotr(a,b) __rorq((a), (b))
|
||||
#else
|
||||
#define _lrotl(a, b) __rold((a), (b))
|
||||
#define _lrotr(a, b) __rord((a), (b))
|
||||
#define _lrotl(a,b) __rold((a), (b))
|
||||
#define _lrotr(a,b) __rord((a), (b))
|
||||
#endif
|
||||
|
||||
#define _bit_scan_forward(a) __bsfd(a)
|
||||
#define _bit_scan_reverse(a) __bsrd(a)
|
||||
#define _bswap(a) __bswapd(a)
|
||||
#define _popcnt32(a) __popcntd(a)
|
||||
#define _bswap(a) __bswapd(a)
|
||||
#define _popcnt32(a) __popcntd(a)
|
||||
#ifndef __iamcu__
|
||||
#define _rdpmc(a) __rdpmc(a)
|
||||
#define _rdpmc(a) __rdpmc(a)
|
||||
#define _rdtscp(a) __rdtscp(a)
|
||||
#endif /* __iamcu__ */
|
||||
#define _rdtsc() __rdtsc()
|
||||
#define _rotwl(a, b) __rolw((a), (b))
|
||||
#define _rotwr(a, b) __rorw((a), (b))
|
||||
#define _rotl(a, b) __rold((a), (b))
|
||||
#define _rotr(a, b) __rord((a), (b))
|
||||
#endif
|
||||
#define _rdtsc() __rdtsc()
|
||||
#define _rotwl(a,b) __rolw((a), (b))
|
||||
#define _rotwr(a,b) __rorw((a), (b))
|
||||
#define _rotl(a,b) __rold((a), (b))
|
||||
#define _rotr(a,b) __rord((a), (b))
|
||||
#endif
|
||||
|
|
152
third_party/intel/immintrin.internal.h
vendored
152
third_party/intel/immintrin.internal.h
vendored
|
@ -1,8 +1,8 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#define _IMMINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
|
||||
/* clang-format off */
|
||||
#include "third_party/intel/x86gprintrin.internal.h"
|
||||
#include "third_party/intel/mmintrin.internal.h"
|
||||
#include "third_party/intel/xmmintrin.internal.h"
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
@ -10,12 +10,8 @@
|
|||
#include "third_party/intel/tmmintrin.internal.h"
|
||||
#include "third_party/intel/smmintrin.internal.h"
|
||||
#include "third_party/intel/wmmintrin.internal.h"
|
||||
#include "third_party/intel/fxsrintrin.internal.h"
|
||||
#include "third_party/intel/xsaveintrin.internal.h"
|
||||
#include "third_party/intel/xsaveoptintrin.internal.h"
|
||||
#include "third_party/intel/xsavesintrin.internal.h"
|
||||
#include "third_party/intel/xsavecintrin.internal.h"
|
||||
#include "third_party/intel/avxintrin.internal.h"
|
||||
#include "third_party/intel/avxvnniintrin.internal.h"
|
||||
#include "third_party/intel/avx2intrin.internal.h"
|
||||
#include "third_party/intel/avx512fintrin.internal.h"
|
||||
#include "third_party/intel/avx512erintrin.internal.h"
|
||||
|
@ -39,143 +35,21 @@
|
|||
#include "third_party/intel/avx512vnnivlintrin.internal.h"
|
||||
#include "third_party/intel/avx512vpopcntdqvlintrin.internal.h"
|
||||
#include "third_party/intel/avx512bitalgintrin.internal.h"
|
||||
#include "third_party/intel/avx512vp2intersectintrin.internal.h"
|
||||
#include "third_party/intel/avx512vp2intersectvlintrin.internal.h"
|
||||
#include "third_party/intel/shaintrin.internal.h"
|
||||
#include "third_party/intel/lzcntintrin.internal.h"
|
||||
#include "third_party/intel/bmiintrin.internal.h"
|
||||
#include "third_party/intel/bmi2intrin.internal.h"
|
||||
#include "third_party/intel/fmaintrin.internal.h"
|
||||
#include "third_party/intel/f16cintrin.internal.h"
|
||||
#include "third_party/intel/rtmintrin.internal.h"
|
||||
#include "third_party/intel/xtestintrin.internal.h"
|
||||
#include "third_party/intel/cetintrin.internal.h"
|
||||
#include "third_party/intel/gfniintrin.internal.h"
|
||||
#include "third_party/intel/vaesintrin.internal.h"
|
||||
#include "third_party/intel/vpclmulqdqintrin.internal.h"
|
||||
#include "third_party/intel/movdirintrin.internal.h"
|
||||
#include "third_party/intel/sgxintrin.internal.h"
|
||||
#include "third_party/intel/pconfigintrin.internal.h"
|
||||
#include "third_party/intel/waitpkgintrin.internal.h"
|
||||
#include "third_party/intel/cldemoteintrin.internal.h"
|
||||
#include "third_party/intel/rdseedintrin.internal.h"
|
||||
#include "third_party/intel/avx512bf16vlintrin.internal.h"
|
||||
#include "third_party/intel/avx512bf16intrin.internal.h"
|
||||
#include "third_party/intel/amxtileintrin.internal.h"
|
||||
#include "third_party/intel/amxint8intrin.internal.h"
|
||||
#include "third_party/intel/amxbf16intrin.internal.h"
|
||||
#include "third_party/intel/prfchwintrin.internal.h"
|
||||
#include "third_party/intel/adxintrin.internal.h"
|
||||
#include "third_party/intel/clwbintrin.internal.h"
|
||||
#include "third_party/intel/clflushoptintrin.internal.h"
|
||||
#include "third_party/intel/wbnoinvdintrin.internal.h"
|
||||
#include "third_party/intel/pkuintrin.internal.h"
|
||||
/* clang-format on */
|
||||
|
||||
__funline void _wbinvd(void) {
|
||||
__builtin_ia32_wbinvd();
|
||||
}
|
||||
|
||||
#ifndef __RDRND__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rdrnd")
|
||||
#define __DISABLE_RDRND__
|
||||
#endif /* __RDRND__ */
|
||||
__funline int _rdrand16_step(unsigned short *__P) {
|
||||
return __builtin_ia32_rdrand16_step(__P);
|
||||
}
|
||||
|
||||
__funline int _rdrand32_step(unsigned int *__P) {
|
||||
return __builtin_ia32_rdrand32_step(__P);
|
||||
}
|
||||
#ifdef __DISABLE_RDRND__
|
||||
#undef __DISABLE_RDRND__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RDRND__ */
|
||||
|
||||
#ifndef __RDPID__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rdpid")
|
||||
#define __DISABLE_RDPID__
|
||||
#endif /* __RDPID__ */
|
||||
__funline unsigned int _rdpid_u32(void) {
|
||||
return __builtin_ia32_rdpid();
|
||||
}
|
||||
#ifdef __DISABLE_RDPID__
|
||||
#undef __DISABLE_RDPID__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RDPID__ */
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#ifndef __FSGSBASE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fsgsbase")
|
||||
#define __DISABLE_FSGSBASE__
|
||||
#endif /* __FSGSBASE__ */
|
||||
__funline unsigned int _readfsbase_u32(void) {
|
||||
return __builtin_ia32_rdfsbase32();
|
||||
}
|
||||
|
||||
__funline unsigned long long _readfsbase_u64(void) {
|
||||
return __builtin_ia32_rdfsbase64();
|
||||
}
|
||||
|
||||
__funline unsigned int _readgsbase_u32(void) {
|
||||
return __builtin_ia32_rdgsbase32();
|
||||
}
|
||||
|
||||
__funline unsigned long long _readgsbase_u64(void) {
|
||||
return __builtin_ia32_rdgsbase64();
|
||||
}
|
||||
|
||||
__funline void _writefsbase_u32(unsigned int __B) {
|
||||
__builtin_ia32_wrfsbase32(__B);
|
||||
}
|
||||
|
||||
__funline void _writefsbase_u64(unsigned long long __B) {
|
||||
__builtin_ia32_wrfsbase64(__B);
|
||||
}
|
||||
|
||||
__funline void _writegsbase_u32(unsigned int __B) {
|
||||
__builtin_ia32_wrgsbase32(__B);
|
||||
}
|
||||
|
||||
__funline void _writegsbase_u64(unsigned long long __B) {
|
||||
__builtin_ia32_wrgsbase64(__B);
|
||||
}
|
||||
#ifdef __DISABLE_FSGSBASE__
|
||||
#undef __DISABLE_FSGSBASE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FSGSBASE__ */
|
||||
|
||||
#ifndef __RDRND__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rdrnd")
|
||||
#define __DISABLE_RDRND__
|
||||
#endif /* __RDRND__ */
|
||||
__funline int _rdrand64_step(unsigned long long *__P) {
|
||||
return __builtin_ia32_rdrand64_step(__P);
|
||||
}
|
||||
#ifdef __DISABLE_RDRND__
|
||||
#undef __DISABLE_RDRND__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RDRND__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifndef __PTWRITE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("ptwrite")
|
||||
#define __DISABLE_PTWRITE__
|
||||
#include "third_party/intel/keylockerintrin.internal.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _ptwrite64(unsigned long long __B) {
|
||||
__builtin_ia32_ptwrite64(__B);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
__funline void _ptwrite32(unsigned __B) {
|
||||
__builtin_ia32_ptwrite32(__B);
|
||||
}
|
||||
#ifdef __DISABLE_PTWRITE__
|
||||
#undef __DISABLE_PTWRITE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_PTWRITE__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _IMMINTRIN_H_INCLUDED */
|
||||
|
|
93
third_party/intel/keylockerintrin.internal.h
vendored
Normal file
93
third_party/intel/keylockerintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,93 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <keylockerintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
#ifndef _KEYLOCKERINTRIN_H_INCLUDED
|
||||
#define _KEYLOCKERINTRIN_H_INCLUDED
|
||||
#ifndef __KL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("kl")
|
||||
#define __DISABLE_KL__
|
||||
#endif
|
||||
/* _mm_loadiwkey: always-inline wrapper around __builtin_ia32_loadiwkey.
   The arguments are reordered on the way in: the builtin receives
   (__B, __C, __A, __I), with each __m128i cast to __v2di and the integer
   __I passed last.  Nothing is returned.
   NOTE(review): the roles of __A, __B and __C are not visible from this
   code -- confirm against the Intel Key Locker documentation. */
extern __inline
|
||||
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_loadiwkey (unsigned int __I, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
__builtin_ia32_loadiwkey ((__v2di) __B, (__v2di) __C, (__v2di) __A, __I);
|
||||
}
|
||||
/* _mm_encodekey128_u32: passes __I, __A (cast to __v2di) and the pointer
   __P, in that order, to __builtin_ia32_encodekey128_u32 and returns the
   builtin's unsigned int result.
   NOTE(review): __P is a non-const void *, so it is presumably the output
   buffer for the generated handle; its required size is not shown here --
   confirm before use. */
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey128_u32 (unsigned int __I, __m128i __A, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey128_u32 (__I, (__v2di)__A, __P);
|
||||
}
|
||||
/* _mm_encodekey256_u32: same shape as the 128-bit variant but with two
   vector inputs.  Passes __I, __A and __B (both cast to __v2di) and the
   pointer __P, in that order, to __builtin_ia32_encodekey256_u32 and
   returns the builtin's unsigned int result.
   NOTE(review): __A/__B presumably carry the two halves of a 256-bit key
   and __P the output handle buffer -- not established by this code;
   confirm against the Intel documentation. */
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey256_u32 (unsigned int __I, __m128i __A, __m128i __B, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey256_u32 (__I, (__v2di)__A, (__v2di)__B, __P);
|
||||
}
|
||||
/* _mm_aesdec128kl_u8: forwards to __builtin_ia32_aesdec128kl_u8 with
   __A cast to __v2di *, __B cast to __v2di, and __P unchanged; returns the
   builtin's unsigned char result.
   NOTE(review): __A is a writable pointer and __P is const, so presumably
   __A receives the output block, __B is the input block and __P the key
   handle -- inferred from the signature only; confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
/* _mm_aesdec256kl_u8: forwards to __builtin_ia32_aesdec256kl_u8 with
   __A cast to __v2di *, __B cast to __v2di, and __P unchanged; returns the
   builtin's unsigned char result.  The signature is identical to the
   128-bit variant; only the builtin differs.
   NOTE(review): presumably __A is the output block and __P a handle for a
   256-bit key -- inferred from names only; confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
/* _mm_aesenc128kl_u8: forwards to __builtin_ia32_aesenc128kl_u8 with
   __A cast to __v2di *, __B cast to __v2di, and __P unchanged; returns the
   builtin's unsigned char result.
   NOTE(review): presumably __A receives the output block, __B is the
   input block and __P the key handle -- inferred from the signature only;
   confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
/* _mm_aesenc256kl_u8: forwards to __builtin_ia32_aesenc256kl_u8 with
   __A cast to __v2di *, __B cast to __v2di, and __P unchanged; returns the
   builtin's unsigned char result.  The signature is identical to the
   128-bit variant; only the builtin differs.
   NOTE(review): presumably __A is the output block and __P a handle for a
   256-bit key -- inferred from names only; confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
#ifdef __DISABLE_KL__
|
||||
#undef __DISABLE_KL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#ifndef __WIDEKL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("widekl")
|
||||
#define __DISABLE_WIDEKL__
|
||||
#endif
|
||||
/* _mm_aesdecwide128kl_u8: forwards to __builtin_ia32_aesdecwide128kl_u8
   with __A and __B cast to __v2di * and __P unchanged; returns the
   builtin's unsigned char result.
   __A and __B are declared as arrays of 8 __m128i, but array parameters
   are plain pointers in C, so the [8] is not enforced by the compiler.
   The (__v2di *) cast on __B drops its const qualifier.
   NOTE(review): presumably __A is the 8-block output, __B the 8-block
   input and __P the key handle -- inferred from the signature; confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
/* _mm_aesdecwide256kl_u8: forwards to __builtin_ia32_aesdecwide256kl_u8
   with __A and __B cast to __v2di * and __P unchanged; returns the
   builtin's unsigned char result.
   __A and __B are declared as arrays of 8 __m128i, but array parameters
   are plain pointers in C, so the [8] is not enforced by the compiler.
   The (__v2di *) cast on __B drops its const qualifier.
   NOTE(review): presumably the 256-bit-key counterpart of the 128 variant
   (__A output, __B input, __P handle) -- inferred from names; confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
/* _mm_aesencwide128kl_u8: forwards to __builtin_ia32_aesencwide128kl_u8
   with __A and __B cast to __v2di * and __P unchanged; returns the
   builtin's unsigned char result.
   __A and __B are declared as arrays of 8 __m128i, but array parameters
   are plain pointers in C, so the [8] is not enforced by the compiler.
   The (__v2di *) cast on __B drops its const qualifier.
   NOTE(review): presumably __A is the 8-block output, __B the 8-block
   input and __P the key handle -- inferred from the signature; confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
/* _mm_aesencwide256kl_u8: forwards to __builtin_ia32_aesencwide256kl_u8
   with __A and __B cast to __v2di * and __P unchanged; returns the
   builtin's unsigned char result.
   __A and __B are declared as arrays of 8 __m128i, but array parameters
   are plain pointers in C, so the [8] is not enforced by the compiler.
   The (__v2di *) cast on __B drops its const qualifier.
   NOTE(review): presumably the 256-bit-key counterpart of the 128 variant
   (__A output, __B input, __P handle) -- inferred from names; confirm. */
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
#ifdef __DISABLE_WIDEKL__
|
||||
#undef __DISABLE_WIDEKL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
81
third_party/intel/lwpintrin.internal.h
vendored
81
third_party/intel/lwpintrin.internal.h
vendored
|
@ -1,73 +1,68 @@
|
|||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
#error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <lwpintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _LWPINTRIN_H_INCLUDED
|
||||
#define _LWPINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __LWP__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("lwp")
|
||||
#define __DISABLE_LWP__
|
||||
#endif /* __LWP__ */
|
||||
|
||||
__funline void __llwpcb(void *__pcbAddress) {
|
||||
__builtin_ia32_llwpcb(__pcbAddress);
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__llwpcb (void *__pcbAddress)
|
||||
{
|
||||
__builtin_ia32_llwpcb (__pcbAddress);
|
||||
}
|
||||
|
||||
__funline void *__slwpcb(void) {
|
||||
return __builtin_ia32_slwpcb();
|
||||
extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__slwpcb (void)
|
||||
{
|
||||
return __builtin_ia32_slwpcb ();
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline void __lwpval32(unsigned int __data2, unsigned int __data1,
|
||||
unsigned int __flags) {
|
||||
__builtin_ia32_lwpval32(__data2, __data1, __flags);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
|
||||
{
|
||||
__builtin_ia32_lwpval32 (__data2, __data1, __flags);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void __lwpval64(unsigned long long __data2, unsigned int __data1,
|
||||
unsigned int __flags) {
|
||||
__builtin_ia32_lwpval64(__data2, __data1, __flags);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpval64 (unsigned long long __data2, unsigned int __data1,
|
||||
unsigned int __flags)
|
||||
{
|
||||
__builtin_ia32_lwpval64 (__data2, __data1, __flags);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#define __lwpval32(D2, D1, F) \
|
||||
(__builtin_ia32_lwpval32((unsigned int)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpval32(D2, D1, F) (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#ifdef __x86_64__
|
||||
#define __lwpval64(D2, D1, F) \
|
||||
(__builtin_ia32_lwpval64((unsigned long long)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpval64(D2, D1, F) (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline unsigned char __lwpins32(unsigned int __data2, unsigned int __data1,
|
||||
unsigned int __flags) {
|
||||
return __builtin_ia32_lwpins32(__data2, __data1, __flags);
|
||||
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
|
||||
{
|
||||
return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned char __lwpins64(unsigned long long __data2,
|
||||
unsigned int __data1, unsigned int __flags) {
|
||||
return __builtin_ia32_lwpins64(__data2, __data1, __flags);
|
||||
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpins64 (unsigned long long __data2, unsigned int __data1,
|
||||
unsigned int __flags)
|
||||
{
|
||||
return __builtin_ia32_lwpins64 (__data2, __data1, __flags);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#define __lwpins32(D2, D1, F) \
|
||||
(__builtin_ia32_lwpins32((unsigned int)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpins32(D2, D1, F) (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#ifdef __x86_64__
|
||||
#define __lwpins64(D2, D1, F) \
|
||||
(__builtin_ia32_lwpins64((unsigned long long)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpins64(D2, D1, F) (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_LWP__
|
||||
#undef __DISABLE_LWP__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_LWP__ */
|
||||
|
||||
#endif /* _LWPINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
52
third_party/intel/lzcntintrin.internal.h
vendored
52
third_party/intel/lzcntintrin.internal.h
vendored
|
@ -1,41 +1,45 @@
|
|||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <lzcntintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _LZCNTINTRIN_H_INCLUDED
|
||||
#define _LZCNTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __LZCNT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("lzcnt")
|
||||
#define __DISABLE_LZCNT__
|
||||
#endif /* __LZCNT__ */
|
||||
|
||||
__funline unsigned short __lzcnt16(unsigned short __X) {
|
||||
return __builtin_ia32_lzcnt_u16(__X);
|
||||
#endif
|
||||
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lzcnt16 (unsigned short __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u16 (__X);
|
||||
}
|
||||
|
||||
__funline unsigned int __lzcnt32(unsigned int __X) {
|
||||
return __builtin_ia32_lzcnt_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lzcnt32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
__funline unsigned int _lzcnt_u32(unsigned int __X) {
|
||||
return __builtin_ia32_lzcnt_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_lzcnt_u32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned long long __lzcnt64(unsigned long long __X) {
|
||||
return __builtin_ia32_lzcnt_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lzcnt64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u64 (__X);
|
||||
}
|
||||
|
||||
__funline unsigned long long _lzcnt_u64(unsigned long long __X) {
|
||||
return __builtin_ia32_lzcnt_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_lzcnt_u64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u64 (__X);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_LZCNT__
|
||||
#undef __DISABLE_LZCNT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_LZCNT__ */
|
||||
|
||||
#endif /* _LZCNTINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
217
third_party/intel/mm3dnow.internal.h
vendored
217
third_party/intel/mm3dnow.internal.h
vendored
|
@ -1,9 +1,9 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _MM3DNOW_H_INCLUDED
|
||||
#define _MM3DNOW_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "third_party/intel/mmintrin.internal.h"
|
||||
#include "third_party/intel/prfchwintrin.internal.h"
|
||||
|
||||
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW__
|
||||
#pragma GCC push_options
|
||||
#ifdef __x86_64__
|
||||
|
@ -12,110 +12,128 @@
|
|||
#pragma GCC target("3dnow")
|
||||
#endif
|
||||
#define __DISABLE_3dNOW__
|
||||
#endif /* __3dNOW__ */
|
||||
|
||||
__funline void _m_femms(void) {
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_femms (void)
|
||||
{
|
||||
__builtin_ia32_femms();
|
||||
}
|
||||
|
||||
__funline __m64 _m_pavgusb(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pavgusb((__v8qi)__A, (__v8qi)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pavgusb (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pf2id(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pf2id((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pf2id (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfacc(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfacc((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfadd(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfadd((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfadd (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfcmpeq(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpeq (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfcmpge(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfcmpge((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpge (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfcmpgt(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpgt (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfmax(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfmax((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmax (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfmin(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfmin((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmin (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfmul(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfmul((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmul (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrcp(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pfrcp((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcp (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrcpit1(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcpit1 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrcpit2(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcpit2 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrsqrt(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrsqrt (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrsqit1(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrsqit1 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfsub(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfsub((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfsub (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfsubr(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfsubr((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfsubr (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pi2fd(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pi2fd((__v2si)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pi2fd (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pmulhrw(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__A, (__v4hi)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pmulhrw (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
|
||||
}
|
||||
|
||||
__funline void _m_prefetch(void *__P) {
|
||||
__builtin_prefetch(__P, 0, 3 /* _MM_HINT_T0 */);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_prefetch (void *__P)
|
||||
{
|
||||
__builtin_prefetch (__P, 0, 3 );
|
||||
}
|
||||
|
||||
__funline __m64 _m_from_float(float __A) {
|
||||
return __extension__(__m64)(__v2sf){__A, 0.0f};
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_from_float (float __A)
|
||||
{
|
||||
return __extension__ (__m64)(__v2sf){ __A, 0.0f };
|
||||
}
|
||||
|
||||
__funline float _m_to_float(__m64 __A) {
|
||||
union {
|
||||
__v2sf v;
|
||||
float a[2];
|
||||
} __tmp;
|
||||
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_to_float (__m64 __A)
|
||||
{
|
||||
union { __v2sf v; float a[2]; } __tmp;
|
||||
__tmp.v = (__v2sf)__A;
|
||||
return __tmp.a[0];
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_3dNOW__
|
||||
#undef __DISABLE_3dNOW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_3dNOW__ */
|
||||
|
||||
#endif
|
||||
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW_A__
|
||||
#pragma GCC push_options
|
||||
#ifdef __x86_64__
|
||||
|
@ -124,32 +142,35 @@ __funline float _m_to_float(__m64 __A) {
|
|||
#pragma GCC target("3dnowa")
|
||||
#endif
|
||||
#define __DISABLE_3dNOW_A__
|
||||
#endif /* __3dNOW_A__ */
|
||||
|
||||
__funline __m64 _m_pf2iw(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pf2iw((__v2sf)__A);
|
||||
#endif
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pf2iw (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfnacc(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfnacc((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfnacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfpnacc(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfpnacc((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfpnacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pi2fw(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pi2fw((__v2si)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pi2fw (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pswapd(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pswapdsf((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pswapd (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_3dNOW_A__
|
||||
#undef __DISABLE_3dNOW_A__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_3dNOW_A__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _MM3DNOW_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
11
third_party/intel/mm_malloc.internal.h
vendored
11
third_party/intel/mm_malloc.internal.h
vendored
|
@ -1,15 +1,14 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _MM_MALLOC_H_INCLUDED
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "libc/mem/mem.h"
|
||||
|
||||
#ifndef __cplusplus
|
||||
extern int _mm_posix_memalign(void **, size_t, size_t)
|
||||
#else
|
||||
extern "C" int _mm_posix_memalign(void **, size_t, size_t) throw()
|
||||
#endif
|
||||
__asm__("posix_memalign");
|
||||
|
||||
static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
|
||||
void *__ptr;
|
||||
if (__alignment == 1) return malloc(__size);
|
||||
|
@ -20,10 +19,8 @@ static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
|
|||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static __inline void _mm_free(void *__ptr) {
|
||||
free(__ptr);
|
||||
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _MM_MALLOC_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
|
|
1092
third_party/intel/mmintrin.internal.h
vendored
1092
third_party/intel/mmintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
49
third_party/intel/movdirintrin.internal.h
vendored
49
third_party/intel/movdirintrin.internal.h
vendored
|
@ -1,42 +1,47 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <movdirintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <movdirintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _MOVDIRINTRIN_H_INCLUDED
|
||||
#define _MOVDIRINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __MOVDIRI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("movdiri")
|
||||
#pragma GCC target ("movdiri")
|
||||
#define __DISABLE_MOVDIRI__
|
||||
#endif /* __MOVDIRI__ */
|
||||
|
||||
__funline void _directstoreu_u32(void *__P, unsigned int __A) {
|
||||
__builtin_ia32_directstoreu_u32((unsigned int *)__P, __A);
|
||||
#endif
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_directstoreu_u32 (void * __P, unsigned int __A)
|
||||
{
|
||||
__builtin_ia32_directstoreu_u32 ((unsigned int *)__P, __A);
|
||||
}
|
||||
#ifdef __x86_64__
|
||||
__funline void _directstoreu_u64(void *__P, unsigned long long __A) {
|
||||
__builtin_ia32_directstoreu_u64((unsigned long long *)__P, __A);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_directstoreu_u64 (void * __P, unsigned long long __A)
|
||||
{
|
||||
__builtin_ia32_directstoreu_u64 ((unsigned long long *)__P, __A);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_MOVDIRI__
|
||||
#undef __DISABLE_MOVDIRI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MOVDIRI__ */
|
||||
|
||||
#endif
|
||||
#ifndef __MOVDIR64B__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("movdir64b")
|
||||
#pragma GCC target ("movdir64b")
|
||||
#define __DISABLE_MOVDIR64B__
|
||||
#endif /* __MOVDIR64B__ */
|
||||
|
||||
__funline void _movdir64b(void *__P, const void *__Q) {
|
||||
__builtin_ia32_movdir64b(__P, __Q);
|
||||
#endif
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_movdir64b (void * __P, const void * __Q)
|
||||
{
|
||||
__builtin_ia32_movdir64b (__P, __Q);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_MOVDIR64B__
|
||||
#undef __DISABLE_MOVDIR64B__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MOVDIR64B__ */
|
||||
#endif /* _MOVDIRINTRIN_H_INCLUDED. */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
28
third_party/intel/mwaitxintrin.internal.h
vendored
28
third_party/intel/mwaitxintrin.internal.h
vendored
|
@ -1,25 +1,25 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _MWAITXINTRIN_H_INCLUDED
|
||||
#define _MWAITXINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
|
||||
#ifndef __MWAITX__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("mwaitx")
|
||||
#define __DISABLE_MWAITX__
|
||||
#endif /* __MWAITX__ */
|
||||
|
||||
__funline void _mm_monitorx(void const* __P, unsigned int __E, unsigned int __H) {
|
||||
__builtin_ia32_monitorx(__P, __E, __H);
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_monitorx (void const * __P, unsigned int __E, unsigned int __H)
|
||||
{
|
||||
__builtin_ia32_monitorx (__P, __E, __H);
|
||||
}
|
||||
|
||||
__funline void _mm_mwaitx(unsigned int __E, unsigned int __H, unsigned int __C) {
|
||||
__builtin_ia32_mwaitx(__E, __H, __C);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C)
|
||||
{
|
||||
__builtin_ia32_mwaitx (__E, __H, __C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_MWAITX__
|
||||
#undef __DISABLE_MWAITX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_MWAITX__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _MWAITXINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
7
third_party/intel/nmmintrin.internal.h
vendored
7
third_party/intel/nmmintrin.internal.h
vendored
|
@ -1,6 +1,7 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _NMMINTRIN_H_INCLUDED
|
||||
#define _NMMINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "third_party/intel/smmintrin.internal.h"
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _NMMINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
|
|
61
third_party/intel/pconfigintrin.internal.h
vendored
61
third_party/intel/pconfigintrin.internal.h
vendored
|
@ -1,52 +1,41 @@
|
|||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <pconfigintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <pconfigintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _PCONFIGINTRIN_H_INCLUDED
|
||||
#define _PCONFIGINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __PCONFIG__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("pconfig")
|
||||
#define __DISABLE_PCONFIG__
|
||||
#endif /* __PCONFIG__ */
|
||||
|
||||
#define __pconfig_b(leaf, b, retval) \
|
||||
__asm__ __volatile__("pconfig\n\t" \
|
||||
: "=a"(retval) \
|
||||
: "a"(leaf), "b"(b) \
|
||||
: "c" \
|
||||
"c")
|
||||
|
||||
#define __pconfig_generic(leaf, b, c, d, retval) \
|
||||
__asm__ __volatile__("pconfig\n\t" \
|
||||
: "=a"(retval), "=b"(b), "=c"(c), "=d"(d) \
|
||||
: "a"(leaf), "b"(b), "c"(c), "d"(d) \
|
||||
: "cc")
|
||||
|
||||
__funline unsigned int _pconfig_u32(const unsigned int __L, size_t __D[]) {
|
||||
enum __pconfig_type {
|
||||
#endif
|
||||
#define __pconfig_b(leaf, b, retval) __asm__ __volatile__ ("pconfig\n\t" : "=a" (retval) : "a" (leaf), "b" (b) : "cc")
|
||||
#define __pconfig_generic(leaf, b, c, d, retval) __asm__ __volatile__ ("pconfig\n\t" : "=a" (retval), "=b" (b), "=c" (c), "=d" (d) : "a" (leaf), "b" (b), "c" (c), "d" (d) : "cc")
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pconfig_u32 (const unsigned int __L, size_t __D[])
|
||||
{
|
||||
enum __pconfig_type
|
||||
{
|
||||
__PCONFIG_KEY_PROGRAM = 0x01,
|
||||
};
|
||||
|
||||
unsigned int __R = 0;
|
||||
|
||||
if (!__builtin_constant_p(__L))
|
||||
__pconfig_generic(__L, __D[0], __D[1], __D[2], __R);
|
||||
else
|
||||
switch (__L) {
|
||||
case __PCONFIG_KEY_PROGRAM:
|
||||
__pconfig_b(__L, __D[0], __R);
|
||||
break;
|
||||
default:
|
||||
__pconfig_generic(__L, __D[0], __D[1], __D[2], __R);
|
||||
if (!__builtin_constant_p (__L))
|
||||
__pconfig_generic (__L, __D[0], __D[1], __D[2], __R);
|
||||
else switch (__L)
|
||||
{
|
||||
case __PCONFIG_KEY_PROGRAM:
|
||||
__pconfig_b (__L, __D[0], __R);
|
||||
break;
|
||||
default:
|
||||
__pconfig_generic (__L, __D[0], __D[1], __D[2], __R);
|
||||
}
|
||||
return __R;
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_PCONFIG__
|
||||
#undef __DISABLE_PCONFIG__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_PCONFIG__ */
|
||||
|
||||
#endif /* _PCONFIGINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
33
third_party/intel/pkuintrin.internal.h
vendored
33
third_party/intel/pkuintrin.internal.h
vendored
|
@ -1,27 +1,30 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <pkuintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <pkuintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _PKUINTRIN_H_INCLUDED
|
||||
#define _PKUINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __PKU__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("pku")
|
||||
#define __DISABLE_PKU__
|
||||
#endif /* __PKU__ */
|
||||
|
||||
__funline unsigned int _rdpkru_u32(void) {
|
||||
return __builtin_ia32_rdpkru();
|
||||
#endif
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_rdpkru_u32 (void)
|
||||
{
|
||||
return __builtin_ia32_rdpkru ();
|
||||
}
|
||||
|
||||
__funline void _wrpkru(unsigned int __key) {
|
||||
__builtin_ia32_wrpkru(__key);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrpkru (unsigned int __key)
|
||||
{
|
||||
__builtin_ia32_wrpkru (__key);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_PKU__
|
||||
#undef __DISABLE_PKU__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_PKU__ */
|
||||
|
||||
#endif /* _PKUINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
114
third_party/intel/pmmintrin.internal.h
vendored
114
third_party/intel/pmmintrin.internal.h
vendored
|
@ -1,78 +1,86 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _PMMINTRIN_H_INCLUDED
|
||||
#define _PMMINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
||||
#ifndef __SSE3__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse3")
|
||||
#define __DISABLE_SSE3__
|
||||
#endif /* __SSE3__ */
|
||||
|
||||
#endif
|
||||
#define _MM_DENORMALS_ZERO_MASK 0x0040
|
||||
#define _MM_DENORMALS_ZERO_ON 0x0040
|
||||
#define _MM_DENORMALS_ZERO_OFF 0x0000
|
||||
|
||||
#define _MM_SET_DENORMALS_ZERO_MODE(mode) \
|
||||
_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (mode))
|
||||
#define _MM_DENORMALS_ZERO_ON 0x0040
|
||||
#define _MM_DENORMALS_ZERO_OFF 0x0000
|
||||
#define _MM_SET_DENORMALS_ZERO_MODE(mode) _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode))
|
||||
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
|
||||
|
||||
__funline __m128 _mm_addsub_ps(__m128 __X, __m128 __Y) {
|
||||
return (__m128)__builtin_ia32_addsubps((__v4sf)__X, (__v4sf)__Y);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_addsub_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_hadd_ps(__m128 __X, __m128 __Y) {
|
||||
return (__m128)__builtin_ia32_haddps((__v4sf)__X, (__v4sf)__Y);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_hsub_ps(__m128 __X, __m128 __Y) {
|
||||
return (__m128)__builtin_ia32_hsubps((__v4sf)__X, (__v4sf)__Y);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_ps (__m128 __X, __m128 __Y)
|
||||
{
|
||||
return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_movehdup_ps(__m128 __X) {
|
||||
return (__m128)__builtin_ia32_movshdup((__v4sf)__X);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_movehdup_ps (__m128 __X)
|
||||
{
|
||||
return (__m128) __builtin_ia32_movshdup ((__v4sf)__X);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_moveldup_ps(__m128 __X) {
|
||||
return (__m128)__builtin_ia32_movsldup((__v4sf)__X);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_moveldup_ps (__m128 __X)
|
||||
{
|
||||
return (__m128) __builtin_ia32_movsldup ((__v4sf)__X);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_addsub_pd(__m128d __X, __m128d __Y) {
|
||||
return (__m128d)__builtin_ia32_addsubpd((__v2df)__X, (__v2df)__Y);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_addsub_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_hadd_pd(__m128d __X, __m128d __Y) {
|
||||
return (__m128d)__builtin_ia32_haddpd((__v2df)__X, (__v2df)__Y);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hadd_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_hsub_pd(__m128d __X, __m128d __Y) {
|
||||
return (__m128d)__builtin_ia32_hsubpd((__v2df)__X, (__v2df)__Y);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_hsub_pd (__m128d __X, __m128d __Y)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_loaddup_pd(double const *__P) {
|
||||
return _mm_load1_pd(__P);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_loaddup_pd (double const *__P)
|
||||
{
|
||||
return _mm_load1_pd (__P);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_movedup_pd(__m128d __X) {
|
||||
return _mm_shuffle_pd(__X, __X, _MM_SHUFFLE2(0, 0));
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_movedup_pd (__m128d __X)
|
||||
{
|
||||
return _mm_shuffle_pd (__X, __X, _MM_SHUFFLE2 (0,0));
|
||||
}
|
||||
|
||||
__funline __m128i _mm_lddqu_si128(__m128i const *__P) {
|
||||
return (__m128i)__builtin_ia32_lddqu((char const *)__P);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_lddqu_si128 (__m128i const *__P)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_lddqu ((char const *)__P);
|
||||
}
|
||||
|
||||
__funline void _mm_monitor(void const *__P, unsigned int __E, unsigned int __H) {
|
||||
__builtin_ia32_monitor(__P, __E, __H);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_monitor (void const * __P, unsigned int __E, unsigned int __H)
|
||||
{
|
||||
__builtin_ia32_monitor (__P, __E, __H);
|
||||
}
|
||||
|
||||
__funline void _mm_mwait(unsigned int __E, unsigned int __H) {
|
||||
__builtin_ia32_mwait(__E, __H);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mwait (unsigned int __E, unsigned int __H)
|
||||
{
|
||||
__builtin_ia32_mwait (__E, __H);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SSE3__
|
||||
#undef __DISABLE_SSE3__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE3__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _PMMINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue