mirror of https://github.com/jart/cosmopolitan.git (synced 2025-05-23 13:52:28 +00:00)

Upgrade to Cosmopolitan GCC 11.2.0 for x86_64

commit 39f20dbb13, parent 682b74ed88
137 changed files with 48523 additions and 34001 deletions
Makefile (1 change)

@@ -94,6 +94,7 @@ o/$(MODE): \
 rwc:/dev/shm \
 rx:build/bootstrap \
 rx:o/third_party/gcc \
+r:build/portcosmo.h \
 /proc/stat \
 rw:/dev/null \
 w:o/stack.log \
@@ -88,11 +88,15 @@ ARCH = x86_64
 HOSTS ?= freebsd openbsd netbsd rhel7 rhel5 xnu win10
 endif
 
+PORTCOSMO_CCFLAGS = -fportcosmo -include build/portcosmo.h
+
 ifneq ("$(wildcard o/third_party/gcc/bin/x86_64-pc-linux-gnu-*)","")
 PREFIX = o/third_party/gcc/bin/x86_64-pc-linux-gnu-
+DEFAULT_CPPFLAGS += $(PORTCOSMO_CCFLAGS)
 else
 IGNORE := $(shell build/bootstrap/unbundle.com)
 PREFIX = o/third_party/gcc/bin/x86_64-linux-musl-
+DEFAULT_CPPFLAGS += $(PORTCOSMO_CCFLAGS)
 endif
 ifeq ($(ARCH), aarch64)
 PREFIX = o/third_party/gcc/bin/aarch64-linux-musl-
@@ -163,7 +167,7 @@ TRADITIONAL = \
 -Wno-return-type \
 -Wno-pointer-sign
 
-DEFAULT_CCFLAGS = \
+DEFAULT_CCFLAGS += \
 -Wall \
 -Werror \
 -fdebug-prefix-map='$(PWD)'= \
@@ -206,7 +210,7 @@ MATHEMATICAL = \
 -O3 \
 -fwrapv
 
-DEFAULT_CPPFLAGS = \
+DEFAULT_CPPFLAGS += \
 -DCOSMO \
 -DMODE='"$(MODE)"' \
 -DIMAGE_BASE_VIRTUAL=$(IMAGE_BASE_VIRTUAL) \
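The new flags force-include build/portcosmo.h and turn on -fportcosmo, the patched-GCC feature that lets values which differ per operating system appear where C requires constants. A minimal sketch of the kind of code this is meant to keep compiling (on Cosmopolitan, EINVAL and ENOENT are resolved at runtime rather than being preprocessor literals, so the case labels below would otherwise be rejected):

    #include <errno.h>
    #include <stdio.h>

    /* Without -fportcosmo this switch would not compile on Cosmopolitan,
       because EINVAL and ENOENT are per-OS runtime values there, not
       compile-time constants, and case labels require constants. */
    static const char *describe(int err) {
      switch (err) {
        case EINVAL:
          return "invalid argument";
        case ENOENT:
          return "no such file or directory";
        default:
          return "something else";
      }
    }

    int main(void) {
      if (!fopen("/no/such/file", "r")) {
        printf("open failed: %s\n", describe(errno));
      }
      return 0;
    }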
build/portcosmo.h (new file, +361)

@@ -0,0 +1,361 @@
#ifndef ACTUALLY_MODS
#define ACTUALLY_MODS
#if !(__ASSEMBLER__ + __LINKER__ + 0)
static const int __tmpcosmo_AF_ALG = -15823936;
|
||||||
|
static const int __tmpcosmo_AF_APPLETALK = -15823820;
|
||||||
|
static const int __tmpcosmo_AF_ASH = -15823924;
|
||||||
|
static const int __tmpcosmo_AF_ATMPVC = -15824070;
|
||||||
|
static const int __tmpcosmo_AF_ATMSVC = -15824056;
|
||||||
|
static const int __tmpcosmo_AF_AX25 = -15824014;
|
||||||
|
static const int __tmpcosmo_AF_BLUETOOTH = -15823992;
|
||||||
|
static const int __tmpcosmo_AF_BRIDGE = -15823812;
|
||||||
|
static const int __tmpcosmo_AF_CAIF = -15823850;
|
||||||
|
static const int __tmpcosmo_AF_CAN = -15823868;
|
||||||
|
static const int __tmpcosmo_AF_ECONET = -15823852;
|
||||||
|
static const int __tmpcosmo_AF_FILE = -15824118;
|
||||||
|
static const int __tmpcosmo_AF_IB = -15823966;
|
||||||
|
static const int __tmpcosmo_AF_IEEE802154 = -15823906;
|
||||||
|
static const int __tmpcosmo_AF_IPX = -15824002;
|
||||||
|
static const int __tmpcosmo_AF_IRDA = -15823860;
|
||||||
|
static const int __tmpcosmo_AF_ISDN = -15823978;
|
||||||
|
static const int __tmpcosmo_AF_IUCV = -15824106;
|
||||||
|
static const int __tmpcosmo_AF_KCM = -15824024;
|
||||||
|
static const int __tmpcosmo_AF_KEY = -15823948;
|
||||||
|
static const int __tmpcosmo_AF_LINK = -15823878;
|
||||||
|
static const int __tmpcosmo_AF_LLC = -15823824;
|
||||||
|
static const int __tmpcosmo_AF_LOCAL = -15823928;
|
||||||
|
static const int __tmpcosmo_AF_MAX = -15824082;
|
||||||
|
static const int __tmpcosmo_AF_MPLS = -15824026;
|
||||||
|
static const int __tmpcosmo_AF_NETBEUI = -15824124;
|
||||||
|
static const int __tmpcosmo_AF_NETLINK = -15824004;
|
||||||
|
static const int __tmpcosmo_AF_NETROM = -15823886;
|
||||||
|
static const int __tmpcosmo_AF_NFC = -15824142;
|
||||||
|
static const int __tmpcosmo_AF_PACKET = -15824028;
|
||||||
|
static const int __tmpcosmo_AF_PHONET = -15823830;
|
||||||
|
static const int __tmpcosmo_AF_PPPOX = -15823876;
|
||||||
|
static const int __tmpcosmo_AF_ROSE = -15824016;
|
||||||
|
static const int __tmpcosmo_AF_ROUTE = -15824100;
|
||||||
|
static const int __tmpcosmo_AF_RXRPC = -15823926;
|
||||||
|
static const int __tmpcosmo_AF_SECURITY = -15824136;
|
||||||
|
static const int __tmpcosmo_AF_SNA = -15823950;
|
||||||
|
static const int __tmpcosmo_AF_TIPC = -15824034;
|
||||||
|
static const int __tmpcosmo_AF_VSOCK = -15824146;
|
||||||
|
static const int __tmpcosmo_AF_WANPIPE = -15823960;
|
||||||
|
static const int __tmpcosmo_AF_X25 = -15823864;
|
||||||
|
static const int __tmpcosmo_E2BIG = -15823698;
|
||||||
|
static const int __tmpcosmo_EACCES = -15823580;
|
||||||
|
static const int __tmpcosmo_EADDRINUSE = -15823756;
|
||||||
|
static const int __tmpcosmo_EADDRNOTAVAIL = -15823592;
|
||||||
|
static const int __tmpcosmo_EADV = -15823574;
|
||||||
|
static const int __tmpcosmo_EAFNOSUPPORT = -15823748;
|
||||||
|
static const int __tmpcosmo_EAGAIN = -15823506;
|
||||||
|
static const int __tmpcosmo_EALREADY = -15823530;
|
||||||
|
static const int __tmpcosmo_EAUTH = -15823702;
|
||||||
|
static const int __tmpcosmo_EBADARCH = -15823738;
|
||||||
|
static const int __tmpcosmo_EBADE = -15823740;
|
||||||
|
static const int __tmpcosmo_EBADEXEC = -15823684;
|
||||||
|
static const int __tmpcosmo_EBADF = -15823744;
|
||||||
|
static const int __tmpcosmo_EBADFD = -15823554;
|
||||||
|
static const int __tmpcosmo_EBADMACHO = -15823618;
|
||||||
|
static const int __tmpcosmo_EBADMSG = -15823650;
|
||||||
|
static const int __tmpcosmo_EBADR = -15823570;
|
||||||
|
static const int __tmpcosmo_EBADRPC = -15823626;
|
||||||
|
static const int __tmpcosmo_EBADRQC = -15823688;
|
||||||
|
static const int __tmpcosmo_EBADSLT = -15823788;
|
||||||
|
static const int __tmpcosmo_EBUSY = -15823550;
|
||||||
|
static const int __tmpcosmo_ECANCELED = -15823676;
|
||||||
|
static const int __tmpcosmo_ECHILD = -15823662;
|
||||||
|
static const int __tmpcosmo_ECHRNG = -15823722;
|
||||||
|
static const int __tmpcosmo_ECOMM = -15823634;
|
||||||
|
static const int __tmpcosmo_ECONNABORTED = -15823616;
|
||||||
|
static const int __tmpcosmo_ECONNREFUSED = -15823556;
|
||||||
|
static const int __tmpcosmo_ECONNRESET = -15823548;
|
||||||
|
static const int __tmpcosmo_EDEADLK = -15823718;
|
||||||
|
static const int __tmpcosmo_EDESTADDRREQ = -15823658;
|
||||||
|
static const int __tmpcosmo_EDEVERR = -15823518;
|
||||||
|
static const int __tmpcosmo_EDOM = -15823798;
|
||||||
|
static const int __tmpcosmo_EDOTDOT = -15823726;
|
||||||
|
static const int __tmpcosmo_EDQUOT = -15823620;
|
||||||
|
static const int __tmpcosmo_EEXIST = -15823594;
|
||||||
|
static const int __tmpcosmo_EFAULT = -15823686;
|
||||||
|
static const int __tmpcosmo_EFBIG = -15823768;
|
||||||
|
static const int __tmpcosmo_EFTYPE = -15823568;
|
||||||
|
static const int __tmpcosmo_EHOSTDOWN = -15823596;
|
||||||
|
static const int __tmpcosmo_EHOSTUNREACH = -15823742;
|
||||||
|
static const int __tmpcosmo_EHWPOISON = -15823680;
|
||||||
|
static const int __tmpcosmo_EIDRM = -15823644;
|
||||||
|
static const int __tmpcosmo_EILSEQ = -15823540;
|
||||||
|
static const int __tmpcosmo_EINPROGRESS = -15823720;
|
||||||
|
static const int __tmpcosmo_EINTR = -15823710;
|
||||||
|
static const int __tmpcosmo_EINVAL = -15823624;
|
||||||
|
static const int __tmpcosmo_EIO = -15823544;
|
||||||
|
static const int __tmpcosmo_EISCONN = -15823704;
|
||||||
|
static const int __tmpcosmo_EISDIR = -15823758;
|
||||||
|
static const int __tmpcosmo_EISNAM = -15823682;
|
||||||
|
static const int __tmpcosmo_EKEYEXPIRED = -15823520;
|
||||||
|
static const int __tmpcosmo_EKEYREJECTED = -15823712;
|
||||||
|
static const int __tmpcosmo_EKEYREVOKED = -15823780;
|
||||||
|
static const int __tmpcosmo_EL2HLT = -15823510;
|
||||||
|
static const int __tmpcosmo_EL2NSYNC = -15823670;
|
||||||
|
static const int __tmpcosmo_EL3HLT = -15823792;
|
||||||
|
static const int __tmpcosmo_EL3RST = -15823654;
|
||||||
|
static const int __tmpcosmo_ELIBACC = -15823708;
|
||||||
|
static const int __tmpcosmo_ELIBBAD = -15823564;
|
||||||
|
static const int __tmpcosmo_ELIBEXEC = -15823696;
|
||||||
|
static const int __tmpcosmo_ELIBMAX = -15823724;
|
||||||
|
static const int __tmpcosmo_ELIBSCN = -15823786;
|
||||||
|
static const int __tmpcosmo_ELNRNG = -15823732;
|
||||||
|
static const int __tmpcosmo_ELOOP = -15823672;
|
||||||
|
static const int __tmpcosmo_EMEDIUMTYPE = -15823508;
|
||||||
|
static const int __tmpcosmo_EMFILE = -15823762;
|
||||||
|
static const int __tmpcosmo_EMLINK = -15823694;
|
||||||
|
static const int __tmpcosmo_EMSGSIZE = -15823536;
|
||||||
|
static const int __tmpcosmo_EMULTIHOP = -15823750;
|
||||||
|
static const int __tmpcosmo_ENAMETOOLONG = -15823600;
|
||||||
|
static const int __tmpcosmo_ENAVAIL = -15823656;
|
||||||
|
static const int __tmpcosmo_ENEEDAUTH = -15823766;
|
||||||
|
static const int __tmpcosmo_ENETDOWN = -15823730;
|
||||||
|
static const int __tmpcosmo_ENETRESET = -15823604;
|
||||||
|
static const int __tmpcosmo_ENETUNREACH = -15823524;
|
||||||
|
static const int __tmpcosmo_ENFILE = -15823700;
|
||||||
|
static const int __tmpcosmo_ENOANO = -15823734;
|
||||||
|
static const int __tmpcosmo_ENOATTR = -15823606;
|
||||||
|
static const int __tmpcosmo_ENOBUFS = -15823628;
|
||||||
|
static const int __tmpcosmo_ENOCSI = -15823760;
|
||||||
|
static const int __tmpcosmo_ENODATA = -15823516;
|
||||||
|
static const int __tmpcosmo_ENODEV = -15823774;
|
||||||
|
static const int __tmpcosmo_ENOENT = -15823590;
|
||||||
|
static const int __tmpcosmo_ENOEXEC = -15823512;
|
||||||
|
static const int __tmpcosmo_ENOKEY = -15823764;
|
||||||
|
static const int __tmpcosmo_ENOLCK = -15823782;
|
||||||
|
static const int __tmpcosmo_ENOLINK = -15823538;
|
||||||
|
static const int __tmpcosmo_ENOMEDIUM = -15823598;
|
||||||
|
static const int __tmpcosmo_ENOMEM = -15823514;
|
||||||
|
static const int __tmpcosmo_ENOMSG = -15823796;
|
||||||
|
static const int __tmpcosmo_ENONET = -15823642;
|
||||||
|
static const int __tmpcosmo_ENOPKG = -15823664;
|
||||||
|
static const int __tmpcosmo_ENOPOLICY = -15823716;
|
||||||
|
static const int __tmpcosmo_ENOPROTOOPT = -15823608;
|
||||||
|
static const int __tmpcosmo_ENOSPC = -15823646;
|
||||||
|
static const int __tmpcosmo_ENOSR = -15823558;
|
||||||
|
static const int __tmpcosmo_ENOSTR = -15823706;
|
||||||
|
static const int __tmpcosmo_ENOSYS = -15823636;
|
||||||
|
static const int __tmpcosmo_ENOTBLK = -15823640;
|
||||||
|
static const int __tmpcosmo_ENOTCONN = -15823778;
|
||||||
|
static const int __tmpcosmo_ENOTDIR = -15823648;
|
||||||
|
static const int __tmpcosmo_ENOTEMPTY = -15823552;
|
||||||
|
static const int __tmpcosmo_ENOTNAM = -15823532;
|
||||||
|
static const int __tmpcosmo_ENOTRECOVERABLE = -15823746;
|
||||||
|
static const int __tmpcosmo_ENOTSOCK = -15823582;
|
||||||
|
static const int __tmpcosmo_ENOTSUP = -15823602;
|
||||||
|
static const int __tmpcosmo_ENOTTY = -15823528;
|
||||||
|
static const int __tmpcosmo_ENOTUNIQ = -15823790;
|
||||||
|
static const int __tmpcosmo_ENXIO = -15823622;
|
||||||
|
static const int __tmpcosmo_EOPNOTSUPP = -15823588;
|
||||||
|
static const int __tmpcosmo_EOVERFLOW = -15823736;
|
||||||
|
static const int __tmpcosmo_EOWNERDEAD = -15823562;
|
||||||
|
static const int __tmpcosmo_EPERM = -15823754;
|
||||||
|
static const int __tmpcosmo_EPFNOSUPPORT = -15823690;
|
||||||
|
static const int __tmpcosmo_EPIPE = -15823534;
|
||||||
|
static const int __tmpcosmo_EPROCLIM = -15823610;
|
||||||
|
static const int __tmpcosmo_EPROCUNAVAIL = -15823546;
|
||||||
|
static const int __tmpcosmo_EPROGMISMATCH = -15823572;
|
||||||
|
static const int __tmpcosmo_EPROGUNAVAIL = -15823526;
|
||||||
|
static const int __tmpcosmo_EPROTO = -15823678;
|
||||||
|
static const int __tmpcosmo_EPROTONOSUPPORT = -15823576;
|
||||||
|
static const int __tmpcosmo_EPROTOTYPE = -15823614;
|
||||||
|
static const int __tmpcosmo_EPWROFF = -15823692;
|
||||||
|
static const int __tmpcosmo_ERANGE = -15823772;
|
||||||
|
static const int __tmpcosmo_EREMCHG = -15823666;
|
||||||
|
static const int __tmpcosmo_EREMOTE = -15823560;
|
||||||
|
static const int __tmpcosmo_EREMOTEIO = -15823794;
|
||||||
|
static const int __tmpcosmo_ERESTART = -15823728;
|
||||||
|
static const int __tmpcosmo_ERFKILL = -15823612;
|
||||||
|
static const int __tmpcosmo_EROFS = -15823566;
|
||||||
|
static const int __tmpcosmo_ERPCMISMATCH = -15823542;
|
||||||
|
static const int __tmpcosmo_ESHLIBVERS = -15823584;
|
||||||
|
static const int __tmpcosmo_ESHUTDOWN = -15823660;
|
||||||
|
static const int __tmpcosmo_ESOCKTNOSUPPORT = -15823776;
|
||||||
|
static const int __tmpcosmo_ESPIPE = -15823652;
|
||||||
|
static const int __tmpcosmo_ESRCH = -15823674;
|
||||||
|
static const int __tmpcosmo_ESRMNT = -15823714;
|
||||||
|
static const int __tmpcosmo_ESTALE = -15823632;
|
||||||
|
static const int __tmpcosmo_ESTRPIPE = -15823770;
|
||||||
|
static const int __tmpcosmo_ETIME = -15823630;
|
||||||
|
static const int __tmpcosmo_ETIMEDOUT = -15823522;
|
||||||
|
static const int __tmpcosmo_ETOOMANYREFS = -15823586;
|
||||||
|
static const int __tmpcosmo_ETXTBSY = -15823638;
|
||||||
|
static const int __tmpcosmo_EUCLEAN = -15823578;
|
||||||
|
static const int __tmpcosmo_EUNATCH = -15823504;
|
||||||
|
static const int __tmpcosmo_EUSERS = -15823668;
|
||||||
|
static const int __tmpcosmo_EXDEV = -15823752;
|
||||||
|
static const int __tmpcosmo_EXFULL = -15823784;
|
||||||
|
static const int __tmpcosmo_F_DUPFD_CLOEXEC = -15823938;
|
||||||
|
static const int __tmpcosmo_F_GETLEASE = -15823862;
|
||||||
|
static const int __tmpcosmo_F_GETLK = -15823916;
|
||||||
|
static const int __tmpcosmo_F_GETLK64 = -15823846;
|
||||||
|
static const int __tmpcosmo_F_GETOWN = -15824116;
|
||||||
|
static const int __tmpcosmo_F_GETPATH = -15824128;
|
||||||
|
static const int __tmpcosmo_F_GETPIPE_SZ = -15824006;
|
||||||
|
static const int __tmpcosmo_F_GETSIG = -15824112;
|
||||||
|
static const int __tmpcosmo_F_MAXFD = -15823896;
|
||||||
|
static const int __tmpcosmo_F_NOCACHE = -15824048;
|
||||||
|
static const int __tmpcosmo_F_NOTIFY = -15823898;
|
||||||
|
static const int __tmpcosmo_F_RDLCK = -15823826;
|
||||||
|
static const int __tmpcosmo_F_SETLEASE = -15823884;
|
||||||
|
static const int __tmpcosmo_F_SETLK = -15824088;
|
||||||
|
static const int __tmpcosmo_F_SETLK64 = -15824154;
|
||||||
|
static const int __tmpcosmo_F_SETLKW = -15824096;
|
||||||
|
static const int __tmpcosmo_F_SETLKW64 = -15824104;
|
||||||
|
static const int __tmpcosmo_F_SETOWN = -15823874;
|
||||||
|
static const int __tmpcosmo_F_SETPIPE_SZ = -15823958;
|
||||||
|
static const int __tmpcosmo_F_SETSIG = -15823832;
|
||||||
|
static const int __tmpcosmo_F_UNLCK = -15824148;
|
||||||
|
static const int __tmpcosmo_F_WRLCK = -15824058;
|
||||||
|
static const int __tmpcosmo_IFF_ALLMULTI = -15824140;
|
||||||
|
static const int __tmpcosmo_IFF_AUTOMEDIA = -15823962;
|
||||||
|
static const int __tmpcosmo_IFF_DYNAMIC = -15823848;
|
||||||
|
static const int __tmpcosmo_IFF_MASTER = -15823900;
|
||||||
|
static const int __tmpcosmo_IFF_MULTICAST = -15824000;
|
||||||
|
static const int __tmpcosmo_IFF_NOARP = -15823802;
|
||||||
|
static const int __tmpcosmo_IFF_NOTRAILERS = -15824130;
|
||||||
|
static const int __tmpcosmo_IFF_POINTOPOINT = -15824138;
|
||||||
|
static const int __tmpcosmo_IFF_PORTSEL = -15824150;
|
||||||
|
static const int __tmpcosmo_IFF_PROMISC = -15824010;
|
||||||
|
static const int __tmpcosmo_IFF_RUNNING = -15824080;
|
||||||
|
static const int __tmpcosmo_IFF_SLAVE = -15824022;
|
||||||
|
static const int __tmpcosmo_LOCAL_PEERCRED = -15823986;
|
||||||
|
static const int __tmpcosmo_SIGBUS = -15824132;
|
||||||
|
static const int __tmpcosmo_SIGCHLD = -15824036;
|
||||||
|
static const int __tmpcosmo_SIGCONT = -15823836;
|
||||||
|
static const int __tmpcosmo_SIGEMT = -15823972;
|
||||||
|
static const int __tmpcosmo_SIGINFO = -15824086;
|
||||||
|
static const int __tmpcosmo_SIGIO = -15823912;
|
||||||
|
static const int __tmpcosmo_SIGPOLL = -15823854;
|
||||||
|
static const int __tmpcosmo_SIGPWR = -15824114;
|
||||||
|
static const int __tmpcosmo_SIGRTMAX = -15824040;
|
||||||
|
static const int __tmpcosmo_SIGRTMIN = -15824134;
|
||||||
|
static const int __tmpcosmo_SIGSTKFLT = -15823934;
|
||||||
|
static const int __tmpcosmo_SIGSTOP = -15824158;
|
||||||
|
static const int __tmpcosmo_SIGSYS = -15823922;
|
||||||
|
static const int __tmpcosmo_SIGTHR = -15823902;
|
||||||
|
static const int __tmpcosmo_SIGTSTP = -15823988;
|
||||||
|
static const int __tmpcosmo_SIGUNUSED = -15823970;
|
||||||
|
static const int __tmpcosmo_SIGURG = -15823952;
|
||||||
|
static const int __tmpcosmo_SIGUSR1 = -15824018;
|
||||||
|
static const int __tmpcosmo_SIGUSR2 = -15823998;
|
||||||
|
static const int __tmpcosmo_SIG_BLOCK = -15823800;
|
||||||
|
static const int __tmpcosmo_SIG_SETMASK = -15824090;
|
||||||
|
static const int __tmpcosmo_SIG_UNBLOCK = -15824078;
|
||||||
|
static const int __tmpcosmo_SOL_AAL = -15823976;
|
||||||
|
static const int __tmpcosmo_SOL_ALG = -15823956;
|
||||||
|
static const int __tmpcosmo_SOL_ATM = -15823914;
|
||||||
|
static const int __tmpcosmo_SOL_BLUETOOTH = -15824062;
|
||||||
|
static const int __tmpcosmo_SOL_CAIF = -15823904;
|
||||||
|
static const int __tmpcosmo_SOL_DCCP = -15823814;
|
||||||
|
static const int __tmpcosmo_SOL_DECNET = -15823842;
|
||||||
|
static const int __tmpcosmo_SOL_ICMPV6 = -15823908;
|
||||||
|
static const int __tmpcosmo_SOL_IPV6 = -15823808;
|
||||||
|
static const int __tmpcosmo_SOL_IRDA = -15823880;
|
||||||
|
static const int __tmpcosmo_SOL_IUCV = -15824156;
|
||||||
|
static const int __tmpcosmo_SOL_KCM = -15824092;
|
||||||
|
static const int __tmpcosmo_SOL_LLC = -15823930;
|
||||||
|
static const int __tmpcosmo_SOL_NETBEUI = -15823894;
|
||||||
|
static const int __tmpcosmo_SOL_NETLINK = -15824012;
|
||||||
|
static const int __tmpcosmo_SOL_NFC = -15823942;
|
||||||
|
static const int __tmpcosmo_SOL_PACKET = -15823806;
|
||||||
|
static const int __tmpcosmo_SOL_PNPIPE = -15823968;
|
||||||
|
static const int __tmpcosmo_SOL_PPPOL2TP = -15823816;
|
||||||
|
static const int __tmpcosmo_SOL_RAW = -15824044;
|
||||||
|
static const int __tmpcosmo_SOL_RDS = -15824020;
|
||||||
|
static const int __tmpcosmo_SOL_RXRPC = -15823984;
|
||||||
|
static const int __tmpcosmo_SOL_SOCKET = -15824050;
|
||||||
|
static const int __tmpcosmo_SOL_TIPC = -15823940;
|
||||||
|
static const int __tmpcosmo_SOL_X25 = -15823856;
|
||||||
|
static const int __tmpcosmo_SO_ACCEPTCONN = -15823872;
|
||||||
|
static const int __tmpcosmo_SO_ATTACH_BPF = -15824072;
|
||||||
|
static const int __tmpcosmo_SO_ATTACH_FILTER = -15824094;
|
||||||
|
static const int __tmpcosmo_SO_ATTACH_REUSEPORT_CBPF = -15823964;
|
||||||
|
static const int __tmpcosmo_SO_ATTACH_REUSEPORT_EBPF = -15824060;
|
||||||
|
static const int __tmpcosmo_SO_BINDTODEVICE = -15823990;
|
||||||
|
static const int __tmpcosmo_SO_BPF_EXTENSIONS = -15824030;
|
||||||
|
static const int __tmpcosmo_SO_BROADCAST = -15823882;
|
||||||
|
static const int __tmpcosmo_SO_BSDCOMPAT = -15824038;
|
||||||
|
static const int __tmpcosmo_SO_BUSY_POLL = -15823944;
|
||||||
|
static const int __tmpcosmo_SO_CNX_ADVICE = -15823828;
|
||||||
|
static const int __tmpcosmo_SO_DETACH_BPF = -15824068;
|
||||||
|
static const int __tmpcosmo_SO_DETACH_FILTER = -15824032;
|
||||||
|
static const int __tmpcosmo_SO_DOMAIN = -15823980;
|
||||||
|
static const int __tmpcosmo_SO_DONTROUTE = -15823918;
|
||||||
|
static const int __tmpcosmo_SO_ERROR = -15823892;
|
||||||
|
static const int __tmpcosmo_SO_EXCLUSIVEADDRUSE = -15823858;
|
||||||
|
static const int __tmpcosmo_SO_GET_FILTER = -15823834;
|
||||||
|
static const int __tmpcosmo_SO_INCOMING_CPU = -15824074;
|
||||||
|
static const int __tmpcosmo_SO_KEEPALIVE = -15823890;
|
||||||
|
static const int __tmpcosmo_SO_LINGER = -15824084;
|
||||||
|
static const int __tmpcosmo_SO_LOCK_FILTER = -15823804;
|
||||||
|
static const int __tmpcosmo_SO_MARK = -15824008;
|
||||||
|
static const int __tmpcosmo_SO_MAX_PACING_RATE = -15824120;
|
||||||
|
static const int __tmpcosmo_SO_NOFCS = -15823818;
|
||||||
|
static const int __tmpcosmo_SO_NO_CHECK = -15824152;
|
||||||
|
static const int __tmpcosmo_SO_OOBINLINE = -15823838;
|
||||||
|
static const int __tmpcosmo_SO_PASSCRED = -15823888;
|
||||||
|
static const int __tmpcosmo_SO_PASSSEC = -15823866;
|
||||||
|
static const int __tmpcosmo_SO_PEEK_OFF = -15823870;
|
||||||
|
static const int __tmpcosmo_SO_PEERCRED = -15823954;
|
||||||
|
static const int __tmpcosmo_SO_PEERNAME = -15824042;
|
||||||
|
static const int __tmpcosmo_SO_PEERSEC = -15823844;
|
||||||
|
static const int __tmpcosmo_SO_PRIORITY = -15824122;
|
||||||
|
static const int __tmpcosmo_SO_PROTOCOL = -15823982;
|
||||||
|
static const int __tmpcosmo_SO_RCVBUF = -15823974;
|
||||||
|
static const int __tmpcosmo_SO_RCVBUFFORCE = -15823994;
|
||||||
|
static const int __tmpcosmo_SO_RCVLOWAT = -15824076;
|
||||||
|
static const int __tmpcosmo_SO_RCVTIMEO = -15824046;
|
||||||
|
static const int __tmpcosmo_SO_REUSEADDR = -15823810;
|
||||||
|
static const int __tmpcosmo_SO_REUSEPORT = -15823822;
|
||||||
|
static const int __tmpcosmo_SO_RXQ_OVFL = -15824066;
|
||||||
|
static const int __tmpcosmo_SO_SECURITY_AUTHENTICATION = -15824098;
|
||||||
|
static const int __tmpcosmo_SO_SECURITY_ENCRYPTION_NETWORK = -15824126;
|
||||||
|
static const int __tmpcosmo_SO_SELECT_ERR_QUEUE = -15824052;
|
||||||
|
static const int __tmpcosmo_SO_SETFIB = -15823920;
|
||||||
|
static const int __tmpcosmo_SO_SNDBUF = -15824102;
|
||||||
|
static const int __tmpcosmo_SO_SNDBUFFORCE = -15823840;
|
||||||
|
static const int __tmpcosmo_SO_SNDLOWAT = -15823946;
|
||||||
|
static const int __tmpcosmo_SO_SNDTIMEO = -15824064;
|
||||||
|
static const int __tmpcosmo_SO_TIMESTAMP = -15823932;
|
||||||
|
static const int __tmpcosmo_SO_TIMESTAMPING = -15824054;
|
||||||
|
static const int __tmpcosmo_SO_TIMESTAMPNS = -15823910;
|
||||||
|
static const int __tmpcosmo_SO_TYPE = -15824144;
|
||||||
|
static const int __tmpcosmo_SO_USELOOPBACK = -15824110;
|
||||||
|
static const int __tmpcosmo_SO_WIFI_STATUS = -15824108;
|
||||||
|
static const unsigned int __tmpcosmo_B1000000 = 15823512;
|
||||||
|
static const unsigned int __tmpcosmo_B110 = 15823518;
|
||||||
|
static const unsigned int __tmpcosmo_B115200 = 15823540;
|
||||||
|
static const unsigned int __tmpcosmo_B1152000 = 15823538;
|
||||||
|
static const unsigned int __tmpcosmo_B1200 = 15823548;
|
||||||
|
static const unsigned int __tmpcosmo_B134 = 15823510;
|
||||||
|
static const unsigned int __tmpcosmo_B150 = 15823542;
|
||||||
|
static const unsigned int __tmpcosmo_B1500000 = 15823508;
|
||||||
|
static const unsigned int __tmpcosmo_B1800 = 15823522;
|
||||||
|
static const unsigned int __tmpcosmo_B19200 = 15823546;
|
||||||
|
static const unsigned int __tmpcosmo_B200 = 15823528;
|
||||||
|
static const unsigned int __tmpcosmo_B2000000 = 15823524;
|
||||||
|
static const unsigned int __tmpcosmo_B230400 = 15823516;
|
||||||
|
static const unsigned int __tmpcosmo_B2400 = 15823526;
|
||||||
|
static const unsigned int __tmpcosmo_B2500000 = 15823558;
|
||||||
|
static const unsigned int __tmpcosmo_B300 = 15823534;
|
||||||
|
static const unsigned int __tmpcosmo_B3000000 = 15823530;
|
||||||
|
static const unsigned int __tmpcosmo_B3500000 = 15823544;
|
||||||
|
static const unsigned int __tmpcosmo_B38400 = 15823514;
|
||||||
|
static const unsigned int __tmpcosmo_B4000000 = 15823520;
|
||||||
|
static const unsigned int __tmpcosmo_B4800 = 15823556;
|
||||||
|
static const unsigned int __tmpcosmo_B50 = 15823532;
|
||||||
|
static const unsigned int __tmpcosmo_B500000 = 15823550;
|
||||||
|
static const unsigned int __tmpcosmo_B57600 = 15823552;
|
||||||
|
static const unsigned int __tmpcosmo_B576000 = 15823506;
|
||||||
|
static const unsigned int __tmpcosmo_B600 = 15823554;
|
||||||
|
static const unsigned int __tmpcosmo_B75 = 15823536;
|
||||||
|
static const unsigned int __tmpcosmo_B9600 = 15823504;
|
||||||
|
static const unsigned short __tmpcosmo_AF_INET6 = 58236;
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* ACTUALLY_MODS */
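Every __tmpcosmo_* entry above is a unique sentinel value rather than the real constant. As a rough illustration of the idea (the actual substitution is performed by the patched compiler and startup code, not by this header; the names and numbers below are hypothetical):

    #include <stdio.h>

    /* Hypothetical sentinel in the spirit of the __tmpcosmo_* table above. */
    static const int __tmpcosmo_EXAMPLE = -15823624;

    /* A static initializer that needs a compile-time constant gets the
       sentinel instead of the real, OS-dependent value. */
    static int g_example = -15823624;

    /* At startup, storage still holding the sentinel is patched to the
       value the running operating system actually uses. */
    static void fixup(int real_value) {
      if (g_example == __tmpcosmo_EXAMPLE) g_example = real_value;
    }

    int main(void) {
      fixup(22); /* e.g. EINVAL is 22 on Linux */
      printf("patched value: %d\n", g_example);
      return 0;
    }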
libc/calls/struct/timespec.h

@@ -1,11 +1,27 @@
 #ifndef COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_
 #define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_
+
+#ifdef COSMO
+#define timespec_get __timespec_get
+#define timespec_getres __timespec_getres
+#define timespec_cmp __timespec_cmp
+#define timespec_tomicros __timespec_tomicros
+#define timespec_tomillis __timespec_tomillis
+#define timespec_tonanos __timespec_tonanos
+#define timespec_add __timespec_add
+#define timespec_fromnanos __timespec_fromnanos
+#define timespec_frommicros __timespec_frommicros
+#define timespec_frommillis __timespec_frommillis
+#define timespec_real __timespec_real
+#define timespec_mono __timespec_mono
+#define timespec_sleep __timespec_sleep
+#define timespec_sleep_until __timespec_sleep_until
+#define timespec_sub __timespec_sub
+#endif /* COSMO */
 
 #if !(__ASSEMBLER__ + __LINKER__ + 0)
 COSMOPOLITAN_C_START_
 
-#define timespec_zero ((struct timespec){0})
-#define timespec_max ((struct timespec){0x7fffffffffffffff, 999999999})
-
 struct timespec {
   int64_t tv_sec;
   int64_t tv_nsec; /* nanoseconds */

@@ -18,9 +34,14 @@ int futimens(int, const struct timespec[2]);
 int nanosleep(const struct timespec *, struct timespec *);
 int sys_futex(int *, int, int, const struct timespec *, int *);
 int utimensat(int, const char *, const struct timespec[2], int);
 
+#ifdef COSMO
+/* cosmopolitan libc's non-posix timespec library
+   removed by default due to emacs codebase clash */
+#define timespec_zero ((struct timespec){0})
+#define timespec_max ((struct timespec){0x7fffffffffffffff, 999999999})
 int timespec_get(struct timespec *, int);
 int timespec_getres(struct timespec *, int);
 
 int timespec_cmp(struct timespec, struct timespec) pureconst;
 int64_t timespec_tomicros(struct timespec) pureconst;
 int64_t timespec_tomillis(struct timespec) pureconst;

@@ -34,6 +55,7 @@ struct timespec timespec_mono(void);
 struct timespec timespec_sleep(struct timespec);
 int timespec_sleep_until(struct timespec);
 struct timespec timespec_sub(struct timespec, struct timespec) pureconst;
+#endif /* COSMO */
 
 COSMOPOLITAN_C_END_
 #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
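For context, the COSMO-gated helpers declared above are plain value-passing functions. A small usage sketch, assuming a Cosmopolitan build where COSMO is defined and the repo's usual include paths are available:

    #include "libc/calls/struct/timespec.h"
    #include "libc/stdio/stdio.h"

    /* Sleep ~50ms and report how long it actually took, using the
       helpers declared in the header above. */
    int main(void) {
      struct timespec start = timespec_real();
      timespec_sleep(timespec_frommillis(50));
      struct timespec elapsed = timespec_sub(timespec_real(), start);
      printf("slept for about %ld ms\n", (long)timespec_tomillis(elapsed));
      return 0;
    }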
libc/calls/struct/timeval.h

@@ -2,6 +2,16 @@
 #define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMEVAL_H_
 #include "libc/calls/struct/timespec.h"
 #include "libc/time/struct/timezone.h"
+
+#ifdef COSMO
+#define timeval_cmp __timeval_cmp
+#define timeval_frommicros __timeval_frommicros
+#define timeval_frommillis __timeval_frommillis
+#define timeval_add __timeval_add
+#define timeval_sub __timeval_sub
+#define timeval_totimespec __timeval_totimespec
+#endif /* COSMO */
 
 #if !(__ASSEMBLER__ + __LINKER__ + 0)
 COSMOPOLITAN_C_START_
 

@@ -16,6 +26,9 @@ int gettimeofday(struct timeval *, struct timezone *);
 int lutimes(const char *, const struct timeval[2]);
 int utimes(const char *, const struct timeval[2]);
 
+#ifdef COSMO
+/* cosmopolitan libc's non-posix timevals library
+   removed by default due to emacs codebase clash */
 int timeval_cmp(struct timeval, struct timeval) pureconst;
 struct timeval timeval_frommicros(int64_t) pureconst;
 struct timeval timeval_frommillis(int64_t) pureconst;

@@ -23,6 +36,7 @@ struct timeval timeval_add(struct timeval, struct timeval) pureconst;
 struct timeval timeval_sub(struct timeval, struct timeval) pureconst;
 struct timeval timespec_totimeval(struct timespec) pureconst;
 struct timespec timeval_totimespec(struct timeval) pureconst;
+#endif /* COSMO */
 
 COSMOPOLITAN_C_END_
 #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
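The timeval helpers mirror the timespec ones. A short usage sketch under the same assumptions (COSMO defined, Cosmopolitan include paths):

    #include "libc/calls/struct/timeval.h"
    #include "libc/stdio/stdio.h"

    /* Build two timevals, add them, and convert the sum to a timespec,
       using the COSMO-gated helpers declared in the header above. */
    int main(void) {
      struct timeval a = timeval_frommillis(1500);
      struct timeval b = timeval_frommicros(2500000);
      struct timeval sum = timeval_add(a, b);
      struct timespec ts = timeval_totimespec(sum);
      if (timeval_cmp(a, b) < 0) {
        printf("a < b, a+b = %ld.%09ld seconds\n", (long)ts.tv_sec,
               (long)ts.tv_nsec);
      }
      return 0;
    }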
@@ -725,6 +725,9 @@ void abort(void) wontreturn;
 #endif /* GCC8+ */
 #if __GNUC__ + 0 >= 9
 #pragma GCC diagnostic ignored /* "always true" breaks dce */ "-Waddress"
+#if __GNUC__ >= 11
+#pragma GCC diagnostic ignored /* orwellian */ "-Wold-style-definition"
+#endif /* GCC11+ */
 #endif /* GCC9+ */
 #endif /* !C++ */
 #endif /* GCC && !LLVM */
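The new block only suppresses -Wold-style-definition on GCC 11 and later; that warning flags K&R-style (unprototyped) function definitions, for example:

    /* Old-style (K&R) definition: parameter types are declared separately.
       This is the form -Wold-style-definition complains about. */
    int twice(x)
    int x;
    {
      return x + x;
    }

    /* Prototype-style definition of the same function: no warning. */
    int twice_ansi(int x) {
      return x + x;
    }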
third_party/gcc/README.cosmo (vendored, 238 changes)
@@ -1,232 +1,28 @@
+DESCRIPTION
+
+Cosmopolitan GCC
+Prebuilt x86_64-linux binaries
+An APE-friendly C/C++ compiler
+
+LICENSE
+
+GPLv3 and other licenses (see LICENSE.txt)
+
+ORIGIN
+
+@ahgamut's musl-cross-make fork
+https://github.com/ahgamut/musl-cross-make/
+d0f33e2162cf5e5b30cdf3b3accc0d0f7756830c
+
+MODIFICATIONS
+
+ahgamut's musl-cross-make fork includes a 2kLOC patch that modifies
+GCC so it'll compile C code like `switch(errno){case EINVAL: etc.}`
+
+SEE ALSO
+
+third_party/gcc/portcosmo.patch
+
+NOTES
+
+My name is Justine Tunney and I approve of these binaries.

-This is a modern statically-linked GNU C2X toolchain.
-
-You have the freedom to obtain the original sources to these binaries,
-and build ones just like them, by visiting:
-
-https://www.gnu.org/
-https://github.com/richfelker/musl-cross-make
-
-The musl-cross-make tool also produces libraries and header files. We've
-only vendored the statically-linked executable files, since Cosmopolitan
-won't depend on GPL-licensed headers / runtime libraries.
-
-We haven't made any modifications to the original software. The versions
-we chose are documented in $PKG/LICENSE.txt. Here's our Musl
-build config for maximum transparency:
-
-commit 38e52db8358c043ae82b346a2e6e66bc86a53bc1
-Author: Rich Felker <dalias@aerifal.cx>
-Date: Wed Dec 18 14:29:07 2019 -0500
-
-switch linux kernel headers to 4.19.88 by default
-
-using slim headers-only version. this change is needed to support all
-future versions of musl on 32-bit archs, since prior to 4.16 the
-kernel headers had incompatibility with userspace time_t not matching
-the kernel's old (32-bit) time_t. support for older headers will be
-dropped entirely soon.
-
-TARGET = x86_64-linux-musl
-OUTPUT = /opt/cross9
-GCC_VER = 9.2.0
-export LANG=en_US.UTF-8
-export LC_CTYPE=en_US.UTF-8
-COMMON_CONFIG += CC="/opt/cross9/bin/x86_64-linux-musl-cc -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
-COMMON_CONFIG += CXX="/opt/cross9/bin/x86_64-linux-musl-c++ -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
-COMMON_CONFIG += LD="/opt/cross9/bin/x86_64-linux-musl-ld --build-id=none"
-COMMON_CONFIG += NM="/opt/cross9/bin/x86_64-linux-musl-nm"
-COMMON_CONFIG += LDFLAGS="-Wl,--build-id=none"
-COMMON_CONFIG += OBJCOPY="/opt/cross9/bin/x86_64-linux-musl-objcopy"
-COMMON_CONFIG += --disable-nls --disable-lto
-GCC_CONFIG += --enable-languages=c,c++
-GCC_CONFIG += --disable-multilib
-GCC_CONFIG += --with-gnu-as
-GCC_CONFIG += --with-gnu-ld
-GCC_CONFIG += --disable-multilib
-GCC_CONFIG += --enable-sjlj-exceptions
-GCC_CONFIG += --disable-threads
-GCC_CONFIG += --disable-tls
-COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=
-
-#!/bin/sh
-set -e
-export LC_ALL=C
-export GUNZ="/bin/gzip --rsyncable -9 -c"
-BASE=/opt/cross9
-PKG=third_party/gcc
-VERS=9.2.0
-
-if [ ! -d $BASE ]; then
-echo error: run make install >&2
-exit 1
-fi
-
-if [ -d $BASE/$PKG ]; then
rm -rf $BASE/$PKG
|
|
||||||
fi
|
|
||||||
|
|
||||||
mkdir -p $BASE/$PKG/bin
|
|
||||||
mkdir -p $BASE/$PKG/libexec/gcc/x86_64-linux-musl/$VERS
|
|
||||||
mkdir -p $BASE/$PKG/x86_64-linux-musl/bin
|
|
||||||
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-gcov-dump $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-cc $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-addr2line $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-ar $BASE/$PKG/bin/x86_64-linux-musl-ar
|
|
||||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-c++ $BASE/$PKG/bin/x86_64-linux-musl-g++
|
|
||||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-gcc-nm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-c++filt $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-elfedit $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-ld $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-size $BASE/$PKG/bin/x86_64-linux-musl-size
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-strings $BASE/$PKG/bin/x86_64-linux-musl-strings
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-objcopy $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-nm $BASE/$PKG/bin/x86_64-linux-musl-nm
|
|
||||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-readelf $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-objdump $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-gcc-ar $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-gcov $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-ranlib $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-as $BASE/$PKG/bin/x86_64-linux-musl-as
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-gcc-ranlib $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-cpp $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-gprof $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
|
||||||
cp $BASE/bin/x86_64-linux-musl-gcov-tool $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
|
||||||
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ar
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-g++
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-size
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strings
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-nm
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-as
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
|
||||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
|
||||||
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump >$BASE/$PKG/bin/x86_64-linux-musl-gcov-dump.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc >$BASE/$PKG/bin/x86_64-linux-musl-gcc.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-addr2line >$BASE/$PKG/bin/x86_64-linux-musl-addr2line.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ar >$BASE/$PKG/bin/x86_64-linux-musl-ar.gz
|
|
||||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-g++ >$BASE/$PKG/bin/x86_64-linux-musl-g++.gz
|
|
||||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm >$BASE/$PKG/bin/x86_64-linux-musl-gcc-nm.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-c++filt >$BASE/$PKG/bin/x86_64-linux-musl-c++filt.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-elfedit >$BASE/$PKG/bin/x86_64-linux-musl-elfedit.gz
|
|
||||||
$GUNZ $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd >$BASE/$PKG/x86_64-linux-musl/bin/ld.bfd.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-size >$BASE/$PKG/bin/x86_64-linux-musl-size.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strings >$BASE/$PKG/bin/x86_64-linux-musl-strings.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objcopy >$BASE/$PKG/bin/x86_64-linux-musl-objcopy.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-nm >$BASE/$PKG/bin/x86_64-linux-musl-nm.gz
|
|
||||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-readelf >$BASE/$PKG/bin/x86_64-linux-musl-readelf.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objdump >$BASE/$PKG/bin/x86_64-linux-musl-objdump.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ar.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov >$BASE/$PKG/bin/x86_64-linux-musl-gcov.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-ranlib.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-as >$BASE/$PKG/bin/x86_64-linux-musl-as.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-cpp >$BASE/$PKG/bin/x86_64-linux-musl-cpp.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strip >$BASE/$PKG/bin/x86_64-linux-musl-strip.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gprof >$BASE/$PKG/bin/x86_64-linux-musl-gprof.gz
|
|
||||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool >$BASE/$PKG/bin/x86_64-linux-musl-gcov-tool.gz
|
|
||||||
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-ar
|
|
||||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-g++
|
|
||||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
|
||||||
rm $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-size
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-strings
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-nm
|
|
||||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-as
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-strip
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
|
||||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
|
||||||
|
|
||||||
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-cc
|
|
||||||
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-gcc-9.2.0
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-ar $BASE/$PKG/x86_64-linux-musl/bin/ar
|
|
||||||
ln -s x86_64-linux-musl-g++ $BASE/$PKG/bin/x86_64-linux-musl-c++
|
|
||||||
ln -s ld.bfd $BASE/$PKG/x86_64-linux-musl/bin/ld
|
|
||||||
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld.bfd
|
|
||||||
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-objcopy $BASE/$PKG/x86_64-linux-musl/bin/objcopy
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-nm $BASE/$PKG/x86_64-linux-musl/bin/nm
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-readelf $BASE/$PKG/x86_64-linux-musl/bin/readelf
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-objdump $BASE/$PKG/x86_64-linux-musl/bin/objdump
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-ranlib $BASE/$PKG/x86_64-linux-musl/bin/ranlib
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-as $BASE/$PKG/x86_64-linux-musl/bin/as
|
|
||||||
ln -s ../../bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/strip
|
|
||||||
|
|
||||||
{
|
|
||||||
cat <<'EOF'
|
|
||||||
This is a modern statically-linked GNU C2X toolchain.
|
|
||||||
|
|
||||||
You have the freedom to obtain the original sources to these binaries,
|
|
||||||
and build ones just like them, by visiting:
|
|
||||||
|
|
||||||
https://www.gnu.org/
|
|
||||||
https://github.com/richfelker/musl-cross-make
|
|
||||||
|
|
||||||
The musl-cross-make tool also produces libraries and header files. We've
|
|
||||||
only vendored the statically-linked executable files, since Cosmopolitan
|
|
||||||
won't depend on GPL-licensed headers / runtime libraries.
|
|
||||||
|
|
||||||
We haven't made any modifications to the original software. The versions
|
|
||||||
we chose are documented in $PKG/LICENSE.txt. Here's our Musl
|
|
||||||
build config for maximum transparency:
|
|
||||||
|
|
||||||
EOF
|
|
||||||
git show --quiet
|
|
||||||
echo
|
|
||||||
cat config.mak
|
|
||||||
echo
|
|
||||||
cat bundle.sh
|
|
||||||
} >$BASE/$PKG/README.cosmo
|
|
||||||
|
|
||||||
{
|
|
||||||
for f in $(find . -iname \*copying\* -or -iname \*license\* | sort); do
|
|
||||||
printf '\n'
|
|
||||||
printf '%s\n' "$f"
|
|
||||||
printf '========================================================================\n'
|
|
||||||
cat "$f"
|
|
||||||
done
|
|
||||||
} >$BASE/$PKG/LICENSE.txt
|
|
||||||
|
|
Binary files changed (vendored, contents not shown):

third_party/gcc/bin/x86_64-linux-musl-addr2line.gz
third_party/gcc/bin/x86_64-linux-musl-ar.gz
third_party/gcc/bin/x86_64-linux-musl-as.gz
third_party/gcc/bin/x86_64-linux-musl-c++filt.gz
third_party/gcc/bin/x86_64-linux-musl-cpp.gz
third_party/gcc/bin/x86_64-linux-musl-elfedit.gz
third_party/gcc/bin/x86_64-linux-musl-g++.gz
third_party/gcc/bin/x86_64-linux-musl-gcc-ar.gz
third_party/gcc/bin/x86_64-linux-musl-gcc-nm.gz
third_party/gcc/bin/x86_64-linux-musl-gcc-ranlib.gz
third_party/gcc/bin/x86_64-linux-musl-gcc.gz
third_party/gcc/bin/x86_64-linux-musl-gcov-dump.gz
third_party/gcc/bin/x86_64-linux-musl-gcov-tool.gz
third_party/gcc/bin/x86_64-linux-musl-gcov.gz
third_party/gcc/bin/x86_64-linux-musl-gprof.gz
third_party/gcc/bin/x86_64-linux-musl-nm.gz
third_party/gcc/bin/x86_64-linux-musl-objcopy.gz
third_party/gcc/bin/x86_64-linux-musl-objdump.gz
third_party/gcc/bin/x86_64-linux-musl-ranlib.gz
third_party/gcc/bin/x86_64-linux-musl-readelf.gz
third_party/gcc/bin/x86_64-linux-musl-size.gz
third_party/gcc/bin/x86_64-linux-musl-strings.gz
third_party/gcc/bin/x86_64-linux-musl-strip.gz
third_party/gcc/config.mak (vendored, new file, +97)

@@ -0,0 +1,97 @@
#
|
||||||
|
# config.mak.dist - sample musl-cross-make configuration
|
||||||
|
#
|
||||||
|
# Copy to config.mak and edit as desired.
|
||||||
|
#
|
||||||
|
|
||||||
|
# There is no default TARGET; you must select one here or on the make
|
||||||
|
# command line. Some examples:
|
||||||
|
|
||||||
|
# TARGET = i486-linux-musl
|
||||||
|
TARGET = aarch64-linux-musl
|
||||||
|
# TARGET = arm-linux-musleabi
|
||||||
|
# TARGET = arm-linux-musleabihf
|
||||||
|
# TARGET = sh2eb-linux-muslfdpic
|
||||||
|
# TARGET = powerpc64le-linux-musl
|
||||||
|
# TARGET = aarch64-linux-musl
|
||||||
|
|
||||||
|
# By default, cross compilers are installed to ./output under the top-level
|
||||||
|
# musl-cross-make directory and can later be moved wherever you want them.
|
||||||
|
# To install directly to a specific location, set it here. Multiple targets
|
||||||
|
# can safely be installed in the same location. Some examples:
|
||||||
|
|
||||||
|
OUTPUT = /opt/cross11portcosmo
|
||||||
|
# OUTPUT = /usr/local
|
||||||
|
|
||||||
|
# By default, latest supported release versions of musl and the toolchain
|
||||||
|
# components are used. You can override those here, but the version selected
|
||||||
|
# must be supported (under hashes/ and patches/) to work. For musl, you
|
||||||
|
# can use "git-refname" (e.g. git-master) instead of a release. Setting a
|
||||||
|
# blank version for gmp, mpc, mpfr and isl will suppress download and
|
||||||
|
# in-tree build of these libraries and instead depend on pre-installed
|
||||||
|
# libraries when available (isl is optional and not set by default).
|
||||||
|
# Setting a blank version for linux will suppress installation of kernel
|
||||||
|
# headers, which are not needed unless compiling programs that use them.
|
||||||
|
|
||||||
|
# BINUTILS_VER = 2.25.1
|
||||||
|
GCC_VER = 11.2.0
|
||||||
|
# MUSL_VER = git-master
|
||||||
|
# GMP_VER =
|
||||||
|
# MPC_VER =
|
||||||
|
# MPFR_VER =
|
||||||
|
# ISL_VER =
|
||||||
|
# LINUX_VER =
|
||||||
|
|
||||||
|
# By default source archives are downloaded with wget. curl is also an option.
|
||||||
|
|
||||||
|
# DL_CMD = wget -c -O
|
||||||
|
# DL_CMD = curl -C - -L -o
|
||||||
|
|
||||||
|
# Check sha-1 hashes of downloaded source archives. On gnu systems this is
|
||||||
|
# usually done with sha1sum.
|
||||||
|
|
||||||
|
# SHA1_CMD = sha1sum -c
|
||||||
|
# SHA1_CMD = sha1 -c
|
||||||
|
# SHA1_CMD = shasum -a 1 -c
|
||||||
|
|
||||||
|
# Something like the following can be used to produce a static-linked
|
||||||
|
# toolchain that's deployable to any system with matching arch, using
|
||||||
|
# an existing musl-targeted cross compiler. This only works if the
|
||||||
|
# system you build on can natively (or via binfmt_misc and qemu) run
|
||||||
|
# binaries produced by the existing toolchain (in this example, i486).
|
||||||
|
|
||||||
|
# MUSL_CONFIG += --enable-debug
|
||||||
|
# MUSL_CONFIG += CFLAGS="-Os -fno-omit-frame-pointer -fno-optimize-sibling-calls -mno-omit-leaf-frame-pointer"
|
||||||
|
MUSL_CONFIG += CFLAGS="-Os"
|
||||||
|
|
||||||
|
COMMON_CONFIG += CC="/opt/cross/bin/x86_64-linux-musl-gcc -static --static"
|
||||||
|
COMMON_CONFIG += CXX="/opt/cross/bin/x86_64-linux-musl-g++ -static --static"
|
||||||
|
# COMMON_CONFIG += CC="gcc -static --static"
|
||||||
|
# COMMON_CONFIG += CXX="g++ -static --static"
|
||||||
|
|
||||||
|
# Recommended options for smaller build for deploying binaries:
|
||||||
|
|
||||||
|
COMMON_CONFIG += CFLAGS="-Os -g0"
|
||||||
|
COMMON_CONFIG += CXXFLAGS="-Os -g0"
|
||||||
|
COMMON_CONFIG += LDFLAGS="-s"
|
||||||
|
|
||||||
|
# Options you can add for faster/simpler build at the expense of features:
|
||||||
|
|
||||||
|
COMMON_CONFIG += --disable-nls
|
||||||
|
GCC_CONFIG += --disable-libquadmath --disable-decimal-float
|
||||||
|
GCC_CONFIG += --disable-libitm
|
||||||
|
GCC_CONFIG += --disable-fixed-point
|
||||||
|
GCC_CONFIG += --disable-lto
|
||||||
|
|
||||||
|
# By default C and C++ are the only languages enabled, and these are
|
||||||
|
# the only ones tested and known to be supported. You can uncomment the
|
||||||
|
# following and add other languages if you want to try getting them to
|
||||||
|
# work too.
|
||||||
|
|
||||||
|
GCC_CONFIG += --enable-languages=c,c++ #--enable-plugin
|
||||||
|
|
||||||
|
# You can keep the local build path out of your toolchain binaries and
|
||||||
|
# target libraries with the following, but then gdb needs to be told
|
||||||
|
# where to look for source files.
|
||||||
|
|
||||||
|
# COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=
|
third_party/gcc/lib/gcc/x86_64-linux-musl/11.2.0/specs (vendored, new empty file)

@@ -1,141 +0,0 @@
*asm:
%{m16|m32:--32} %{m16|m32:;:--64} %{msse2avx:%{!mavx:-msse2avx}}

*asm_debug:
%{%:debug-level-gt(0):%{gstabs*:--gstabs}%{!gstabs*:%{g*:--gdwarf2}}} %{fdebug-prefix-map=*:--debug-prefix-map %*}

*asm_final:
%{gsplit-dwarf:
objcopy --extract-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} %{c:%{o*:%:replace-extension(%{o*:%*} .dwo)}%{!o*:%b.dwo}}%{!c:%b.dwo}
objcopy --strip-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} }

*asm_options:
%{-target-help:%:print-asm-header()} %{v} %{w:-W} %{I*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %a %Y %{c:%W{o*}%{!o*:-o %w%b%O}}%{!c:-o %d%w%u%O}

*invoke_as:
%{!fwpa*: %{fcompare-debug=*|fdump-final-insns=*:%:compare-debug-dump-opt()} %{!S:-o %|.s |
as %(asm_options) %m.s %A } }

*cpp:
%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}

*cpp_options:
%(cpp_unique_options) %1 %{m*} %{std*&ansi&trigraphs} %{W*&pedantic*} %{w} %{f*} %{g*:%{%:debug-level-gt(0):%{g*} %{!fno-working-directory:-fworking-directory}}} %{O*} %{undef} %{save-temps*:-fpch-preprocess}

*cpp_debug_options:
%{d*}

*cpp_unique_options:
%{!Q:-quiet} %{nostdinc*} %{C} %{CC} %{v} %@{I*&F*} %{P} %I %{MD:-MD %{!o:%b.d}%{o*:%.d%*}} %{MMD:-MMD %{!o:%b.d}%{o*:%.d%*}} %{M} %{MM} %{MF*} %{MG} %{MP} %{MQ*} %{MT*} %{!E:%{!M:%{!MM:%{!MT:%{!MQ:%{MD|MMD:%{o*:-MQ %*}}}}}}} %{remap} %{g3|ggdb3|gstabs3|gxcoff3|gvms3:-dD} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %{H} %C %{D*&U*&A*} %{i*} %Z %i %{E|M|MM:%W{o*}}

*trad_capable_cpp:
cc1 -E %{traditional|traditional-cpp:-traditional-cpp}

*cc1:
%{!mandroid|tno-android-cc:%(cc1_cpu) %{profile:-p};:%(cc1_cpu) %{profile:-p} %{!fno-pic:%{!fno-PIC:%{!fpic:%{!fPIC: -fPIC}}}}}

*cc1_options:
%{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %1 %{!Q:-quiet} %{!dumpbase:-dumpbase %B} %{d*} %{m*} %{aux-info*} %{fcompare-debug-second:%:compare-debug-auxbase-opt(%b)} %{!fcompare-debug-second:%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}}%{!c:%{!S:-auxbase %b}} %{g*} %{O*} %{W*&pedantic*} %{w} %{std*&ansi&trigraphs} %{v:-version} %{pg:-p} %{p} %{f*} %{undef} %{Qn:-fno-ident} %{Qy:} %{-help:--help} %{-target-help:--target-help} %{-version:--version} %{-help=*:--help=%*} %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}} %{fsyntax-only:-o %j} %{-param*} %{coverage:-fprofile-arcs -ftest-coverage} %{fprofile-arcs|fprofile-generate*|coverage: %{!fprofile-update=single: %{pthread:-fprofile-update=prefer-atomic}}}

*cc1plus:


*link_gcc_c_sequence:
%{static|static-pie:--start-group} %G %{!nolibc:%L} %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}

*link_ssp:
%{fstack-protector|fstack-protector-all|fstack-protector-strong|fstack-protector-explicit:-lssp_nonshared}

*endfile:
--push-state --pop-state

*link:
%{!mandroid|tno-android-ld:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}};:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{mx32:-m elf32_x86_64} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} %{m16|m32:-dynamic-linker } %{m16|m32:;:-dynamic-linker} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}} %{shared: -Bsymbolic}}

*lib:
--push-state --pop-state

*link_gomp:


*libgcc:
--push-state --pop-state

*startfile:
--push-state --pop-state

*cross_compile:
1

*version:
9.2.0

*multilib:
. ;

*multilib_defaults:
m64

*multilib_extra:


*multilib_matches:


*multilib_exclusions:


*multilib_options:


*multilib_reuse:


*linker:
collect2

*linker_plugin_file:


*lto_wrapper:


*lto_gcc:


*post_link:


*link_libgcc:
%D

*md_exec_prefix:


*md_startfile_prefix:


*md_startfile_prefix_1:


*startfile_prefix_spec:


*sysroot_spec:
--sysroot=%R

*sysroot_suffix_spec:


*sysroot_hdrs_suffix_spec:


*self_spec:


*cc1_cpu:
%{march=native:%>march=native %:local_cpu_detect(arch) %{!mtune=*:%>mtune=native %:local_cpu_detect(tune)}} %{mtune=native:%>mtune=native %:local_cpu_detect(tune)}

*link_command:
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S: %(linker) %{fuse-linker-plugin: %e-fuse-linker-plugin is not supported in this configuration}%{flto|flto=*:%<fcompare-debug*} %{flto} %{fno-lto} %{flto=*} %l %{shared|r:;pie|static-pie:-pie %{static|static-pie:--no-dynamic-linker -z text -Bsymbolic}} %{fuse-ld=*:-fuse-ld=%*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %X %{o*} %{e*} %{N} %{n} %{r} %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!r:%{!nostartfiles:%S}}} %{static|no-pie|static-pie:} %@{L*} %(mfwrap) %(link_libgcc) %{fvtable-verify=none:} %{fvtable-verify=std: %e-fvtable-verify=std is not supported in this configuration} %{fvtable-verify=preinit: %e-fvtable-verify=preinit is not supported in this configuration} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address):%{!shared:libasan_preinit%O%s} %{static-libasan:%{!shared:-Bstatic --whole-archive -lasan --no-whole-archive -Bdynamic}}%{!static-libasan:-lasan}} %{%:sanitize(thread):%{!shared:libtsan_preinit%O%s} %{static-libtsan:%{!shared:-Bstatic --whole-archive -ltsan --no-whole-archive -Bdynamic}}%{!static-libtsan:-ltsan}} %{%:sanitize(leak):%{!shared:liblsan_preinit%O%s} %{static-liblsan:%{!shared:-Bstatic --whole-archive -llsan --no-whole-archive -Bdynamic}}%{!static-liblsan:-llsan}}}}} %o %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*:%*} 1): %:include(libgomp.spec)%(link_gomp)} %{fgnu-tm:%:include(libitm.spec)%(link_itm)} %(mflib) %{fsplit-stack: --wrap=pthread_create} %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address): %{static-libasan|static:%:include(libsanitizer.spec)%(link_libasan)} %{static:%ecannot specify -static with -fsanitize=address}} %{%:sanitize(thread): %{static-libtsan|static:%:include(libsanitizer.spec)%(link_libtsan)} %{static:%ecannot specify -static with -fsanitize=thread}} %{%:sanitize(undefined):%{static-libubsan:-Bstatic} -lubsan %{static-libubsan:-Bdynamic} %{static-libubsan|static:%:include(libsanitizer.spec)%(link_libubsan)}} %{%:sanitize(leak): %{static-liblsan|static:%:include(libsanitizer.spec)%(link_liblsan)}}}}} %{!nostdlib:%{!r:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}} %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*}
%(post_link) }}}}}}
BIN  third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/cc1.gz (vendored, new file; binary not shown)
BIN  third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/collect2.gz (vendored, new file; binary not shown)
1869  third_party/gcc/portcosmo.patch (vendored, new file; diff suppressed because it is too large)
46  third_party/gcc/upgrade-cosmo-gcc.sh (vendored, new executable file)
@@ -0,0 +1,46 @@
#!/bin/sh

ARCH=${1:-x86_64}
IMPORT=${2:-/opt/cross11portcosmo}
PREFIX=third_party/gcc/
OLDVERSION=9.2.0
NEWVERSION=11.2.0

rm -rf o/third_party/gcc
mv $PREFIX/libexec/gcc/$ARCH-linux-musl/$OLDVERSION $PREFIX/libexec/gcc/$ARCH-linux-musl/$NEWVERSION
mv $PREFIX/lib/gcc/$ARCH-linux-musl/$OLDVERSION $PREFIX/lib/gcc/$ARCH-linux-musl/$NEWVERSION
sed -i -e "s/$OLDVERSION/$NEWVERSION/g" $(find $PREFIX -name \*.sym | grep $ARCH)

FILES="
$ARCH-linux-musl/bin/ld.bfd
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/collect2
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1plus
bin/$ARCH-linux-musl-elfedit
bin/$ARCH-linux-musl-nm
bin/$ARCH-linux-musl-objcopy
bin/$ARCH-linux-musl-gcc
bin/$ARCH-linux-musl-c++filt
bin/$ARCH-linux-musl-gcc-ranlib
bin/$ARCH-linux-musl-addr2line
bin/$ARCH-linux-musl-objdump
bin/$ARCH-linux-musl-gcov
bin/$ARCH-linux-musl-ranlib
bin/$ARCH-linux-musl-gcc-nm
bin/$ARCH-linux-musl-strip
bin/$ARCH-linux-musl-gcov-tool
bin/$ARCH-linux-musl-gprof
bin/$ARCH-linux-musl-strings
bin/$ARCH-linux-musl-gcov-dump
bin/$ARCH-linux-musl-cpp
bin/$ARCH-linux-musl-ar
bin/$ARCH-linux-musl-readelf
bin/$ARCH-linux-musl-size
bin/$ARCH-linux-musl-as
bin/$ARCH-linux-musl-g++
bin/$ARCH-linux-musl-gcc-ar
"

for f in $FILES; do
  gzip -9 <$IMPORT/$f >$PREFIX/$f.gz || exit
done
BIN  third_party/gcc/x86_64-linux-musl/bin/ld.bfd.gz (vendored; binary not shown)
62  third_party/intel/adxintrin.internal.h (vendored)
@@ -1,43 +1,53 @@
(Header regenerated from upstream GCC 11.2.0: the clang-formatted __funline declarations of _subborrow_u32, _addcarry_u32, _addcarryx_u32 and their 64-bit counterparts are replaced by upstream-style extern __inline definitions marked __attribute__((__gnu_inline__, __always_inline__, __artificial__)); the file is now wrapped in /* clang-format off */ and #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0), and the misuse guard now points callers at <x86gprintrin.h> instead of <immintrin.h>.)
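For reference, a minimal usage sketch of the carry-chaining intrinsics declared above (not part of this change); it assumes a hosted toolchain where <immintrin.h> resolves to these vendored headers.

/* Hypothetical example: add two 128-bit values held as 64-bit limbs,
   chaining the hardware carry flag through _addcarry_u64. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  unsigned long long lo, hi;
  unsigned char c = _addcarry_u64(0, 0xffffffffffffffffULL, 1, &lo); /* low limb wraps, carry set */
  c = _addcarry_u64(c, 2, 3, &hi);                                   /* carry folds into the high limb */
  printf("hi=%llu lo=%llu carry=%u\n", hi, lo, c);                   /* hi=6 lo=0 carry=0 */
  return 0;
}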
56  third_party/intel/ammintrin.internal.h (vendored)
@@ -1,58 +1,54 @@
(Header regenerated from upstream GCC 11.2.0: the SSE4A intrinsics _mm_stream_sd, _mm_stream_ss, _mm_extract_si64, _mm_extracti_si64, _mm_insert_si64, and _mm_inserti_si64 switch from __funline declarations to upstream-style extern __inline definitions, and the inner #ifdef __x86_64__ guard is subsumed by the top-level /* clang-format off */ and #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0) wrapper.)
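For reference, a small usage sketch of the non-temporal scalar store declared above (not part of this change); it assumes an SSE4A-capable AMD target built with -msse4a and that <x86intrin.h> pulls in this header.

/* Hypothetical example: bypass the cache when storing a single float. */
#include <x86intrin.h>

float dst[4];

int main(void) {
  __m128 v = _mm_set_ss(3.5f);  /* low lane holds 3.5f */
  _mm_stream_ss(&dst[0], v);    /* movntss: non-temporal store of the low float */
  _mm_sfence();                 /* order the streaming store before the read below */
  return dst[0] == 3.5f ? 0 : 1;
}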
22  third_party/intel/amxbf16intrin.internal.h (vendored, new file)
@@ -0,0 +1,22 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AMXBF16INTRIN_H_INCLUDED
#define _AMXBF16INTRIN_H_INCLUDED
#if !defined(__AMX_BF16__)
#pragma GCC push_options
#pragma GCC target("amx-bf16")
#define __DISABLE_AMX_BF16__
#endif
#if defined(__x86_64__) && defined(__AMX_BF16__)
#define _tile_dpbf16ps_internal(dst,src1,src2) __asm__ volatile ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
#define _tile_dpbf16ps(dst,src1,src2) _tile_dpbf16ps_internal (dst, src1, src2)
#endif
#ifdef __DISABLE_AMX_BF16__
#undef __DISABLE_AMX_BF16__
#pragma GCC pop_options
#endif
#endif
#endif
25  third_party/intel/amxint8intrin.internal.h (vendored, new file)
@@ -0,0 +1,25 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AMXINT8INTRIN_H_INCLUDED
#define _AMXINT8INTRIN_H_INCLUDED
#if !defined(__AMX_INT8__)
#pragma GCC push_options
#pragma GCC target("amx-int8")
#define __DISABLE_AMX_INT8__
#endif
#if defined(__x86_64__) && defined(__AMX_INT8__)
#define _tile_int8_dp_internal(name,dst,src1,src2) __asm__ volatile ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
#define _tile_dpbssd(dst,src1,src2) _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
#define _tile_dpbsud(dst,src1,src2) _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
#define _tile_dpbusd(dst,src1,src2) _tile_int8_dp_internal (tdpbusd, dst, src1, src2)
#define _tile_dpbuud(dst,src1,src2) _tile_int8_dp_internal (tdpbuud, dst, src1, src2)
#endif
#ifdef __DISABLE_AMX_INT8__
#undef __DISABLE_AMX_INT8__
#pragma GCC pop_options
#endif
#endif
#endif
46  third_party/intel/amxtileintrin.internal.h (vendored, new file)
@@ -0,0 +1,46 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AMXTILEINTRIN_H_INCLUDED
#define _AMXTILEINTRIN_H_INCLUDED
#if !defined(__AMX_TILE__)
#pragma GCC push_options
#pragma GCC target("amx-tile")
#define __DISABLE_AMX_TILE__
#endif
#if defined(__x86_64__) && defined(__AMX_TILE__)
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_loadconfig (const void *__config)
{
  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_storeconfig (void *__config)
{
  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_release (void)
{
  __asm__ volatile ("tilerelease" ::);
}
#define _tile_loadd(dst,base,stride) _tile_loadd_internal (dst, base, stride)
#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
#define _tile_stream_loadd(dst,base,stride) _tile_stream_loadd_internal (dst, base, stride)
#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
#define _tile_stored(dst,base,stride) _tile_stored_internal (dst, base, stride)
#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) base), "r" ((long) stride) : "memory")
#define _tile_zero(dst) _tile_zero_internal (dst)
#define _tile_zero_internal(dst) __asm__ volatile ("tilezero\t%%tmm"#dst ::)
#endif
#ifdef __DISABLE_AMX_TILE__
#undef __DISABLE_AMX_TILE__
#pragma GCC pop_options
#endif
#endif
#endif
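For reference, a heavily hedged usage sketch of the tile-configuration intrinsics above (not part of this change). It assumes an AMX-capable CPU, a kernel that has already granted tile-data permission (e.g. via arch_prctl on Linux), and compilation with -mamx-tile; the 64-byte configuration layout shown is an assumption about the ldtilecfg palette, not something this diff states.

/* Hypothetical example: program one 16x64-byte tile, clear it, release state. */
#include <immintrin.h>
#include <string.h>

int main(void) {
  unsigned char cfg[64];   /* buffer consumed by ldtilecfg */
  memset(cfg, 0, sizeof(cfg));
  cfg[0]  = 1;             /* palette_id = 1 */
  cfg[16] = 64;            /* tmm0 bytes per row (low byte of a 16-bit field) */
  cfg[48] = 16;            /* tmm0 rows */
  _tile_loadconfig(cfg);   /* ldtilecfg */
  _tile_zero(0);           /* tilezero %tmm0 */
  _tile_release();         /* drop the tile state */
  return 0;
}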
1760  third_party/intel/avx2intrin.internal.h (vendored; diff suppressed because it is too large)
254  third_party/intel/avx5124fmapsintrin.internal.h (vendored)
@@ -1,112 +1,180 @@
(Header regenerated from upstream GCC 11.2.0: the AVX512_4FMAPS intrinsics _mm512_4fmadd_ps, _mm_4fmadd_ss, _mm512_4fnmadd_ps, _mm_4fnmadd_ss and their mask/maskz variants move from __funline declarations to upstream-style extern __inline definitions with one builtin argument per line, wrapped in /* clang-format off */ and #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0).)
137  third_party/intel/avx5124vnniwintrin.internal.h (vendored)
@@ -1,69 +1,102 @@
(Header regenerated from upstream GCC 11.2.0: _mm512_4dpwssd_epi32, _mm512_4dpwssds_epi32 and their mask/maskz variants move from __funline declarations to upstream-style extern __inline definitions, wrapped in /* clang-format off */ and the x86_64 assembler/linker guard.)
74  third_party/intel/avx512bf16intrin.internal.h (vendored, new file)
@@ -0,0 +1,74 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512BF16INTRIN_H_INCLUDED
#define _AVX512BF16INTRIN_H_INCLUDED
#ifndef __AVX512BF16__
#pragma GCC push_options
#pragma GCC target("avx512bf16")
#define __DISABLE_AVX512BF16__
#endif
typedef short __v32bh __attribute__ ((__vector_size__ (64)));
typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
{
  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
}
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
{
  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
}
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
{
  return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtneps_pbh (__m512 __A)
{
  return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
{
  return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
{
  return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
{
  return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
{
  return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
{
  return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
}
#ifdef __DISABLE_AVX512BF16__
#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif
#endif
#endif
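For reference, a minimal usage sketch of the bf16 conversion and dot-product intrinsics above (not part of this change); it assumes hardware with AVX512BF16 and compilation with -mavx512bf16.

/* Hypothetical example: convert fp32 vectors to bf16 pairs and accumulate
   a paired dot product into fp32 lanes. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m512 a = _mm512_set1_ps(1.5f);
  __m512 b = _mm512_set1_ps(2.0f);
  __m512bh ap = _mm512_cvtne2ps_pbh(a, a);             /* 32 bf16 values, all 1.5 */
  __m512bh bp = _mm512_cvtne2ps_pbh(b, b);             /* 32 bf16 values, all 2.0 */
  __m512 acc = _mm512_dpbf16_ps(_mm512_setzero_ps(), ap, bp);
  float out[16];
  _mm512_storeu_ps(out, acc);                          /* each lane = 1.5*2.0 + 1.5*2.0 = 6.0 */
  printf("%f\n", out[0]);
  return 0;
}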
130  third_party/intel/avx512bf16vlintrin.internal.h (vendored, new file)
@@ -0,0 +1,130 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
#define _AVX512BF16VLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
#pragma GCC push_options
#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif
typedef short __v16bh __attribute__ ((__vector_size__ (32)));
typedef short __v8bh __attribute__ ((__vector_size__ (16)));
typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
{
  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
{
  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
{
  return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
{
  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
{
  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
{
  return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtneps_pbh (__m256 __A)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtneps_pbh (__m128 __A)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
{
  return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
{
  return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
{
  return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
{
  return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
{
  return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
{
  return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
{
  return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
}
#ifdef __DISABLE_AVX512BF16VL__
#undef __DISABLE_AVX512BF16VL__
#pragma GCC pop_options
#endif
#endif
#endif
279  third_party/intel/avx512bitalgintrin.internal.h (vendored)
@@ -1,172 +1,231 @@
(Header regenerated from upstream GCC 11.2.0: _mm512_popcnt_epi8, _mm512_popcnt_epi16, their mask/maskz variants, _mm512_bitshuffle_epi64_mask, and the 256-bit and 128-bit forms guarded by avx512bw/avx512vl target pragmas move from __funline declarations to upstream-style extern __inline definitions, wrapped in /* clang-format off */ and #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0).)
|
_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
|
||||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
|
{
|
||||||
|
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||||
|
(__v16qi) __B,
|
||||||
|
(__mmask16) -1);
|
||||||
}
|
}
|
||||||
|
extern __inline __mmask16
|
||||||
__funline __mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m128i __B) {
|
_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
|
||||||
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
|
{
|
||||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__M);
|
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||||
|
(__v16qi) __B,
|
||||||
|
(__mmask16) __M);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_popcnt_epi8(__m256i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm256_popcnt_epi8 (__m256i __A)
|
||||||
|
{
|
||||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
|
return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_popcnt_epi16(__m256i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm256_popcnt_epi16 (__m256i __A)
|
||||||
|
{
|
||||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
|
return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m128i
|
||||||
__funline __m128i _mm_popcnt_epi8(__m128i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_popcnt_epi8 (__m128i __A)
|
||||||
|
{
|
||||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
|
return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m128i
|
||||||
__funline __m128i _mm_popcnt_epi16(__m128i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_popcnt_epi16 (__m128i __A)
|
||||||
|
{
|
||||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
|
return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m256i __B) {
|
_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
|
||||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
|
{
|
||||||
(__v16hi)__A, (__v16hi)__B, (__mmask16)__U);
|
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||||
|
(__v16hi) __W,
|
||||||
|
(__mmask16) __U);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
|
_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
|
||||||
(__v16hi)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
|
{
|
||||||
|
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||||
|
(__v16hi)
|
||||||
|
_mm256_setzero_si256 (),
|
||||||
|
(__mmask16) __U);
|
||||||
}
|
}
|
||||||
|
extern __inline __m128i
|
||||||
__funline __m128i _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
|
_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
|
||||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
|
{
|
||||||
|
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||||
|
(__v16qi) __W,
|
||||||
|
(__mmask16) __U);
|
||||||
}
|
}
|
||||||
|
extern __inline __m128i
|
||||||
__funline __m128i _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
|
_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
|
||||||
(__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__U);
|
{
|
||||||
|
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||||
|
(__v16qi)
|
||||||
|
_mm_setzero_si128 (),
|
||||||
|
(__mmask16) __U);
|
||||||
}
|
}
|
||||||
__funline __m128i _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
|
extern __inline __m128i
|
||||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask((__v8hi)__A, (__v8hi)__B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||||
|
{
|
||||||
|
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||||
|
(__v8hi) __W,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
extern __inline __m128i
|
||||||
__funline __m128i _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask(
|
_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
|
||||||
(__v8hi)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
|
{
|
||||||
|
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||||
|
(__v8hi)
|
||||||
|
_mm_setzero_si128 (),
|
||||||
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
#ifdef __DISABLE_AVX512BITALGVL__
|
#ifdef __DISABLE_AVX512BITALGVL__
|
||||||
#undef __DISABLE_AVX512BITALGVL__
|
#undef __DISABLE_AVX512BITALGVL__
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __DISABLE_AVX512BITALGBW__ */
|
#endif
|
||||||
|
#endif
|
||||||
#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
|
#endif
|
||||||
|
|
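A rough usage sketch for the VPOPCNT intrinsics declared above (not part of the vendored header; the build flags, the hypothetical file name popcnt_demo.c, and the need for avx512bitalg/avx512vl hardware are assumptions):

    #include <immintrin.h>
    #include <stdio.h>
    /* build: cc -mavx512bitalg -mavx512vl popcnt_demo.c */
    int main(void) {
      __m128i v = _mm_setr_epi8(0, 1, 3, 7, 15, 31, 63, 127,
                                -1, 2, 4, 8, 16, 32, 64, -128);
      __m128i c = _mm_popcnt_epi8(v);            /* per-byte population count */
      unsigned char out[16];
      _mm_storeu_si128((__m128i *)out, c);
      for (int i = 0; i < 16; ++i) printf("%d ", out[i]);
      printf("\n");                              /* expected: 0 1 2 3 4 5 6 7 8 1 1 1 1 1 1 1 */
      return 0;
    }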
4022  third_party/intel/avx512bwintrin.internal.h (vendored)
File diff suppressed because it is too large.
188  third_party/intel/avx512cdintrin.internal.h (vendored)
@@ -1,100 +1,140 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512CDINTRIN_H_INCLUDED
#define _AVX512CDINTRIN_H_INCLUDED
#ifndef __AVX512CD__
#pragma GCC push_options
#pragma GCC target("avx512cd")
#define __DISABLE_AVX512CD__
#endif
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conflict_epi32 (__m512i __A)
{
  return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, (__v16si) __W, (__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conflict_epi64 (__m512i __A)
{
  return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, (__v8di) __W, (__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_lzcnt_epi64 (__m512i __A)
{
  return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, (__v8di) __W, (__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_lzcnt_epi32 (__m512i __A)
{
  return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, (__v16si) __W, (__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastmb_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_broadcastmb512 (__A);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastmw_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_broadcastmw512 (__A);
}
#ifdef __DISABLE_AVX512CD__
#undef __DISABLE_AVX512CD__
#pragma GCC pop_options
#endif
#endif
#endif
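A rough usage sketch for the conflict-detection intrinsics above (not part of the vendored header; the build flags are assumptions): each result lane holds a bitmask of the earlier lanes with the same value, which is the usual building block for vectorizing scatter updates with possibly duplicate indices.

    #include <immintrin.h>
    #include <stdio.h>
    /* build: cc -mavx512f -mavx512cd conflict_demo.c */
    int main(void) {
      __m512i idx = _mm512_setr_epi32(3, 5, 3, 9, 5, 5, 0, 1,
                                      2, 3, 4, 5, 6, 7, 8, 9);
      __m512i dup = _mm512_conflict_epi32(idx);  /* lane i: mask of lanes j < i with idx[j] == idx[i] */
      unsigned out[16];
      _mm512_storeu_si512(out, dup);
      for (int i = 0; i < 16; ++i) printf("lane %2d: 0x%x\n", i, out[i]);
      return 0;
    }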
3514  third_party/intel/avx512dqintrin.internal.h (vendored)
File diff suppressed because it is too large.
504  third_party/intel/avx512erintrin.internal.h (vendored)
@@ -1,281 +1,357 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512ERINTRIN_H_INCLUDED
#define _AVX512ERINTRIN_H_INCLUDED
#ifndef __AVX512ER__
#pragma GCC push_options
#pragma GCC target("avx512er")
#define __DISABLE_AVX512ER__
#endif
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, (__v8df) __W, (__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, (__v8df) __W, (__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B, (__v2df) __A, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B, (__v2df) __A, (__v2df) __W, __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B, (__v2df) __A, (__v2df) _mm_setzero_pd (), __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B, (__v4sf) __A, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B, (__v4sf) __A, (__v4sf) __W, __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B, (__v4sf) __A, (__v4sf) _mm_setzero_ps (), __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, (__v8df) __W, (__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, (__v8df) __W, (__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, (__v8df) _mm512_setzero_pd (), (__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, (__v16sf) __W, (__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B, (__v2df) __A, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B, (__v2df) __A, (__v2df) __W, __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B, (__v2df) __A, (__v2df) _mm_setzero_pd (), __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B, (__v4sf) __A, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B, (__v4sf) __A, (__v4sf) __W, __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B, (__v4sf) __A, (__v4sf) _mm_setzero_ps (), __U, __R);
}
#else
#define _mm512_exp2a23_round_pd(A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_exp2a23_round_pd(W, U, A, C) __builtin_ia32_exp2pd_mask(A, W, U, C)
#define _mm512_maskz_exp2a23_round_pd(U, A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_exp2a23_round_ps(A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_exp2a23_round_ps(W, U, A, C) __builtin_ia32_exp2ps_mask(A, W, U, C)
#define _mm512_maskz_exp2a23_round_ps(U, A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rcp28_round_pd(A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rcp28_round_pd(W, U, A, C) __builtin_ia32_rcp28pd_mask(A, W, U, C)
#define _mm512_maskz_rcp28_round_pd(U, A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rcp28_round_ps(A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rcp28_round_ps(W, U, A, C) __builtin_ia32_rcp28ps_mask(A, W, U, C)
#define _mm512_maskz_rcp28_round_ps(U, A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rsqrt28_round_pd(A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rsqrt28_round_ps(A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm_rcp28_round_sd(A, B, R) __builtin_ia32_rcp28sd_round(A, B, R)
#define _mm_mask_rcp28_round_sd(W, U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rcp28_round_sd(U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
#define _mm_rcp28_round_ss(A, B, R) __builtin_ia32_rcp28ss_round(A, B, R)
#define _mm_mask_rcp28_round_ss(W, U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rcp28_round_ss(U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
#define _mm_rsqrt28_round_sd(A, B, R) __builtin_ia32_rsqrt28sd_round(A, B, R)
#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
#define _mm_rsqrt28_round_ss(A, B, R) __builtin_ia32_rsqrt28ss_round(A, B, R)
#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
#endif
#define _mm_mask_rcp28_sd(W, U, A, B) _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rcp28_sd(U, A, B) _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rcp28_ss(W, U, A, B) _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rcp28_ss(U, A, B) _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rsqrt28_sd(W, U, A, B) _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rsqrt28_sd(U, A, B) _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rsqrt28_ss(W, U, A, B) _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rsqrt28_ss(U, A, B) _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_pd(A) _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_pd(W, U, A) _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_pd(U, A) _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_ps(A) _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_ps(W, U, A) _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_ps(U, A) _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rcp28_pd(A) _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_pd(W, U, A) _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rcp28_pd(U, A) _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rcp28_ps(A) _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_ps(W, U, A) _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rcp28_ps(U, A) _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rsqrt28_pd(A) _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_pd(W, U, A) _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rsqrt28_pd(U, A) _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rsqrt28_ps(A) _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_ps(W, U, A) _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rsqrt28_ps(U, A) _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_sd(A, B) __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_ss(A, B) __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_sd(A, B) __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_ss(A, B) __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#ifdef __DISABLE_AVX512ER__
#undef __DISABLE_AVX512ER__
#pragma GCC pop_options
#endif
#endif
#endif
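A rough usage sketch for the 2^-28 reciprocal-square-root intrinsics above (not part of the vendored header; AVX512ER exists only on Xeon Phi class hardware, so the build flag and target are assumptions):

    #include <immintrin.h>
    #include <stdio.h>
    /* build: cc -mavx512er rsqrt_demo.c */
    int main(void) {
      __m512 r = _mm512_rsqrt28_ps(_mm512_set1_ps(4.0f));  /* ~1/sqrt(4) in every lane */
      float out[16];
      _mm512_storeu_ps(out, r);
      printf("%f\n", out[0]);                              /* close to 0.5 */
      return 0;
    }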
19786  third_party/intel/avx512fintrin.internal.h (vendored)
File diff suppressed because it is too large.
91  third_party/intel/avx512ifmaintrin.internal.h (vendored)
@@ -1,53 +1,74 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
#define _AVX512IFMAINTRIN_H_INCLUDED
#ifndef __AVX512IFMA__
#pragma GCC push_options
#pragma GCC target("avx512ifma")
#define __DISABLE_AVX512IFMA__
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
{
  return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X, (__v8di) __Y, (__v8di) __Z, (__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
{
  return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X, (__v8di) __Y, (__v8di) __Z, (__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W, (__v8di) __X, (__v8di) __Y, (__mmask8) __M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W, (__v8di) __X, (__v8di) __Y, (__mmask8) __M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
{
  return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X, (__v8di) __Y, (__v8di) __Z, (__mmask8) __M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
{
  return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X, (__v8di) __Y, (__v8di) __Z, (__mmask8) __M);
}
#ifdef __DISABLE_AVX512IFMA__
#undef __DISABLE_AVX512IFMA__
#pragma GCC pop_options
#endif
#endif
#endif
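A rough usage sketch for the 52-bit fused multiply-add intrinsics above (not part of the vendored header; the operand order, with the first argument as accumulator, and the build flags are assumptions drawn from the signatures):

    #include <immintrin.h>
    #include <stdio.h>
    /* build: cc -mavx512f -mavx512ifma ifma_demo.c */
    int main(void) {
      __m512i acc = _mm512_set1_epi64(10);
      __m512i b   = _mm512_set1_epi64(1000000);
      __m512i c   = _mm512_set1_epi64(3000000);
      /* per lane: acc + low 52 bits of (b * c), with b and c taken as 52-bit unsigned values */
      __m512i r = _mm512_madd52lo_epu64(acc, b, c);
      long long out[8];
      _mm512_storeu_si512(out, r);
      printf("%lld\n", out[0]);                  /* 3000000000010 */
      return 0;
    }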
170  third_party/intel/avx512ifmavlintrin.internal.h (vendored)
@@ -1,88 +1,128 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
#define _AVX512IFMAVLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
#pragma GCC push_options
#pragma GCC target("avx512ifma,avx512vl")
#define __DISABLE_AVX512IFMAVL__
#endif
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
{
  return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X, (__v2di) __Y, (__v2di) __Z, (__mmask8) -1);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
{
  return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X, (__v2di) __Y, (__v2di) __Z, (__mmask8) -1);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
{
  return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X, (__v4di) __Y, (__v4di) __Z, (__mmask8) -1);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
{
  return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X, (__v4di) __Y, (__v4di) __Z, (__mmask8) -1);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W, (__v2di) __X, (__v2di) __Y, (__mmask8) __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W, (__v2di) __X, (__v2di) __Y, (__mmask8) __M);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W, (__v4di) __X, (__v4di) __Y, (__mmask8) __M);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W, (__v4di) __X, (__v4di) __Y, (__mmask8) __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
{
  return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X, (__v2di) __Y, (__v2di) __Z, (__mmask8) __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
{
  return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
|
||||||
|
(__v2di) __Y,
|
||||||
|
(__v2di) __Z,
|
||||||
|
(__mmask8) __M);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X,
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m256i __Y, __m256i __Z) {
|
_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||||
return (__m256i)__builtin_ia32_vpmadd52luq256_maskz(
|
{
|
||||||
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
|
return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
|
||||||
|
(__v4di) __Y,
|
||||||
|
(__v4di) __Z,
|
||||||
|
(__mmask8) __M);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X,
|
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m256i __Y, __m256i __Z) {
|
_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||||
return (__m256i)__builtin_ia32_vpmadd52huq256_maskz(
|
{
|
||||||
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
|
return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
|
||||||
|
(__v4di) __Y,
|
||||||
|
(__v4di) __Z,
|
||||||
|
(__mmask8) __M);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __DISABLE_AVX512IFMAVL__
|
#ifdef __DISABLE_AVX512IFMAVL__
|
||||||
#undef __DISABLE_AVX512IFMAVL__
|
#undef __DISABLE_AVX512IFMAVL__
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __DISABLE_AVX512IFMAVL__ */
|
#endif
|
||||||
|
#endif
|
||||||
#endif /* _AVX512IFMAVLINTRIN_H_INCLUDED */
|
#endif
|
||||||
|
|
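For orientation, the IFMA intrinsics above accumulate the low (or high) 52 bits of
a per-lane 64-bit multiply into a third operand. A minimal, illustrative usage
sketch, assuming a compiler invoked with -mavx512ifma -mavx512vl and a CPU that
supports both (the sketch is not part of the header itself):

/* Illustrative sketch only; assumes -mavx512ifma -mavx512vl and a matching CPU. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i x   = _mm256_set1_epi64x(3);
  __m256i y   = _mm256_set1_epi64x(5);
  __m256i acc = _mm256_set1_epi64x(100);
  /* Each 64-bit lane becomes acc + low 52 bits of (x * y): 100 + 15 = 115. */
  __m256i r = _mm256_madd52lo_epu64(acc, x, y);
  long long out[4];
  _mm256_storeu_si256((__m256i *)out, r);
  printf("%lld\n", out[0]);
  return 0;
}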
274  third_party/intel/avx512pfintrin.internal.h (vendored)

@@ -1,190 +1,170 @@
Same treatment: the __funline prefetch gather/scatter definitions under
#ifdef __OPTIMIZE__ become the upstream extern __inline
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) forms, the
multi-line fallback macros in the #else branch are collapsed onto single lines,
and the header gains the /* clang-format off */ marker and the x86_64 guard.
Representative hunks:

-__funline void _mm512_prefetch_i32gather_pd(__m256i __index, void const *__addr,
-                                            int __scale, int __hint) {
-  __builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
-                             __hint);
-}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
+                              int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
+                              __scale, __hint);
+}

-#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \
-  __builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \
-                             (void const *)ADDR, (int)SCALE, (int)HINT)
+#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))

The remaining _mm512_[mask_]prefetch_i32/i64 gather and scatter intrinsics for pd
and ps, the local typedefs (__v8di, __v16si, __m512i, __mmask8, __mmask16), and
the closing #endif block are updated in the same way.
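The prefetch gather/scatter intrinsics above only issue cache hints; they have no
architectural effect on memory. A minimal, illustrative sketch of calling one of
them (assumes -mavx512pf, i.e. a Xeon Phi class target, and compile-time-constant
scale and hint arguments as the fallback macros require):

/* Illustrative sketch only; assumes -mavx512pf and constant scale/hint. */
#include <immintrin.h>

void hint_gather(const double *base, __m256i idx) {
  /* Ask the hardware to pull base[idx[i]] toward the caches (_MM_HINT_T0)
     ahead of a later real gather; scale is 8 because elements are doubles. */
  _mm512_prefetch_i32gather_pd(idx, base, 8, _MM_HINT_T0);
}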
584
third_party/intel/avx512vbmi2intrin.internal.h
vendored
584
third_party/intel/avx512vbmi2intrin.internal.h
vendored
|
@ -1,381 +1,407 @@
|
||||||
|
/* clang-format off */
|
||||||
|
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||||
#ifndef _IMMINTRIN_H_INCLUDED
|
#ifndef _IMMINTRIN_H_INCLUDED
|
||||||
#error \
|
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||||
"Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
|
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
|
||||||
#define __AVX512VBMI2INTRIN_H_INCLUDED
|
#define __AVX512VBMI2INTRIN_H_INCLUDED
|
||||||
|
|
||||||
#if !defined(__AVX512VBMI2__)
|
#if !defined(__AVX512VBMI2__)
|
||||||
#pragma GCC push_options
|
#pragma GCC push_options
|
||||||
#pragma GCC target("avx512vbmi2")
|
#pragma GCC target("avx512vbmi2")
|
||||||
#define __DISABLE_AVX512VBMI2__
|
#define __DISABLE_AVX512VBMI2__
|
||||||
#endif /* __AVX512VBMI2__ */
|
#endif
|
||||||
|
|
||||||
#ifdef __OPTIMIZE__
|
#ifdef __OPTIMIZE__
|
||||||
__funline __m512i _mm512_shrdi_epi16(__m512i __A, __m512i __B, int __C) {
|
extern __inline __m512i
|
||||||
return (__m512i)__builtin_ia32_vpshrd_v32hi((__v32hi)__A, (__v32hi)__B, __C);
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shrdi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||||
|
{
|
||||||
|
return (__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||||
|
__C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shrdi_epi32(__m512i __A, __m512i __B, int __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_vpshrd_v16si((__v16si)__A, (__v16si)__B, __C);
|
_mm512_shrdi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||||
|
{
|
||||||
|
return (__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)__A, (__v16si) __B,
|
||||||
|
__C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shrdi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D, int __E) {
|
_mm512_mask_shrdi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
|
int __E)
|
||||||
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
|
{
|
||||||
|
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__C,
|
||||||
|
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shrdi_epi32(__mmask16 __A, __m512i __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C, int __D) {
|
_mm512_maskz_shrdi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
|
{
|
||||||
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
|
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__B,
|
||||||
(__mmask16)__A);
|
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shrdi_epi64(__m512i __A, __m512i __B, int __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shrdi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C);
|
return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shrdi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D, int __E) {
|
_mm512_mask_shrdi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask((__v8di)__C, (__v8di)__D, __E,
|
int __E)
|
||||||
(__v8di)__A, (__mmask8)__B);
|
{
|
||||||
|
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||||
|
__E, (__v8di) __A, (__mmask8)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shrdi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
int __D) {
|
_mm512_maskz_shrdi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask(
|
{
|
||||||
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
|
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||||
(__mmask8)__A);
|
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shldi_epi16(__m512i __A, __m512i __B, int __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_vpshld_v32hi((__v32hi)__A, (__v32hi)__B, __C);
|
_mm512_shldi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||||
|
{
|
||||||
|
return (__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||||
|
__C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shldi_epi32(__m512i __A, __m512i __B, int __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_vpshld_v16si((__v16si)__A, (__v16si)__B, __C);
|
_mm512_shldi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||||
|
{
|
||||||
|
return (__m512i) __builtin_ia32_vpshld_v16si ((__v16si)__A, (__v16si) __B,
|
||||||
|
__C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shldi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D, int __E) {
|
_mm512_mask_shldi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
|
int __E)
|
||||||
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
|
{
|
||||||
|
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__C,
|
||||||
|
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shldi_epi32(__mmask16 __A, __m512i __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C, int __D) {
|
_mm512_maskz_shldi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
|
{
|
||||||
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
|
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__B,
|
||||||
(__mmask16)__A);
|
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shldi_epi64(__m512i __A, __m512i __B, int __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shldi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C);
|
return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shldi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D, int __E) {
|
_mm512_mask_shldi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask((__v8di)__C, (__v8di)__D, __E,
|
int __E)
|
||||||
(__v8di)__A, (__mmask8)__B);
|
{
|
||||||
|
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||||
|
__E, (__v8di) __A, (__mmask8)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shldi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
int __D) {
|
_mm512_maskz_shldi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask(
|
{
|
||||||
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
|
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||||
(__mmask8)__A);
|
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#define _mm512_shrdi_epi16(A, B, C) \
|
#define _mm512_shrdi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
|
||||||
((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), \
|
#define _mm512_shrdi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
|
||||||
(__v32hi)(__m512i)(B),(int)(C))
|
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
|
||||||
#define _mm512_shrdi_epi32(A, B, C) \
|
#define _mm512_maskz_shrdi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
|
||||||
((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), \
|
#define _mm512_shrdi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B),(int)(C)))
|
||||||
(__v16si)(__m512i)(B),(int)(C))
|
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
|
||||||
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) \
|
#define _mm512_maskz_shrdi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
|
||||||
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), \
|
#define _mm512_shldi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
|
||||||
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B))
|
#define _mm512_shldi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
|
||||||
#define _mm512_maskz_shrdi_epi32(A, B, C, D) \
|
#define _mm512_mask_shldi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
|
||||||
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), \
|
#define _mm512_maskz_shldi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
|
||||||
(__v16si)(__m512i)(C),(int)(D), \
|
#define _mm512_shldi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(C)))
|
||||||
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A))
|
#define _mm512_mask_shldi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
|
||||||
#define _mm512_shrdi_epi64(A, B, C) \
|
#define _mm512_maskz_shldi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
|
||||||
((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), \
|
|
||||||
(__v8di)(__m512i)(B),(int)(C))
|
|
||||||
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) \
|
|
||||||
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), \
|
|
||||||
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
|
|
||||||
#define _mm512_maskz_shrdi_epi64(A, B, C, D) \
|
|
||||||
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), \
|
|
||||||
(__v8di)(__m512i)(C),(int)(D), \
|
|
||||||
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
|
|
||||||
#define _mm512_shldi_epi16(A, B, C) \
|
|
||||||
((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), \
|
|
||||||
(__v32hi)(__m512i)(B),(int)(C))
|
|
||||||
#define _mm512_shldi_epi32(A, B, C) \
|
|
||||||
((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), \
|
|
||||||
(__v16si)(__m512i)(B),(int)(C))
|
|
||||||
#define _mm512_mask_shldi_epi32(A, B, C, D, E) \
|
|
||||||
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), \
|
|
||||||
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B))
|
|
||||||
#define _mm512_maskz_shldi_epi32(A, B, C, D) \
|
|
||||||
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), \
|
|
||||||
(__v16si)(__m512i)(C),(int)(D), \
|
|
||||||
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A))
|
|
||||||
#define _mm512_shldi_epi64(A, B, C) \
|
|
||||||
((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), \
|
|
||||||
(__v8di)(__m512i)(B),(int)(C))
|
|
||||||
#define _mm512_mask_shldi_epi64(A, B, C, D, E) \
|
|
||||||
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), \
|
|
||||||
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
|
|
||||||
#define _mm512_maskz_shldi_epi64(A, B, C, D) \
|
|
||||||
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), \
|
|
||||||
(__v8di)(__m512i)(C),(int)(D), \
|
|
||||||
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
|
|
||||||
#endif
|
#endif
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shrdv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||||
(__v32hi) __C);
|
(__v32hi) __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shrdv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B,
|
return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B,
|
||||||
(__v16si) __C);
|
(__v16si) __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D) {
|
_mm512_mask_shrdv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask(
|
{
|
||||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask ((__v16si)__A,
|
||||||
|
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shrdv_epi32(__mmask16 __A, __m512i __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C, __m512i __D) {
|
_mm512_maskz_shrdv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz(
|
{
|
||||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz ((__v16si)__B,
|
||||||
|
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shrdv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B,
|
return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B,
|
||||||
(__v8di) __C);
|
(__v8di) __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D) {
|
_mm512_mask_shrdv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||||
|
{
|
||||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||||
(__v8di) __D, (__mmask8)__B);
|
(__v8di) __D, (__mmask8)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shrdv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D) {
|
_mm512_maskz_shrdv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||||
|
{
|
||||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||||
(__v8di) __D, (__mmask8)__A);
|
(__v8di) __D, (__mmask8)__A);
|
||||||
}
|
}
|
||||||
__funline __m512i _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) {
|
extern __inline __m512i
|
||||||
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shldv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||||
(__v32hi) __C);
|
(__v32hi) __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shldv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B,
|
return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B,
|
||||||
(__v16si) __C);
|
(__v16si) __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D) {
|
_mm512_mask_shldv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||||
return (__m512i)__builtin_ia32_vpshldv_v16si_mask(
|
{
|
||||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
return (__m512i)__builtin_ia32_vpshldv_v16si_mask ((__v16si)__A,
|
||||||
|
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shldv_epi32(__mmask16 __A, __m512i __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C, __m512i __D) {
|
_mm512_maskz_shldv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||||
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz(
|
{
|
||||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz ((__v16si)__B,
|
||||||
|
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm512_shldv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||||
|
{
|
||||||
return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B,
|
return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B,
|
||||||
(__v8di) __C);
|
(__v8di) __C);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D) {
|
_mm512_mask_shldv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||||
|
{
|
||||||
return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||||
(__v8di) __D, (__mmask8)__B);
|
(__v8di) __D, (__mmask8)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __D) {
|
_mm512_maskz_shldv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||||
|
{
|
||||||
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||||
(__v8di) __D, (__mmask8)__A);
|
(__v8di) __D, (__mmask8)__A);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __DISABLE_AVX512VBMI2__
|
#ifdef __DISABLE_AVX512VBMI2__
|
||||||
#undef __DISABLE_AVX512VBMI2__
|
#undef __DISABLE_AVX512VBMI2__
|
||||||
|
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __DISABLE_AVX512VBMI2__ */
|
#endif
|
||||||
|
|
||||||
#if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__)
|
#if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__)
|
||||||
#pragma GCC push_options
|
#pragma GCC push_options
|
||||||
#pragma GCC target("avx512vbmi2,avx512bw")
|
#pragma GCC target("avx512vbmi2,avx512bw")
|
||||||
#define __DISABLE_AVX512VBMI2BW__
|
#define __DISABLE_AVX512VBMI2BW__
|
||||||
#endif /* __AVX512VBMI2BW__ */
|
#endif
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_compress_epi8(__m512i __A, __mmask64 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C) {
|
_mm512_mask_compress_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||||
return (__m512i)__builtin_ia32_compressqi512_mask((__v64qi)__C, (__v64qi)__A,
|
{
|
||||||
(__mmask64)__B);
|
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__C,
|
||||||
|
(__v64qi)__A, (__mmask64)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_compress_epi8(__mmask64 __A, __m512i __B) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_compressqi512_mask(
|
_mm512_maskz_compress_epi8 (__mmask64 __A, __m512i __B)
|
||||||
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
{
|
||||||
|
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__B,
|
||||||
|
(__v64qi)_mm512_setzero_si512 (), (__mmask64)__A);
|
||||||
}
|
}
|
||||||
|
extern __inline void
|
||||||
__funline void _mm512_mask_compressstoreu_epi8(void *__A, __mmask64 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C) {
|
_mm512_mask_compressstoreu_epi8 (void * __A, __mmask64 __B, __m512i __C)
|
||||||
|
{
|
||||||
__builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C,
|
__builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C,
|
||||||
(__mmask64) __B);
|
(__mmask64) __B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_compress_epi16(__m512i __A, __mmask32 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C) {
|
_mm512_mask_compress_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||||
return (__m512i)__builtin_ia32_compresshi512_mask((__v32hi)__C, (__v32hi)__A,
|
{
|
||||||
(__mmask32)__B);
|
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__C,
|
||||||
|
(__v32hi)__A, (__mmask32)__B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_compress_epi16(__mmask32 __A, __m512i __B) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_compresshi512_mask(
|
_mm512_maskz_compress_epi16 (__mmask32 __A, __m512i __B)
|
||||||
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
{
|
||||||
|
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__B,
|
||||||
|
(__v32hi)_mm512_setzero_si512 (), (__mmask32)__A);
|
||||||
}
|
}
|
||||||
|
extern __inline void
|
||||||
__funline void _mm512_mask_compressstoreu_epi16(void *__A, __mmask32 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C) {
|
_mm512_mask_compressstoreu_epi16 (void * __A, __mmask32 __B, __m512i __C)
|
||||||
|
{
|
||||||
__builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C,
|
__builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C,
|
||||||
(__mmask32) __B);
|
(__mmask32) __B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_expand_epi8(__m512i __A, __mmask64 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C) {
|
_mm512_mask_expand_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||||
return (__m512i)__builtin_ia32_expandqi512_mask((__v64qi)__C, (__v64qi)__A,
|
{
|
||||||
|
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __C,
|
||||||
|
(__v64qi) __A,
|
||||||
(__mmask64) __B);
|
(__mmask64) __B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_expand_epi8(__mmask64 __A, __m512i __B) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_expandqi512_maskz(
|
_mm512_maskz_expand_epi8 (__mmask64 __A, __m512i __B)
|
||||||
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
{
|
||||||
|
return (__m512i) __builtin_ia32_expandqi512_maskz ((__v64qi) __B,
|
||||||
|
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_expandloadu_epi8(__m512i __A, __mmask64 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
const void *__C) {
|
_mm512_mask_expandloadu_epi8 (__m512i __A, __mmask64 __B, const void * __C)
|
||||||
return (__m512i)__builtin_ia32_expandloadqi512_mask(
|
{
|
||||||
(const __v64qi *)__C, (__v64qi)__A, (__mmask64)__B);
|
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *) __C,
|
||||||
|
(__v64qi) __A, (__mmask64) __B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_expandloadu_epi8(__mmask64 __A, const void *__B) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_expandloadqi512_maskz(
|
_mm512_maskz_expandloadu_epi8 (__mmask64 __A, const void * __B)
|
||||||
(const __v64qi *)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
{
|
||||||
|
return (__m512i) __builtin_ia32_expandloadqi512_maskz ((const __v64qi *) __B,
|
||||||
|
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_expand_epi16(__m512i __A, __mmask32 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m512i __C) {
|
_mm512_mask_expand_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||||
return (__m512i)__builtin_ia32_expandhi512_mask((__v32hi)__C, (__v32hi)__A,
|
{
|
||||||
|
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __C,
|
||||||
|
(__v32hi) __A,
|
||||||
(__mmask32) __B);
|
(__mmask32) __B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_expand_epi16(__mmask32 __A, __m512i __B) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_expandhi512_maskz(
|
_mm512_maskz_expand_epi16 (__mmask32 __A, __m512i __B)
|
||||||
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
{
|
||||||
|
return (__m512i) __builtin_ia32_expandhi512_maskz ((__v32hi) __B,
|
||||||
|
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_mask_expandloadu_epi16(__m512i __A, __mmask32 __B,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
const void *__C) {
|
_mm512_mask_expandloadu_epi16 (__m512i __A, __mmask32 __B, const void * __C)
|
||||||
return (__m512i)__builtin_ia32_expandloadhi512_mask(
|
{
|
||||||
(const __v32hi *)__C, (__v32hi)__A, (__mmask32)__B);
|
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *) __C,
|
||||||
|
(__v32hi) __A, (__mmask32) __B);
|
||||||
}
|
}
|
||||||
|
extern __inline __m512i
|
||||||
__funline __m512i _mm512_maskz_expandloadu_epi16(__mmask32 __A, const void *__B) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m512i)__builtin_ia32_expandloadhi512_maskz(
|
_mm512_maskz_expandloadu_epi16 (__mmask32 __A, const void * __B)
|
||||||
(const __v32hi *)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
{
|
||||||
|
return (__m512i) __builtin_ia32_expandloadhi512_maskz ((const __v32hi *) __B,
|
||||||
|
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __OPTIMIZE__
|
#ifdef __OPTIMIZE__
|
||||||
__funline __m512i _mm512_mask_shrdi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
extern __inline __m512i
|
||||||
__m512i __D, int __E) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shrdi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D, int __E)
{
  return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__C, (__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shrdi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
{
  return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__B, (__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shldi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D, int __E)
{
  return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__C, (__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shldi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
{
  return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__B, (__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
}
#else
#define _mm512_mask_shrdi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
#define _mm512_maskz_shrdi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C), (int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
#define _mm512_mask_shldi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
#define _mm512_maskz_shldi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C), (int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
#endif
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shrdv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask ((__v32hi)__A, (__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shrdv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz ((__v32hi)__B, (__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_shldv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpshldv_v32hi_mask ((__v32hi)__A, (__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_shldv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz ((__v32hi)__B, (__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
}
#ifdef __DISABLE_AVX512VBMI2BW__
#undef __DISABLE_AVX512VBMI2BW__
#pragma GCC pop_options
#endif
#endif
#endif
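An illustrative usage sketch (not part of this diff) of the masked double-shift intrinsic above; it assumes a build with -mavx512vbmi2 -mavx512bw, and the helper name is hypothetical:

#include <immintrin.h>
/* For lanes selected by the mask, take the 32-bit concatenation hi:lo,
   shift it right by four bits, and keep the low 16 bits; unselected
   lanes keep their value from src. */
static inline __m512i shrd_low_lanes (__m512i src, __m512i lo, __m512i hi)
{
  return _mm512_mask_shrdi_epi16 (src, (__mmask32)0x0000FFFF, lo, hi, 4);
}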
third_party/intel/avx512vbmi2vlintrin.internal.h (vendored, 1072 lines changed)
File diff suppressed because it is too large.
third_party/intel/avx512vbmiintrin.internal.h (vendored, 166 lines changed)
@@ -1,90 +1,124 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
#define _AVX512VBMIINTRIN_H_INCLUDED
#ifndef __AVX512VBMI__
#pragma GCC push_options
#pragma GCC target("avx512vbmi")
#define __DISABLE_AVX512VBMI__
#endif
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, (__v64qi) __Y, (__v64qi) __W, (__mmask64) __M);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, (__v64qi) __Y, (__v64qi) _mm512_setzero_si512 (), (__mmask64) __M);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X, (__v64qi) __Y, (__v64qi) _mm512_undefined_epi32 (), (__mmask64) -1);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, (__v64qi) __A, (__v64qi) _mm512_undefined_epi32 (), (__mmask64) -1);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, (__v64qi) __A, (__v64qi) _mm512_setzero_si512 (), (__mmask64) __M);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, (__v64qi) __A, (__v64qi) __W, (__mmask64) __M);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I, (__v64qi) __A, (__v64qi) __B, (__mmask64) -1);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I, (__v64qi) __A, (__v64qi) __B, (__mmask64) __U);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I, __mmask64 __U, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A, (__v64qi) __I, (__v64qi) __B, (__mmask64) __U);
}
extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I, (__v64qi) __A, (__v64qi) __B, (__mmask64) __U);
}
#ifdef __DISABLE_AVX512VBMI__
#undef __DISABLE_AVX512VBMI__
#pragma GCC pop_options
#endif
#endif
#endif
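An illustrative usage sketch (not part of this diff) of the byte-granular shuffle provided by this header; it assumes -mavx512vbmi, and the helper name is hypothetical:

#include <immintrin.h>
/* Reverse the 64 bytes of a 512-bit vector: result lane i takes source
   byte idx[i] = 63 - i. */
static inline __m512i reverse_bytes_512 (__m512i v)
{
  unsigned char idx[64];
  for (int i = 0; i < 64; ++i) idx[i] = (unsigned char)(63 - i);
  return _mm512_permutexvar_epi8 (_mm512_loadu_si512 (idx), v);
}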
third_party/intel/avx512vbmivlintrin.internal.h (vendored, 320 lines changed)
@@ -1,159 +1,229 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
#define _AVX512VBMIVLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
#pragma GCC push_options
#pragma GCC target("avx512vbmi,avx512vl")
#define __DISABLE_AVX512VBMIVL__
#endif
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, (__v32qi) __Y, (__v32qi) __W, (__mmask32) __M);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, (__v32qi) __Y, (__v32qi) _mm256_setzero_si256 (), (__mmask32) __M);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
{
  return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, (__v32qi) __Y, (__v32qi) _mm256_undefined_si256 (), (__mmask32) -1);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, (__v16qi) __Y, (__v16qi) __W, (__mmask16) __M);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, (__v16qi) __Y, (__v16qi) _mm_setzero_si128 (), (__mmask16) __M);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
{
  return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, (__v16qi) __Y, (__v16qi) _mm_undefined_si128 (), (__mmask16) -1);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, (__v32qi) __A, (__v32qi) _mm256_undefined_si256 (), (__mmask32) -1);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, (__v32qi) __A, (__v32qi) _mm256_setzero_si256 (), (__mmask32) __M);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
{
  return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, (__v32qi) __A, (__v32qi) __W, (__mmask32) __M);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, (__v16qi) __A, (__v16qi) _mm_undefined_si128 (), (__mmask16) -1);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, (__v16qi) __A, (__v16qi) _mm_setzero_si128 (), (__mmask16) __M);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
{
  return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, (__v16qi) __A, (__v16qi) __W, (__mmask16) __M);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I, (__v32qi) __A, (__v32qi) __B, (__mmask32) -1);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U, __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I, (__v32qi) __A, (__v32qi) __B, (__mmask32) __U);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I, __mmask32 __U, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A, (__v32qi) __I, (__v32qi) __B, (__mmask32) __U);
}
extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A, __m256i __I, __m256i __B)
{
  return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I, (__v32qi) __A, (__v32qi) __B, (__mmask32) __U);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I, (__v16qi) __A, (__v16qi) __B, (__mmask16) -1);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I, (__v16qi) __A, (__v16qi) __B, (__mmask16) __U);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A, (__v16qi) __I, (__v16qi) __B, (__mmask16) __U);
}
extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I, __m128i __B)
{
  return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I, (__v16qi) __A, (__v16qi) __B, (__mmask16) __U);
}
#ifdef __DISABLE_AVX512VBMIVL__
#undef __DISABLE_AVX512VBMIVL__
#pragma GCC pop_options
#endif
#endif
#endif
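An illustrative usage sketch (not part of this diff) of the two-source byte shuffle in its 256-bit form; it assumes -mavx512vbmi -mavx512vl, and the helper name is hypothetical:

#include <immintrin.h>
/* Interleave the low 16 bytes of a and b as a0,b0,a1,b1,...; index
   values 32..63 select bytes from the second source operand. */
static inline __m256i interleave_low_bytes (__m256i a, __m256i b)
{
  unsigned char sel[32];
  for (int i = 0; i < 32; ++i)
    sel[i] = (unsigned char)((i >> 1) + ((i & 1) ? 32 : 0));
  return _mm256_permutex2var_epi8 (a, _mm256_loadu_si256 ((const __m256i *) sel), b);
}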
third_party/intel/avx512vlbwintrin.internal.h (vendored, 5985 lines changed)
File diff suppressed because it is too large.
third_party/intel/avx512vldqintrin.internal.h (vendored, 2541 lines changed)
File diff suppressed because it is too large.
third_party/intel/avx512vlintrin.internal.h (vendored, 17314 lines changed)
File diff suppressed because it is too large.
third_party/intel/avx512vnniintrin.internal.h (vendored, 131 lines changed)
@@ -1,87 +1,108 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
#define __AVX512VNNIINTRIN_H_INCLUDED
#if !defined(__AVX512VNNI__)
#pragma GCC push_options
#pragma GCC target("avx512vnni")
#define __DISABLE_AVX512VNNI__
#endif
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusd_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
  return (__m512i) __builtin_ia32_vpdpbusd_v16si ((__v16si)__A, (__v16si) __B, (__v16si) __C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbusd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask ((__v16si)__A, (__v16si) __C, (__v16si) __D, (__mmask16)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbusd_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz ((__v16si)__B, (__v16si) __C, (__v16si) __D, (__mmask16)__A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusds_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
  return (__m512i) __builtin_ia32_vpdpbusds_v16si ((__v16si)__A, (__v16si) __B, (__v16si) __C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbusds_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask ((__v16si)__A, (__v16si) __C, (__v16si) __D, (__mmask16)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbusds_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz ((__v16si)__B, (__v16si) __C, (__v16si) __D, (__mmask16)__A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
  return (__m512i) __builtin_ia32_vpdpwssd_v16si ((__v16si)__A, (__v16si) __B, (__v16si) __C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpwssd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask ((__v16si)__A, (__v16si) __C, (__v16si) __D, (__mmask16)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpwssd_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz ((__v16si)__B, (__v16si) __C, (__v16si) __D, (__mmask16)__A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
  return (__m512i) __builtin_ia32_vpdpwssds_v16si ((__v16si)__A, (__v16si) __B, (__v16si) __C);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpwssds_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask ((__v16si)__A, (__v16si) __C, (__v16si) __D, (__mmask16)__B);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpwssds_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
{
  return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz ((__v16si)__B, (__v16si) __C, (__v16si) __D, (__mmask16)__A);
}
#ifdef __DISABLE_AVX512VNNI__
#undef __DISABLE_AVX512VNNI__
#pragma GCC pop_options
#endif
#endif
#endif
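An illustrative usage sketch (not part of this diff) of the VNNI dot-product accumulator; it assumes -mavx512vnni, and the helper name is hypothetical:

#include <immintrin.h>
/* Each 32-bit lane of acc gains the sum of four unsigned-byte times
   signed-byte products taken from the corresponding lanes of u8s and s8s. */
static inline __m512i dot_accumulate_512 (__m512i acc, __m512i u8s, __m512i s8s)
{
  return _mm512_dpbusd_epi32 (acc, u8s, s8s);
}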
third_party/intel/avx512vnnivlintrin.internal.h (vendored, 224 lines changed)
@@ -1,154 +1,140 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
#define _AVX512VNNIVLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
#pragma GCC push_options
#pragma GCC target("avx512vnni,avx512vl")
#define __DISABLE_AVX512VNNIVL__
#endif
#define _mm256_dpbusd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C, (__v8si) __D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz ((__v8si)__B, (__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
#define _mm_dpbusd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C, (__v4si) __D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz ((__v4si)__B, (__v4si) __C, (__v4si) __D, (__mmask8)__A);
}
#define _mm256_dpbusds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask ((__v8si)__A, (__v8si) __C, (__v8si) __D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz ((__v8si)__B, (__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
#define _mm_dpbusds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask ((__v4si)__A, (__v4si) __C, (__v4si) __D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz ((__v4si)__B, (__v4si) __C, (__v4si) __D, (__mmask8)__A);
}
#define _mm256_dpwssd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C, (__v8si) __D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz ((__v8si)__B, (__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
#define _mm_dpwssd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C, (__v4si) __D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz ((__v4si)__B, (__v4si) __C, (__v4si) __D, (__mmask8)__A);
}
#define _mm256_dpwssds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask ((__v8si)__A, (__v8si) __C, (__v8si) __D, (__mmask8)__B);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
{
  return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz ((__v8si)__B, (__v8si) __C, (__v8si) __D, (__mmask8)__A);
}
#define _mm_dpwssds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask ((__v4si)__A, (__v4si) __C, (__v4si) __D, (__mmask8)__B);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpwssds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
{
  return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz ((__v4si)__B, (__v4si) __C, (__v4si) __D, (__mmask8)__A);
}
#ifdef __DISABLE_AVX512VNNIVL__
#undef __DISABLE_AVX512VNNIVL__
#pragma GCC pop_options
#endif
#endif
#endif
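An illustrative usage sketch (not part of this diff) of the 256-bit form, which the updated header now exposes as a macro; it assumes -mavx512vnni -mavx512vl, and the helper name is hypothetical:

#include <immintrin.h>
/* Same u8 x s8 dot-product accumulation as the 512-bit variant, on
   256-bit vectors. */
static inline __m256i dot_accumulate_256 (__m256i acc, __m256i u8s, __m256i s8s)
{
  return _mm256_dpbusd_epi32 (acc, u8s, s8s);
}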
third_party/intel/avx512vp2intersectintrin.internal.h (vendored, new file, 32 lines)
@@ -0,0 +1,32 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#if !defined(__AVX512VP2INTERSECT__)
#pragma GCC push_options
#pragma GCC target("avx512vp2intersect")
#define __DISABLE_AVX512VP2INTERSECT__
#endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_2intersect_epi32 (__m512i __A, __m512i __B, __mmask16 *__U, __mmask16 *__M)
{
  __builtin_ia32_2intersectd512 (__U, __M, (__v16si) __A, (__v16si) __B);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_2intersect_epi64 (__m512i __A, __m512i __B, __mmask8 *__U, __mmask8 *__M)
{
  __builtin_ia32_2intersectq512 (__U, __M, (__v8di) __A, (__v8di) __B);
}
#ifdef __DISABLE_AVX512VP2INTERSECT__
#undef __DISABLE_AVX512VP2INTERSECT__
#pragma GCC pop_options
#endif
#endif
#endif
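An illustrative usage sketch (not part of this diff) of the new VP2INTERSECT header; it assumes -mavx512vp2intersect, and the helper name is hypothetical:

#include <immintrin.h>
/* in_a marks lanes of a whose 32-bit value also occurs somewhere in b;
   in_b marks lanes of b that occur in a. */
static inline void intersect_keys_512 (__m512i a, __m512i b, __mmask16 *in_a, __mmask16 *in_b)
{
  _mm512_2intersect_epi32 (a, b, in_a, in_b);
}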
third_party/intel/avx512vp2intersectvlintrin.internal.h (vendored, new file, 44 lines)
@@ -0,0 +1,44 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
#pragma GCC push_options
#pragma GCC target("avx512vp2intersect,avx512vl")
#define __DISABLE_AVX512VP2INTERSECTVL__
#endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_2intersect_epi32 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
{
  __builtin_ia32_2intersectd128 (__U, __M, (__v4si) __A, (__v4si) __B);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_2intersect_epi32 (__m256i __A, __m256i __B, __mmask8 *__U, __mmask8 *__M)
{
  __builtin_ia32_2intersectd256 (__U, __M, (__v8si) __A, (__v8si) __B);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_2intersect_epi64 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
{
  __builtin_ia32_2intersectq128 (__U, __M, (__v2di) __A, (__v2di) __B);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_2intersect_epi64 (__m256i __A, __m256i __B, __mmask8 *__U, __mmask8 *__M)
{
  __builtin_ia32_2intersectq256 (__U, __M, (__v4di) __A, (__v4di) __B);
}
#ifdef __DISABLE_AVX512VP2INTERSECTVL__
#undef __DISABLE_AVX512VP2INTERSECTVL__
#pragma GCC pop_options
#endif
#endif
#endif
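An illustrative usage sketch (not part of this diff) of the 128-bit variant from the VL header; it assumes -mavx512vp2intersect -mavx512vl, and the helper name is hypothetical:

#include <immintrin.h>
/* Same intersection-mask computation on four 32-bit keys per vector. */
static inline void intersect_keys_128 (__m128i a, __m128i b, __mmask8 *in_a, __mmask8 *in_b)
{
  _mm_2intersect_epi32 (a, b, in_a, in_b);
}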
@@ -1,50 +1,64 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
# error "Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#ifndef __AVX512VPOPCNTDQ__
#pragma GCC push_options
#pragma GCC target("avx512vpopcntdq")
#define __DISABLE_AVX512VPOPCNTDQ__
#endif
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi32 (__m512i __A)
{
  return (__m512i) __builtin_ia32_vpopcountd_v16si ((__v16si) __A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_popcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A, (__v16si) __W, (__mmask16) __U);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_popcnt_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A, (__v16si) _mm512_setzero_si512 (), (__mmask16) __U);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi64 (__m512i __A)
{
  return (__m512i) __builtin_ia32_vpopcountq_v8di ((__v8di) __A);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_popcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A, (__v8di) __W, (__mmask8) __U);
}
extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_popcnt_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A, (__v8di) _mm512_setzero_si512 (), (__mmask8) __U);
}
#ifdef __DISABLE_AVX512VPOPCNTDQ__
#undef __DISABLE_AVX512VPOPCNTDQ__
#pragma GCC pop_options
#endif
#endif
#endif
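An illustrative usage sketch (not part of this diff) of the per-lane population count; it assumes -mavx512vpopcntdq, and the helper name is hypothetical:

#include <immintrin.h>
/* Each 32-bit lane of the result holds the number of set bits in the
   corresponding lane of v. */
static inline __m512i lane_popcount_epi32 (__m512i v)
{
  return _mm512_popcnt_epi32 (v);
}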
third_party/intel/avx512vpopcntdqvlintrin.internal.h (vendored, 126 lines changed)
@@ -1,78 +1,110 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
#pragma GCC push_options
#pragma GCC target("avx512vpopcntdq,avx512vl")
#define __DISABLE_AVX512VPOPCNTDQVL__
#endif
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi32 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi32 (__m128i __W, __mmask16 __U, __m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A, (__v4si) __W, (__mmask16) __U);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A, (__v4si) _mm_setzero_si128 (), (__mmask16) __U);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi32 (__m256i __A)
{
  return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_popcnt_epi32 (__m256i __W, __mmask16 __U, __m256i __A)
{
  return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A, (__v8si) __W, (__mmask16) __U);
}
extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
{
  return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A, (__v8si) _mm256_setzero_si256 (), (__mmask16) __U);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi64 (__m128i __A)
{
  return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
_mm_mask_popcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||||
|
{
|
||||||
|
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||||
|
(__v2di) __W,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
extern __inline __m128i
|
||||||
__funline __m128i _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask(
|
_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
|
||||||
(__v2di)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
|
{
|
||||||
|
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||||
|
(__v2di)
|
||||||
|
_mm_setzero_si128 (),
|
||||||
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_popcnt_epi64(__m256i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
_mm256_popcnt_epi64 (__m256i __A)
|
||||||
|
{
|
||||||
return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
|
return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U,
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__m256i __B) {
|
_mm256_mask_popcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||||
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask((__v4di)__A, (__v4di)__B,
|
{
|
||||||
|
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||||
|
(__v4di) __W,
|
||||||
(__mmask8) __U);
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
extern __inline __m256i
|
||||||
__funline __m256i _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
|
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask(
|
_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
|
||||||
(__v4di)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
|
{
|
||||||
|
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||||
|
(__v4di)
|
||||||
|
_mm256_setzero_si256 (),
|
||||||
|
(__mmask8) __U);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __DISABLE_AVX512VPOPCNTDQVL__
|
#ifdef __DISABLE_AVX512VPOPCNTDQVL__
|
||||||
#undef __DISABLE_AVX512VPOPCNTDQVL__
|
#undef __DISABLE_AVX512VPOPCNTDQVL__
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
|
#endif
|
||||||
|
#endif
|
||||||
#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
|
#endif
|
||||||
|
|
1318 third_party/intel/avxintrin.internal.h (vendored): file diff suppressed because it is too large
82 third_party/intel/avxvnniintrin.internal.h (vendored, new file)
@@ -0,0 +1,82 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVXVNNIINTRIN_H_INCLUDED
#define _AVXVNNIINTRIN_H_INCLUDED
#if !defined(__AVXVNNI__)
#pragma GCC push_options
#pragma GCC target("avxvnni")
#define __DISABLE_AVXVNNIVL__
#endif
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbusd_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) __A, (__v8si) __B, (__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbusd_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) __A, (__v4si) __B, (__v4si) __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbusds_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) __A, (__v8si) __B, (__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbusds_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) __A, (__v4si) __B, (__v4si) __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpwssd_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) __A, (__v8si) __B, (__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpwssd_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) __A, (__v4si) __B, (__v4si) __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpwssds_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
{
  return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) __A, (__v8si) __B, (__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpwssds_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
{
  return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) __A, (__v4si) __B, (__v4si) __C);
}
#ifdef __DISABLE_AVXVNNIVL__
#undef __DISABLE_AVXVNNIVL__
#pragma GCC pop_options
#endif
#endif
#endif
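As an aside, the AVX-VNNI intrinsics declared above fuse an 8-bit multiply with a 32-bit accumulate; a minimal sketch of one accumulation step might look like this (hypothetical names; assumes a compiler and CPU with AVX-VNNI, e.g. -mavxvnni):

/* Sketch only, not part of this change: one vpdpbusd step.
   For each 32-bit lane, multiplies four unsigned bytes of u8s with four
   signed bytes of s8s, sums the products, and adds the result to acc. */
#include <immintrin.h>

__m256i dot_step(__m256i acc, __m256i u8s, __m256i s8s) {
  return _mm256_dpbusd_avx_epi32(acc, u8s, s8s);
}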
79 third_party/intel/bmi2intrin.internal.h (vendored)
@@ -1,67 +1,74 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _BMI2INTRIN_H_INCLUDED
#define _BMI2INTRIN_H_INCLUDED
#ifndef __BMI2__
#pragma GCC push_options
#pragma GCC target("bmi2")
#define __DISABLE_BMI2__
#endif
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u32 (unsigned int __X, unsigned int __Y)
{
  return __builtin_ia32_bzhi_si (__X, __Y);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u32 (unsigned int __X, unsigned int __Y)
{
  return __builtin_ia32_pdep_si (__X, __Y);
}
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u32 (unsigned int __X, unsigned int __Y)
{
  return __builtin_ia32_pext_si (__X, __Y);
}
#ifdef __x86_64__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
{
  return __builtin_ia32_bzhi_di (__X, __Y);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u64 (unsigned long long __X, unsigned long long __Y)
{
  return __builtin_ia32_pdep_di (__X, __Y);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u64 (unsigned long long __X, unsigned long long __Y)
{
  return __builtin_ia32_pext_di (__X, __Y);
}
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u64 (unsigned long long __X, unsigned long long __Y, unsigned long long *__P)
{
  unsigned __int128 __res = (unsigned __int128) __X * __Y;
  *__P = (unsigned long long) (__res >> 64);
  return (unsigned long long) __res;
}
#else
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
{
  unsigned long long __res = (unsigned long long) __X * __Y;
  *__P = (unsigned int) (__res >> 32);
  return (unsigned int) __res;
}
#endif
#ifdef __DISABLE_BMI2__
#undef __DISABLE_BMI2__
#pragma GCC pop_options
#endif
#endif
#endif
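As an aside, _pdep_u32 and _pext_u32 above scatter and gather bits according to a mask, and the two operations invert each other; a round-trip sketch (assumes -mbmi2; the mask and packed values are arbitrary examples):

/* Sketch only, not part of this change: BMI2 bit deposit/extract round trip. */
#include <immintrin.h>
#include <assert.h>

void pdep_pext_demo(void) {
  unsigned mask = 0x00F0F00Fu;
  unsigned packed = 0x5Au;                          /* low-order bits to scatter */
  unsigned scattered = _pdep_u32(packed, mask);     /* deposit into mask positions */
  unsigned gathered  = _pext_u32(scattered, mask);  /* gather them back out */
  assert(gathered == packed);
}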
181 third_party/intel/bmiintrin.internal.h (vendored)
@@ -1,160 +1,135 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _BMIINTRIN_H_INCLUDED
#define _BMIINTRIN_H_INCLUDED
#ifndef __BMI__
#pragma GCC push_options
#pragma GCC target("bmi")
#define __DISABLE_BMI__
#endif
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u16 (unsigned short __X)
{
  return __builtin_ia32_tzcnt_u16 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__andn_u32 (unsigned int __X, unsigned int __Y)
{
  return ~__X & __Y;
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextr_u32 (unsigned int __X, unsigned int __Y)
{
  return __builtin_ia32_bextr_u32 (__X, __Y);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
{
  return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u32 (unsigned int __X)
{
  return __X & -__X;
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsi_u32 (unsigned int __X)
{
  return __blsi_u32 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u32 (unsigned int __X)
{
  return __X ^ (__X - 1);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsmsk_u32 (unsigned int __X)
{
  return __blsmsk_u32 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u32 (unsigned int __X)
{
  return __X & (__X - 1);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsr_u32 (unsigned int __X)
{
  return __blsr_u32 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u32 (unsigned int __X)
{
  return __builtin_ia32_tzcnt_u32 (__X);
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u32 (unsigned int __X)
{
  return __builtin_ia32_tzcnt_u32 (__X);
}
#ifdef __x86_64__
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
  return ~__X & __Y;
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextr_u64 (unsigned long long __X, unsigned long long __Y)
{
  return __builtin_ia32_bextr_u64 (__X, __Y);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
{
  return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u64 (unsigned long long __X)
{
  return __X & -__X;
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsi_u64 (unsigned long long __X)
{
  return __blsi_u64 (__X);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u64 (unsigned long long __X)
{
  return __X ^ (__X - 1);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsmsk_u64 (unsigned long long __X)
{
  return __blsmsk_u64 (__X);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u64 (unsigned long long __X)
{
  return __X & (__X - 1);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsr_u64 (unsigned long long __X)
{
  return __blsr_u64 (__X);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u64 (unsigned long long __X)
{
  return __builtin_ia32_tzcnt_u64 (__X);
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u64 (unsigned long long __X)
{
  return __builtin_ia32_tzcnt_u64 (__X);
}
#endif
#ifdef __DISABLE_BMI__
#undef __DISABLE_BMI__
#pragma GCC pop_options
#endif
#endif
#endif
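As an aside, the BMI1 helpers above combine naturally for walking the set bits of a word; a minimal sketch (hypothetical callback name; assumes -mbmi):

/* Sketch only, not part of this change: visit each set bit, lowest first. */
#include <immintrin.h>

void for_each_set_bit(unsigned m, void (*visit)(unsigned bit)) {
  while (m) {
    visit(_tzcnt_u32(m));   /* index of the lowest set bit */
    m = __blsr_u32(m);      /* clear the lowest set bit */
  }
}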
82 third_party/intel/cetintrin.internal.h (vendored)
@@ -1,73 +1,95 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <cetintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CETINTRIN_H_INCLUDED
#define _CETINTRIN_H_INCLUDED
#ifndef __SHSTK__
#pragma GCC push_options
#pragma GCC target ("shstk")
#define __DISABLE_SHSTK__
#endif
#ifdef __x86_64__
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp (void)
{
  return __builtin_ia32_rdsspq ();
}
#else
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp (void)
{
  return __builtin_ia32_rdsspd ();
}
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_inc_ssp (unsigned int __B)
{
#ifdef __x86_64__
  __builtin_ia32_incsspq ((unsigned long long) __B);
#else
  __builtin_ia32_incsspd (__B);
#endif
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_saveprevssp (void)
{
  __builtin_ia32_saveprevssp ();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rstorssp (void *__B)
{
  __builtin_ia32_rstorssp (__B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssd (unsigned int __B, void *__C)
{
  __builtin_ia32_wrssd (__B, __C);
}
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssq (unsigned long long __B, void *__C)
{
  __builtin_ia32_wrssq (__B, __C);
}
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussd (unsigned int __B, void *__C)
{
  __builtin_ia32_wrussd (__B, __C);
}
#ifdef __x86_64__
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussq (unsigned long long __B, void *__C)
{
  __builtin_ia32_wrussq (__B, __C);
}
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_setssbsy (void)
{
  __builtin_ia32_setssbsy ();
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_clrssbsy (void *__B)
{
  __builtin_ia32_clrssbsy (__B);
}
#ifdef __DISABLE_SHSTK__
#undef __DISABLE_SHSTK__
#pragma GCC pop_options
#endif
#endif
#endif
21 third_party/intel/cldemoteintrin.internal.h (vendored)
@@ -1,21 +1,24 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <cldemoteintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CLDEMOTE_H_INCLUDED
#define _CLDEMOTE_H_INCLUDED
#ifndef __CLDEMOTE__
#pragma GCC push_options
#pragma GCC target("cldemote")
#define __DISABLE_CLDEMOTE__
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cldemote (void *__A)
{
  __builtin_ia32_cldemote (__A);
}
#ifdef __DISABLE_CLDEMOTE__
#undef __DISABLE_CLDEMOTE__
#pragma GCC pop_options
#endif
#endif
#endif
23 third_party/intel/clflushoptintrin.internal.h (vendored)
@@ -1,23 +1,24 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <clflushoptintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CLFLUSHOPTINTRIN_H_INCLUDED
#define _CLFLUSHOPTINTRIN_H_INCLUDED
#ifndef __CLFLUSHOPT__
#pragma GCC push_options
#pragma GCC target("clflushopt")
#define __DISABLE_CLFLUSHOPT__
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflushopt (void *__A)
{
  __builtin_ia32_clflushopt (__A);
}
#ifdef __DISABLE_CLFLUSHOPT__
#undef __DISABLE_CLFLUSHOPT__
#pragma GCC pop_options
#endif
#endif
#endif
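As an aside, _mm_clflushopt above is typically used to push freshly written cache lines out to memory before handing a buffer to a device; a minimal sketch (assumes -mclflushopt and a 64-byte cache line, both of which are assumptions, as is the buffer name):

/* Sketch only, not part of this change: flush a buffer line by line. */
#include <immintrin.h>
#include <stddef.h>

void flush_buffer(const char *buf, size_t len) {
  for (size_t off = 0; off < len; off += 64)   /* 64-byte cache lines assumed */
    _mm_clflushopt((void *)(buf + off));
  _mm_sfence();                                /* order the flushes before later stores */
}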
23 third_party/intel/clwbintrin.internal.h (vendored)
@@ -1,23 +1,24 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <clwbintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _CLWBINTRIN_H_INCLUDED
#define _CLWBINTRIN_H_INCLUDED
#ifndef __CLWB__
#pragma GCC push_options
#pragma GCC target("clwb")
#define __DISABLE_CLWB__
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clwb (void *__A)
{
  __builtin_ia32_clwb (__A);
}
#ifdef __DISABLE_CLWB__
#undef __DISABLE_CLWB__
#pragma GCC pop_options
#endif
#endif
#endif
19 third_party/intel/clzerointrin.internal.h (vendored)
@@ -1,21 +1,20 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _CLZEROINTRIN_H_INCLUDED
#define _CLZEROINTRIN_H_INCLUDED
#ifndef __CLZERO__
#pragma GCC push_options
#pragma GCC target("clzero")
#define __DISABLE_CLZERO__
#endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clzero (void * __I)
{
  __builtin_ia32_clzero (__I);
}
#ifdef __DISABLE_CLZERO__
#undef __DISABLE_CLZERO__
#pragma GCC pop_options
#endif
#endif
#endif
115 third_party/intel/cpuid.internal.h (vendored)
@@ -1,8 +1,10 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _CPUID_H_INCLUDED
#define _CPUID_H_INCLUDED
#define bit_AVXVNNI (1 << 4)
#define bit_AVX512BF16 (1 << 5)
#define bit_HRESET (1 << 22)
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
#define bit_LZCNT (1 << 5)
@@ -19,14 +21,12 @@
#define bit_AVX (1 << 28)
#define bit_F16C (1 << 29)
#define bit_RDRND (1 << 30)
#define bit_CMPXCHG8B (1 << 8)
#define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23)
#define bit_FXSAVE (1 << 24)
#define bit_SSE (1 << 25)
#define bit_SSE2 (1 << 26)
#define bit_LAHF_LM (1 << 0)
#define bit_ABM (1 << 5)
#define bit_SSE4a (1 << 6)
@@ -36,15 +36,12 @@
#define bit_FMA4 (1 << 16)
#define bit_TBM (1 << 21)
#define bit_MWAITX (1 << 29)
#define bit_MMXEXT (1 << 22)
#define bit_LM (1 << 29)
#define bit_3DNOWP (1 << 30)
#define bit_3DNOW (1u << 31)
#define bit_CLZERO (1 << 0)
#define bit_WBNOINVD (1 << 9)
#define bit_FSGSBASE (1 << 0)
#define bit_SGX (1 << 2)
#define bit_BMI (1 << 3)
@@ -66,7 +63,6 @@
#define bit_SHA (1 << 29)
#define bit_AVX512BW (1 << 30)
#define bit_AVX512VL (1u << 31)
#define bit_PREFETCHWT1 (1 << 0)
#define bit_AVX512VBMI (1 << 1)
#define bit_PKU (1 << 3)
@@ -83,97 +79,76 @@
#define bit_RDPID (1 << 22)
#define bit_MOVDIRI (1 << 27)
#define bit_MOVDIR64B (1 << 28)
#define bit_ENQCMD (1 << 29)
#define bit_CLDEMOTE (1 << 25)
#define bit_KL (1 << 23)
#define bit_AVX5124VNNIW (1 << 2)
#define bit_AVX5124FMAPS (1 << 3)
#define bit_AVX512VP2INTERSECT (1 << 8)
#define bit_IBT (1 << 20)
#define bit_UINTR (1 << 5)
#define bit_PCONFIG (1 << 18)
#define bit_SERIALIZE (1 << 14)
#define bit_TSXLDTRK (1 << 16)
#define bit_AMX_BF16 (1 << 22)
#define bit_AMX_TILE (1 << 24)
#define bit_AMX_INT8 (1 << 25)
#define bit_BNDREGS (1 << 3)
#define bit_BNDCSR (1 << 4)
#define bit_XSAVEOPT (1 << 0)
#define bit_XSAVEC (1 << 1)
#define bit_XSAVES (1 << 3)
#define bit_PTWRITE (1 << 4)
#define bit_AESKLE ( 1<<0 )
#define bit_WIDEKL ( 1<<2 )
#define signature_AMD_ebx 0x68747541
#define signature_AMD_ecx 0x444d4163
#define signature_AMD_edx 0x69746e65
#define signature_CENTAUR_ebx 0x746e6543
#define signature_CENTAUR_ecx 0x736c7561
#define signature_CENTAUR_edx 0x48727561
#define signature_CYRIX_ebx 0x69727943
#define signature_CYRIX_ecx 0x64616574
#define signature_CYRIX_edx 0x736e4978
#define signature_INTEL_ebx 0x756e6547
#define signature_INTEL_ecx 0x6c65746e
#define signature_INTEL_edx 0x49656e69
#define signature_TM1_ebx 0x6e617254
#define signature_TM1_ecx 0x55504361
#define signature_TM1_edx 0x74656d73
#define signature_TM2_ebx 0x756e6547
#define signature_TM2_ecx 0x3638784d
#define signature_TM2_edx 0x54656e69
#define signature_NSC_ebx 0x646f6547
#define signature_NSC_ecx 0x43534e20
#define signature_NSC_edx 0x79622065
#define signature_NEXGEN_ebx 0x4778654e
#define signature_NEXGEN_ecx 0x6e657669
#define signature_NEXGEN_edx 0x72446e65
#define signature_RISE_ebx 0x65736952
#define signature_RISE_ecx 0x65736952
#define signature_RISE_edx 0x65736952
#define signature_SIS_ebx 0x20536953
#define signature_SIS_ecx 0x20536953
#define signature_SIS_edx 0x20536953
#define signature_UMC_ebx 0x20434d55
#define signature_UMC_ecx 0x20434d55
#define signature_UMC_edx 0x20434d55
#define signature_VIA_ebx 0x20414956
#define signature_VIA_ecx 0x20414956
#define signature_VIA_edx 0x20414956
#define signature_VORTEX_ebx 0x74726f56
#define signature_VORTEX_ecx 0x436f5320
#define signature_VORTEX_edx 0x36387865
#ifndef __x86_64__
#define __cpuid(level, a, b, c, d) do { if (__builtin_constant_p (level) && (level) != 1) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level)); else __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "1" (0), "2" (0)); } while (0)
#else
#define __cpuid(level, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level))
#endif
#define __cpuid_count(level, count, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "2" (count))
static __inline unsigned int
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
{
  unsigned int __eax, __ebx, __ecx, __edx;
#ifndef __x86_64__
#if __GNUC__ >= 3
@@ -203,35 +178,43 @@
	       : "=&r" (__eax), "=&r" (__ebx)
	       : "i" (0x00200000));
#endif
  if (!((__eax ^ __ebx) & 0x00200000))
    return 0;
#endif
  __cpuid (__ext, __eax, __ebx, __ecx, __edx);
  if (__sig)
    *__sig = __ebx;
  return __eax;
}
static __inline int
__get_cpuid (unsigned int __leaf,
	     unsigned int *__eax, unsigned int *__ebx,
	     unsigned int *__ecx, unsigned int *__edx)
{
  unsigned int __ext = __leaf & 0x80000000;
  unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
  if (__maxlevel == 0 || __maxlevel < __leaf)
    return 0;
  __cpuid (__leaf, *__eax, *__ebx, *__ecx, *__edx);
  return 1;
}
static __inline int
__get_cpuid_count (unsigned int __leaf, unsigned int __subleaf,
		   unsigned int *__eax, unsigned int *__ebx,
		   unsigned int *__ecx, unsigned int *__edx)
{
  unsigned int __ext = __leaf & 0x80000000;
  unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
  if (__maxlevel == 0 || __maxlevel < __leaf)
    return 0;
  __cpuid_count (__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
  return 1;
}
static __inline void
__cpuidex (int __cpuid_info[4], int __leaf, int __subleaf)
{
  __cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
		 __cpuid_info[2], __cpuid_info[3]);
}
#endif
#endif
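As an aside, the __get_cpuid helper above is the usual way to test a feature bit at run time before dispatching to an optimized code path; a minimal sketch (checks bit_AVX in ECX of leaf 1; the include path is the conventional <cpuid.h>, which may differ in this tree):

/* Sketch only, not part of this change: runtime AVX detection via CPUID. */
#include <cpuid.h>
#include <stdio.h>

int main(void) {
  unsigned eax, ebx, ecx, edx;
  if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx & bit_AVX))
    puts("AVX available");
  else
    puts("AVX not available");
}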
1291 third_party/intel/emmintrin.internal.h (vendored): file diff suppressed because it is too large
30 third_party/intel/enqcmdintrin.internal.h (vendored, new file)
@@ -0,0 +1,30 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <enqcmdintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _ENQCMDINTRIN_H_INCLUDED
#define _ENQCMDINTRIN_H_INCLUDED
#ifndef __ENQCMD__
#pragma GCC push_options
#pragma GCC target ("enqcmd")
#define __DISABLE_ENQCMD__
#endif
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_enqcmd (void * __P, const void * __Q)
{
  return __builtin_ia32_enqcmd (__P, __Q);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_enqcmds (void * __P, const void * __Q)
{
  return __builtin_ia32_enqcmds (__P, __Q);
}
#ifdef __DISABLE_ENQCMD__
#undef __DISABLE_ENQCMD__
#pragma GCC pop_options
#endif
#endif
#endif
75 third_party/intel/f16cintrin.internal.h (vendored)
@@ -1,75 +1,58 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
# error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
#endif
#ifndef _F16CINTRIN_H_INCLUDED
#define _F16CINTRIN_H_INCLUDED
#ifndef __F16C__
#pragma GCC push_options
#pragma GCC target("f16c")
#define __DISABLE_F16C__
#endif
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtsh_ss (unsigned short __S)
{
  __v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
  __v4sf __A = __builtin_ia32_vcvtph2ps (__H);
  return __builtin_ia32_vec_ext_v4sf (__A, 0);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_ps (__m128i __A)
{
  return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_ps (__m128i __A)
{
  return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
}
#ifdef __OPTIMIZE__
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtss_sh (float __F, const int __I)
{
  __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
  __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
  return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtps_ph (__m128 __A, const int __I)
{
  return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtps_ph (__m256 __A, const int __I)
{
  return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
}
#else
#define _cvtss_sh(__F, __I) (__extension__ ({ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); }))
#define _mm_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) (A), (int) (I)))
#define _mm256_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) (A), (int) (I)))
#endif
#ifdef __DISABLE_F16C__
#undef __DISABLE_F16C__
#pragma GCC pop_options
#endif
#endif
#endif
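As an aside, the F16C conversions above round-trip values between single and half precision; a minimal sketch (assumes -mf16c; an immediate of 0 selects round-to-nearest-even):

/* Sketch only, not part of this change: float -> half -> float round trip. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  float x = 3.14159f;
  unsigned short h = _cvtss_sh(x, 0);  /* encode as a 16-bit half */
  float y = _cvtsh_ss(h);              /* decode back, with rounding loss */
  printf("%f -> %f\n", x, y);
}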
299  third_party/intel/fma4intrin.internal.h  vendored
@@ -1,184 +1,179 @@
+/* clang-format off */
+#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
 #ifndef _X86INTRIN_H_INCLUDED
 # error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
 #endif
 
 #ifndef _FMA4INTRIN_H_INCLUDED
 #define _FMA4INTRIN_H_INCLUDED
 
 #include "third_party/intel/ammintrin.internal.h"
 
 #ifndef __FMA4__
 #pragma GCC push_options
 #pragma GCC target("fma4")
 #define __DISABLE_FMA4__
-#endif /* __FMA4__ */
+#endif
 
-__funline __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
-  return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
 [The remaining FMA4 intrinsics are rewritten the same way, each __funline
  wrapper becoming an extern __inline definition over the corresponding
  __builtin_ia32_vfmadd* builtin: _mm_macc_pd/ss/sd, _mm_msub_ps/pd/ss/sd,
  _mm_nmacc_ps/pd/ss/sd, _mm_nmsub_ps/pd/ss/sd, _mm_maddsub_ps/pd,
  _mm_msubadd_ps/pd, and the 256-bit forms _mm256_macc_ps/pd,
  _mm256_msub_ps/pd, _mm256_nmacc_ps/pd, _mm256_nmsub_ps/pd,
  _mm256_maddsub_ps/pd and _mm256_msubadd_ps/pd.]
 
 #ifdef __DISABLE_FMA4__
 #undef __DISABLE_FMA4__
 #pragma GCC pop_options
-#endif /* __DISABLE_FMA4__ */
+#endif
 
 #endif
+#endif
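A minimal sketch (not part of the commit) of how the FMA4 intrinsics above are used. It assumes <x86intrin.h>, compilation with -mfma4, and hardware that actually implements FMA4 (certain AMD CPUs only); _mm_macc_ps(a, b, c) computes a*b + c per lane.

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  __m128 a = _mm_set1_ps(2.0f);
  __m128 b = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 c = _mm_set1_ps(10.0f);
  __m128 r = _mm_macc_ps(a, b, c);   /* lanes: 12, 14, 16, 18 */
  float out[4];
  _mm_storeu_ps(out, r);
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}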
221  third_party/intel/fmaintrin.internal.h  vendored
@@ -1,177 +1,246 @@
+/* clang-format off */
+#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
 #ifndef _IMMINTRIN_H_INCLUDED
 # error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
 #endif
 
 #ifndef _FMAINTRIN_H_INCLUDED
 #define _FMAINTRIN_H_INCLUDED
 
 #ifndef __FMA__
 #pragma GCC push_options
 #pragma GCC target("fma")
 #define __DISABLE_FMA__
-#endif /* __FMA__ */
+#endif
 
-__funline __m128d _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
   return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
                                            (__v2df)__C);
 }
 
 [The same __funline-to-extern __inline conversion is applied to every FMA3
  intrinsic in the file: _mm_fmadd_ps/sd/ss, _mm_fmsub_pd/ps/sd/ss,
  _mm_fnmadd_pd/ps/sd/ss, _mm_fnmsub_pd/ps/sd/ss, _mm_fmaddsub_pd/ps,
  _mm_fmsubadd_pd/ps, and the 256-bit _mm256_fmadd, _mm256_fmsub,
  _mm256_fnmadd, _mm256_fnmsub, _mm256_fmaddsub and _mm256_fmsubadd
  variants, each forwarding to the matching __builtin_ia32_vf*madd/sub
  builtin.]
 
 #ifdef __DISABLE_FMA__
 #undef __DISABLE_FMA__
 #pragma GCC pop_options
-#endif /* __DISABLE_FMA__ */
+#endif
 
 #endif
+#endif
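A minimal sketch (not part of the commit) of the FMA3 intrinsics above, assuming <immintrin.h> and compilation with -mavx -mfma; _mm256_fmadd_ps(a, b, c) computes a*b + c with a single rounding step.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256 a = _mm256_set1_ps(3.0f);
  __m256 b = _mm256_setr_ps(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
  __m256 c = _mm256_set1_ps(1.0f);
  __m256 r = _mm256_fmadd_ps(a, b, c);   /* lanes: 1, 4, 7, ..., 22 */
  float out[8];
  _mm256_storeu_ps(out, r);
  for (int i = 0; i < 8; ++i) printf("%g ", out[i]);
  printf("\n");
  return 0;
}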
41  third_party/intel/fxsrintrin.internal.h  vendored
@@ -1,37 +1,44 @@
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead."
+/* clang-format off */
+#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
+#ifndef _X86GPRINTRIN_H_INCLUDED
+# error "Never use <fxsrintrin.h> directly; include <x86gprintrin.h> instead."
 #endif
 
 #ifndef _FXSRINTRIN_H_INCLUDED
 #define _FXSRINTRIN_H_INCLUDED
 
 #ifndef __FXSR__
 #pragma GCC push_options
 #pragma GCC target("fxsr")
 #define __DISABLE_FXSR__
-#endif /* __FXSR__ */
+#endif
 
-__funline void _fxsave(void *__P) {
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxsave (void *__P)
+{
   __builtin_ia32_fxsave (__P);
 }
 
-__funline void _fxrstor(void *__P) {
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxrstor (void *__P)
+{
   __builtin_ia32_fxrstor (__P);
 }
 
 #ifdef __x86_64__
-__funline void _fxsave64(void *__P) {
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxsave64 (void *__P)
+{
   __builtin_ia32_fxsave64 (__P);
 }
 
-__funline void _fxrstor64(void *__P) {
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_fxrstor64 (void *__P)
+{
   __builtin_ia32_fxrstor64 (__P);
 }
 #endif
 
 #ifdef __DISABLE_FXSR__
 #undef __DISABLE_FXSR__
 #pragma GCC pop_options
-#endif /* __DISABLE_FXSR__ */
+#endif
 
-#endif /* _FXSRINTRIN_H_INCLUDED */
+#endif
+#endif
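A minimal sketch (not part of the commit) of the FXSR intrinsics above. It assumes <x86intrin.h>, compilation with -mfxsr, and a 512-byte, 16-byte-aligned save area as the FXSAVE instruction requires; the first two bytes of the image are the x87 control word.

#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  static _Alignas(16) uint8_t area[512];  /* FXSAVE area: 512 bytes, 16-byte aligned */
  _fxsave(area);                          /* snapshot x87/MMX/SSE state */
  _fxrstor(area);                         /* restore the same state */
  printf("FCW = 0x%02x%02x\n", area[1], area[0]);
  return 0;
}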
437  third_party/intel/gfniintrin.internal.h  vendored
@@ -1,311 +1,310 @@
+/* clang-format off */
+#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
 #ifndef _IMMINTRIN_H_INCLUDED
 #error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
 #endif
 
 #ifndef _GFNIINTRIN_H_INCLUDED
 #define _GFNIINTRIN_H_INCLUDED
 
 #if !defined(__GFNI__) || !defined(__SSE2__)
 #pragma GCC push_options
 #pragma GCC target("gfni,sse2")
 #define __DISABLE_GFNI__
-#endif /* __GFNI__ */
+#endif
 
-__funline __m128i _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi((__v16qi)__A, (__v16qi)__B);
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
+{
+  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
+                                                   (__v16qi) __B);
 }
 
 #ifdef __OPTIMIZE__
-__funline __m128i _mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B,
-                                                const int __C) {
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
+{
   return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
                                                            (__v16qi) __B,
                                                            __C);
 }
 
 [The rest of the file receives the same treatment: _mm_gf2p8affine_epi64_epi8
  and the single-line macro fallbacks used when __OPTIMIZE__ is not defined;
  the GFNI+AVX variants _mm256_gf2p8mul_epi8, _mm256_gf2p8affineinv_epi64_epi8
  and _mm256_gf2p8affine_epi64_epi8; the GFNI+AVX512VL masked forms
  _mm_mask_ / _mm_maskz_gf2p8mul_epi8, _mm_mask_ / _mm_maskz_gf2p8affineinv_epi64_epi8
  and _mm_mask_ / _mm_maskz_gf2p8affine_epi64_epi8; the GFNI+AVX512VL+AVX512BW
  _mm256_mask_ / _mm256_maskz_ forms; and the GFNI+AVX512F+AVX512BW _mm512_
  forms (_mm512_gf2p8mul_epi8, _mm512_gf2p8affineinv_epi64_epi8,
  _mm512_gf2p8affine_epi64_epi8 plus their mask/maskz variants), each guarded
  by the matching #pragma GCC target / __DISABLE_* block and each converted
  from a __funline wrapper to an extern __inline definition.]
 
 #else
-#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
+#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
|
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
|
||||||
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
|
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||||
(__v64qi)(__m512i)(A), (__mmask64)(B)))
|
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||||
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
|
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
|
||||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
|
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||||
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
|
|
||||||
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
|
|
||||||
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
|
|
||||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi( \
|
|
||||||
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
|
||||||
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
|
|
||||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
|
|
||||||
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
|
|
||||||
(__v64qi)(__m512i)(A), (__mmask64)(B)))
|
|
||||||
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
|
|
||||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
|
|
||||||
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
|
|
||||||
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
|
|
||||||
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
|
|
||||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi( \
|
|
||||||
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __DISABLE_GFNIAVX512FBW__
|
#ifdef __DISABLE_GFNIAVX512FBW__
|
||||||
#undef __DISABLE_GFNIAVX512FBW__
|
#undef __DISABLE_GFNIAVX512FBW__
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __GFNIAVX512FBW__ */
|
#endif
|
||||||
|
#endif
|
||||||
#endif /* _GFNIINTRIN_H_INCLUDED */
|
#endif
|
||||||
|
|
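As a quick orientation for the GFNI surface above, here is a minimal hedged sketch (not part of the commit) that multiplies 64 byte pairs in GF(2^8) with _mm512_gf2p8mul_epi8. It assumes the compiler accepts a per-function target attribute for these ISAs and that the caller has already verified GFNI and AVX-512BW support at run time.

#include <immintrin.h>

/* Multiply 64 byte pairs in GF(2^8); caller must have checked CPU support. */
__attribute__((target("gfni,avx512f,avx512bw")))
static void gf2p8_mul_block(unsigned char dst[64],
                            const unsigned char a[64],
                            const unsigned char b[64]) {
  __m512i va = _mm512_loadu_si512(a);   /* unaligned 64-byte loads */
  __m512i vb = _mm512_loadu_si512(b);
  __m512i vr = _mm512_gf2p8mul_epi8(va, vb);
  _mm512_storeu_si512(dst, vr);
}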
24
third_party/intel/hresetintrin.internal.h
vendored
Normal file
@ -0,0 +1,24 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _X86GPRINTRIN_H_INCLUDED
# error "Never use <hresetintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _HRESETINTRIN_H_INCLUDED
#define _HRESETINTRIN_H_INCLUDED
#ifndef __HRESET__
#pragma GCC push_options
#pragma GCC target ("hreset")
#define __DISABLE_HRESET__
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_hreset (unsigned int __EAX)
{
  __builtin_ia32_hreset (__EAX);
}
#ifdef __DISABLE_HRESET__
#undef __DISABLE_HRESET__
#pragma GCC pop_options
#endif
#endif
#endif
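For context, a minimal hedged sketch (not from the commit) of how the new _hreset wrapper might be invoked; the mask constant below is hypothetical, since the real bit layout comes from CPUID leaf 0x20, and the code assumes the CPU advertises HRESET.

#include <x86gprintrin.h>

#define HRESET_HISTORY_BIT 0x1u  /* hypothetical mask bit, for illustration only */

__attribute__((target("hreset")))
static void reset_predictor_history(void) {
  _hreset(HRESET_HISTORY_BIT);  /* discard the selected hardware history state */
}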
205
third_party/intel/ia32intrin.internal.h
vendored
@ -1,166 +1,199 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <ia32intrin.h> directly; include <x86gprintrin.h> instead."
#endif
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfd (int __X) { return __builtin_ctz (__X); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrd (int __X) { return __builtin_ia32_bsrsi (__X); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapd (int __X) { return __builtin_bswap32 (__X); }

#ifndef __iamcu__
#ifndef __SSE4_2__
#pragma GCC push_options
#pragma GCC target("sse4.2")
#define __DISABLE_SSE4_2__
#endif
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32b (unsigned int __C, unsigned char __V) { return __builtin_ia32_crc32qi (__C, __V); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32w (unsigned int __C, unsigned short __V) { return __builtin_ia32_crc32hi (__C, __V); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32d (unsigned int __C, unsigned int __V) { return __builtin_ia32_crc32si (__C, __V); }
#ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__
#pragma GCC pop_options
#endif
#endif

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntd (unsigned int __X) { return __builtin_popcount (__X); }

#ifndef __iamcu__
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdpmc (int __S) { return __builtin_ia32_rdpmc (__S); }
#endif

#define __rdtsc() __builtin_ia32_rdtsc ()
#ifndef __iamcu__
#define __rdtscp(a) __builtin_ia32_rdtscp (a)
#endif

extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolb (unsigned char __X, int __C) { return __builtin_ia32_rolqi (__X, __C); }
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolw (unsigned short __X, int __C) { return __builtin_ia32_rolhi (__X, __C); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rold (unsigned int __X, int __C) { __C &= 31; return (__X << __C) | (__X >> (-__C & 31)); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorb (unsigned char __X, int __C) { return __builtin_ia32_rorqi (__X, __C); }
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorw (unsigned short __X, int __C) { return __builtin_ia32_rorhi (__X, __C); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rord (unsigned int __X, int __C) { __C &= 31; return (__X >> __C) | (__X << (-__C & 31)); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__pause (void) { __builtin_ia32_pause (); }

#ifdef __x86_64__
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfq (long long __X) { return __builtin_ctzll (__X); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrq (long long __X) { return __builtin_ia32_bsrdi (__X); }
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapq (long long __X) { return __builtin_bswap64 (__X); }

#ifndef __SSE4_2__
#pragma GCC push_options
#pragma GCC target("sse4.2")
#define __DISABLE_SSE4_2__
#endif
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32q (unsigned long long __C, unsigned long long __V) { return __builtin_ia32_crc32di (__C, __V); }
#ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__
#pragma GCC pop_options
#endif

extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntq (unsigned long long __X) { return __builtin_popcountll (__X); }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolq (unsigned long long __X, int __C) { __C &= 63; return (__X << __C) | (__X >> (-__C & 63)); }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorq (unsigned long long __X, int __C) { __C &= 63; return (__X >> __C) | (__X << (-__C & 63)); }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void) { return __builtin_ia32_readeflags_u64 (); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned long long __X) { __builtin_ia32_writeeflags_u64 (__X); }

#define _bswap64(a) __bswapq(a)
#define _popcnt64(a) __popcntq(a)
#else
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void) { return __builtin_ia32_readeflags_u32 (); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned int __X) { __builtin_ia32_writeeflags_u32 (__X); }
#endif

#ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
@ -168,7 +201,6 @@
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif

#define _bit_scan_forward(a) __bsfd(a)
#define _bit_scan_reverse(a) __bsrd(a)
#define _bswap(a) __bswapd(a)
@ -176,9 +208,10 @@
#ifndef __iamcu__
#define _rdpmc(a) __rdpmc(a)
#define _rdtscp(a) __rdtscp(a)
#endif
#define _rdtsc() __rdtsc()
#define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b))
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
#endif
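A small hedged sketch (my own, not from the commit) of how the convenience wrappers above are typically used. It relies only on _bit_scan_forward, _rotl, and __rdtsc, which the header defines unconditionally on x86_64, and it assumes the usual <x86intrin.h> entry point pulls this file in.

#include <stdio.h>
#include <x86intrin.h>

int main(void) {
  unsigned int word = 0x00F0u;
  int first_set = _bit_scan_forward(word);   /* index of lowest set bit: 4 */
  unsigned int rotated = _rotl(word, 8);     /* rotate left by 8 bits */
  unsigned long long t0 = __rdtsc();         /* sample the time-stamp counter */
  printf("bsf=%d rotl=%#x tsc=%llu\n", first_set, rotated, t0);
  return 0;
}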
152
third_party/intel/immintrin.internal.h
vendored
@ -1,8 +1,8 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#define _IMMINTRIN_H_INCLUDED
#include "third_party/intel/x86gprintrin.internal.h"
#include "third_party/intel/mmintrin.internal.h"
#include "third_party/intel/xmmintrin.internal.h"
#include "third_party/intel/emmintrin.internal.h"
@ -10,12 +10,8 @@
#include "third_party/intel/tmmintrin.internal.h"
#include "third_party/intel/smmintrin.internal.h"
#include "third_party/intel/wmmintrin.internal.h"
#include "third_party/intel/avxintrin.internal.h"
#include "third_party/intel/avxvnniintrin.internal.h"
#include "third_party/intel/avx2intrin.internal.h"
#include "third_party/intel/avx512fintrin.internal.h"
#include "third_party/intel/avx512erintrin.internal.h"
@ -39,143 +35,21 @@
#include "third_party/intel/avx512vnnivlintrin.internal.h"
#include "third_party/intel/avx512vpopcntdqvlintrin.internal.h"
#include "third_party/intel/avx512bitalgintrin.internal.h"
#include "third_party/intel/avx512vp2intersectintrin.internal.h"
#include "third_party/intel/avx512vp2intersectvlintrin.internal.h"
#include "third_party/intel/shaintrin.internal.h"
#include "third_party/intel/fmaintrin.internal.h"
#include "third_party/intel/f16cintrin.internal.h"
#include "third_party/intel/rtmintrin.internal.h"
#include "third_party/intel/gfniintrin.internal.h"
#include "third_party/intel/vaesintrin.internal.h"
#include "third_party/intel/vpclmulqdqintrin.internal.h"
#include "third_party/intel/avx512bf16vlintrin.internal.h"
#include "third_party/intel/avx512bf16intrin.internal.h"
#include "third_party/intel/amxtileintrin.internal.h"
#include "third_party/intel/amxint8intrin.internal.h"
#include "third_party/intel/amxbf16intrin.internal.h"
#include "third_party/intel/prfchwintrin.internal.h"
#include "third_party/intel/keylockerintrin.internal.h"
#endif
#endif
93
third_party/intel/keylockerintrin.internal.h
vendored
Normal file
@ -0,0 +1,93 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
# error "Never use <keylockerintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _KEYLOCKERINTRIN_H_INCLUDED
#define _KEYLOCKERINTRIN_H_INCLUDED
#ifndef __KL__
#pragma GCC push_options
#pragma GCC target("kl")
#define __DISABLE_KL__
#endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadiwkey (unsigned int __I, __m128i __A, __m128i __B, __m128i __C)
{ __builtin_ia32_loadiwkey ((__v2di) __B, (__v2di) __C, (__v2di) __A, __I); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_encodekey128_u32 (unsigned int __I, __m128i __A, void * __P)
{ return __builtin_ia32_encodekey128_u32 (__I, (__v2di)__A, __P); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_encodekey256_u32 (unsigned int __I, __m128i __A, __m128i __B, void * __P)
{ return __builtin_ia32_encodekey256_u32 (__I, (__v2di)__A, (__v2di)__B, __P); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{ return __builtin_ia32_aesdec128kl_u8 ((__v2di *) __A, (__v2di) __B, __P); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{ return __builtin_ia32_aesdec256kl_u8 ((__v2di *) __A, (__v2di) __B, __P); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{ return __builtin_ia32_aesenc128kl_u8 ((__v2di *) __A, (__v2di) __B, __P); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{ return __builtin_ia32_aesenc256kl_u8 ((__v2di *) __A, (__v2di) __B, __P); }
#ifdef __DISABLE_KL__
#undef __DISABLE_KL__
#pragma GCC pop_options
#endif
#ifndef __WIDEKL__
#pragma GCC push_options
#pragma GCC target("widekl")
#define __DISABLE_WIDEKL__
#endif
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdecwide128kl_u8 (__m128i __A[8], const __m128i __B[8], const void * __P)
{ return __builtin_ia32_aesdecwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdecwide256kl_u8 (__m128i __A[8], const __m128i __B[8], const void * __P)
{ return __builtin_ia32_aesdecwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesencwide128kl_u8 (__m128i __A[8], const __m128i __B[8], const void * __P)
{ return __builtin_ia32_aesencwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P); }
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesencwide256kl_u8 (__m128i __A[8], const __m128i __B[8], const void * __P)
{ return __builtin_ia32_aesencwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P); }
#ifdef __DISABLE_WIDEKL__
#undef __DISABLE_WIDEKL__
#pragma GCC pop_options
#endif
#endif
#endif
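To make the intent of the new Key Locker surface concrete, here is a minimal hedged sketch (not part of the commit) of the documented flow: wrap an AES-128 key into a handle with _mm_encodekey128_u32, then encrypt one block through the handle with _mm_aesenc128kl_u8. It assumes the OS has already programmed the internal wrapping key with _mm_loadiwkey, that the CPU reports KL support, and it checks the status byte the header's signature returns.

#include <immintrin.h>

/* Returns 0 on success; `handle` must hold 48 bytes for an AES-128 handle. */
__attribute__((target("kl")))
static int encrypt_block_with_keylocker(__m128i key, __m128i *block,
                                        unsigned char handle[48]) {
  /* Wrap the raw key into an opaque handle; 0 requests no usage restrictions. */
  _mm_encodekey128_u32(0, key, handle);
  /* Encrypt in place through the handle; a nonzero result means the handle was rejected. */
  unsigned char bad = _mm_aesenc128kl_u8(block, *block, handle);
  return bad ? -1 : 0;
}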
69
third_party/intel/lwpintrin.internal.h
vendored
@ -1,73 +1,68 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <lwpintrin.h> directly; include <x86gprintrin.h> instead."
#endif

#ifndef _LWPINTRIN_H_INCLUDED
#define _LWPINTRIN_H_INCLUDED

#ifndef __LWP__
#pragma GCC push_options
#pragma GCC target("lwp")
#define __DISABLE_LWP__
#endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__llwpcb (void *__pcbAddress) { __builtin_ia32_llwpcb (__pcbAddress); }
extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__slwpcb (void) { return __builtin_ia32_slwpcb (); }

#ifdef __OPTIMIZE__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
{ __builtin_ia32_lwpval32 (__data2, __data1, __flags); }
#ifdef __x86_64__
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpval64 (unsigned long long __data2, unsigned int __data1, unsigned int __flags)
{ __builtin_ia32_lwpval64 (__data2, __data1, __flags); }
#endif
#else
#define __lwpval32(D2, D1, F) (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
#ifdef __x86_64__
#define __lwpval64(D2, D1, F) (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
#endif
#endif

#ifdef __OPTIMIZE__
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
{ return __builtin_ia32_lwpins32 (__data2, __data1, __flags); }
#ifdef __x86_64__
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lwpins64 (unsigned long long __data2, unsigned int __data1, unsigned int __flags)
{ return __builtin_ia32_lwpins64 (__data2, __data1, __flags); }
#endif
#else
#define __lwpins32(D2, D1, F) (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
#ifdef __x86_64__
#define __lwpins64(D2, D1, F) (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
#endif
#endif

#ifdef __DISABLE_LWP__
#undef __DISABLE_LWP__
#pragma GCC pop_options
#endif

#endif
#endif
42
third_party/intel/lzcntintrin.internal.h
vendored
@ -1,41 +1,45 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <lzcntintrin.h> directly; include <x86gprintrin.h> instead."
#endif

#ifndef _LZCNTINTRIN_H_INCLUDED
#define _LZCNTINTRIN_H_INCLUDED

#ifndef __LZCNT__
#pragma GCC push_options
#pragma GCC target("lzcnt")
#define __DISABLE_LZCNT__
#endif
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt16 (unsigned short __X) { return __builtin_ia32_lzcnt_u16 (__X); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt32 (unsigned int __X) { return __builtin_ia32_lzcnt_u32 (__X); }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u32 (unsigned int __X) { return __builtin_ia32_lzcnt_u32 (__X); }
#ifdef __x86_64__
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt64 (unsigned long long __X) { return __builtin_ia32_lzcnt_u64 (__X); }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_lzcnt_u64 (unsigned long long __X) { return __builtin_ia32_lzcnt_u64 (__X); }
#endif

#ifdef __DISABLE_LZCNT__
#undef __DISABLE_LZCNT__
#pragma GCC pop_options
#endif

#endif
#endif
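As a usage note (my own hedged sketch, not from the commit): _lzcnt_u32 and _lzcnt_u64 count leading zero bits and, unlike a plain BSR, are well defined for a zero input, where they return the operand width. The caller still needs a CPU with LZCNT, so the function below opts in with a per-function target attribute.

#include <x86intrin.h>

/* Integer log2, rounded down; returns -1 for x == 0. */
__attribute__((target("lzcnt")))
static int floor_log2_u64(unsigned long long x) {
  if (!x) return -1;
  return 63 - (int)_lzcnt_u64(x);  /* leading zeros give the highest set bit index */
}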
167
third_party/intel/mm3dnow.internal.h
vendored
@ -1,9 +1,9 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _MM3DNOW_H_INCLUDED
#define _MM3DNOW_H_INCLUDED
#include "third_party/intel/mmintrin.internal.h"
#include "third_party/intel/prfchwintrin.internal.h"

#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW__
#pragma GCC push_options
#ifdef __x86_64__
@ -12,110 +12,128 @@
#pragma GCC target("3dnow")
#endif
#define __DISABLE_3dNOW__
#endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms (void) { __builtin_ia32_femms(); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pavgusb (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2id (__m64 __A) { return (__m64)__builtin_ia32_pf2id ((__v2sf)__A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfacc (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfadd (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpeq (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpge (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfcmpgt (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmax (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmin (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfmul (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcp (__m64 __A) { return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit1 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrcpit2 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqrt (__m64 __A) { return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfrsqit1 (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsub (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfsubr (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fd (__m64 __A) { return (__m64)__builtin_ia32_pi2fd ((__v2si)__A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhrw (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetch (void *__P) { __builtin_prefetch (__P, 0, 3 ); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_float (float __A) { return __extension__ (__m64)(__v2sf){ __A, 0.0f }; }
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_float (__m64 __A)
{
  union { __v2sf v; float a[2]; } __tmp;
  __tmp.v = (__v2sf)__A;
  return __tmp.a[0];
}

#ifdef __DISABLE_3dNOW__
#undef __DISABLE_3dNOW__
#pragma GCC pop_options
#endif

#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW_A__
#pragma GCC push_options
#ifdef __x86_64__
@ -124,32 +142,35 @@
#pragma GCC target("3dnowa")
#endif
#define __DISABLE_3dNOW_A__
#endif
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pf2iw (__m64 __A) { return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfnacc (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pfpnacc (__m64 __A, __m64 __B) { return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pi2fw (__m64 __A) { return (__m64)__builtin_ia32_pi2fw ((__v2si)__A); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pswapd (__m64 __A) { return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A); }

#ifdef __DISABLE_3dNOW_A__
#undef __DISABLE_3dNOW_A__
#pragma GCC pop_options
#endif

#endif
#endif
11
third_party/intel/mm_malloc.internal.h
vendored
@ -1,15 +1,14 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _MM_MALLOC_H_INCLUDED
#define _MM_MALLOC_H_INCLUDED
#include "libc/mem/mem.h"

#ifndef __cplusplus
extern int _mm_posix_memalign(void **, size_t, size_t)
#else
extern "C" int _mm_posix_memalign(void **, size_t, size_t) throw()
#endif
__asm__("posix_memalign");

static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
  void *__ptr;
  if (__alignment == 1) return malloc(__size);
@ -20,10 +19,8 @@
  else
    return NULL;
}

static __inline void _mm_free(void *__ptr) {
  free(__ptr);
}
#endif
#endif
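A short hedged usage sketch (not from the commit) of the _mm_malloc/_mm_free pair defined above: the allocation is aligned for SSE-style loads, and portable code should release it with _mm_free rather than free, even though this particular implementation forwards to posix_memalign and free. The <immintrin.h> entry point is assumed to pull the header in via xmmintrin.

#include <immintrin.h>

int sum16(void) {
  /* 16-byte alignment suits _mm_load_ps / _mm_load_si128 style accesses. */
  float *buf = (float *)_mm_malloc(16 * sizeof(float), 16);
  if (!buf) return -1;
  float s = 0;
  for (int i = 0; i < 16; i++) buf[i] = (float)i;
  for (int i = 0; i < 16; i++) s += buf[i];
  _mm_free(buf);
  return (int)s;
}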
700
third_party/intel/mmintrin.internal.h
vendored
@ -1,576 +1,710 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _MMINTRIN_H_INCLUDED
#define _MMINTRIN_H_INCLUDED

#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
#pragma GCC push_options
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#elif defined __x86_64__
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
#endif
#define __DISABLE_MMX__
#endif

typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
typedef int __m32 __attribute__ ((__vector_size__ (4), __may_alias__));
typedef short __m16 __attribute__ ((__vector_size__ (2), __may_alias__));
typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
typedef int __m32_u __attribute__ ((__vector_size__ (4), __may_alias__, __aligned__ (1)));
typedef short __m16_u __attribute__ ((__vector_size__ (2), __may_alias__, __aligned__ (1)));
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float __v2sf __attribute__ ((__vector_size__ (8)));

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void) { __builtin_ia32_emms (); }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void) { _mm_empty (); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64 (int __i) { return (__m64) __builtin_ia32_vec_init_v2si (__i, 0); }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int (int __i) { return _mm_cvtsi32_si64 (__i); }

#ifdef __x86_64__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64 (long long __i) { return (__m64) __i; }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64 (long long __i) { return (__m64) __i; }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64 (long long __i) { return (__m64) __i; }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x (long long __i) { return (__m64) __i; }
#endif
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32 (__m64 __i) { return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0); }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int (__m64 __i)
{
  return _mm_cvtsi64_si32 (__i);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __x86_64__
|
#ifdef __x86_64__
|
||||||
|
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline long long _m_to_int64(__m64 __i) {
|
_m_to_int64 (__m64 __i)
|
||||||
|
{
|
||||||
return (long long)__i;
|
return (long long)__i;
|
||||||
}
|
}
|
||||||
|
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline long long _mm_cvtm64_si64(__m64 __i) {
|
_mm_cvtm64_si64 (__m64 __i)
|
||||||
|
{
|
||||||
return (long long)__i;
|
return (long long)__i;
|
||||||
}
|
}
|
||||||
|
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline long long _mm_cvtsi64_si64x(__m64 __i) {
|
_mm_cvtsi64_si64x (__m64 __i)
|
||||||
|
{
|
||||||
return (long long)__i;
|
return (long long)__i;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_packs_pi16(__m64 __m1, __m64 __m2) {
|
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_packsswb(__m64 __m1, __m64 __m2) {
|
_m_packsswb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_packs_pi16 (__m1, __m2);
|
return _mm_packs_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_packs_pi32(__m64 __m1, __m64 __m2) {
|
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
|
return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_packssdw(__m64 __m1, __m64 __m2) {
|
_m_packssdw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_packs_pi32 (__m1, __m2);
|
return _mm_packs_pi32 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_packs_pu16(__m64 __m1, __m64 __m2) {
|
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_packuswb(__m64 __m1, __m64 __m2) {
|
_m_packuswb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_packs_pu16 (__m1, __m2);
|
return _mm_packs_pu16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
|
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_punpckhbw(__m64 __m1, __m64 __m2) {
|
_m_punpckhbw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_unpackhi_pi8 (__m1, __m2);
|
return _mm_unpackhi_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
|
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_punpckhwd(__m64 __m1, __m64 __m2) {
|
_m_punpckhwd (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_unpackhi_pi16 (__m1, __m2);
|
return _mm_unpackhi_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
|
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
|
return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_punpckhdq(__m64 __m1, __m64 __m2) {
|
_m_punpckhdq (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_unpackhi_pi32 (__m1, __m2);
|
return _mm_unpackhi_pi32 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
|
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_punpcklbw(__m64 __m1, __m64 __m2) {
|
_m_punpcklbw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_unpacklo_pi8 (__m1, __m2);
|
return _mm_unpacklo_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
|
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_punpcklwd(__m64 __m1, __m64 __m2) {
|
_m_punpcklwd (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_unpacklo_pi16 (__m1, __m2);
|
return _mm_unpacklo_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
|
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
|
return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_punpckldq(__m64 __m1, __m64 __m2) {
|
_m_punpckldq (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_unpacklo_pi32 (__m1, __m2);
|
return _mm_unpacklo_pi32 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_add_pi8(__m64 __m1, __m64 __m2) {
|
_mm_add_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_paddb(__m64 __m1, __m64 __m2) {
|
_m_paddb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_add_pi8 (__m1, __m2);
|
return _mm_add_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_add_pi16(__m64 __m1, __m64 __m2) {
|
_mm_add_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_paddw(__m64 __m1, __m64 __m2) {
|
_m_paddw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_add_pi16 (__m1, __m2);
|
return _mm_add_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_add_pi32(__m64 __m1, __m64 __m2) {
|
_mm_add_pi32 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
|
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_paddd(__m64 __m1, __m64 __m2) {
|
_m_paddd (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_add_pi32 (__m1, __m2);
|
return _mm_add_pi32 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __SSE2__
|
#ifndef __SSE2__
|
||||||
#pragma GCC push_options
|
#pragma GCC push_options
|
||||||
|
#ifdef __MMX_WITH_SSE__
|
||||||
|
#pragma GCC target("sse2")
|
||||||
|
#else
|
||||||
#pragma GCC target("sse2,mmx")
|
#pragma GCC target("sse2,mmx")
|
||||||
|
#endif
|
||||||
#define __DISABLE_SSE2__
|
#define __DISABLE_SSE2__
|
||||||
#endif /* __SSE2__ */
|
#endif
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_add_si64(__m64 __m1, __m64 __m2) {
|
_mm_add_si64 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
|
return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
|
||||||
}
|
}
|
||||||
#ifdef __DISABLE_SSE2__
|
#ifdef __DISABLE_SSE2__
|
||||||
#undef __DISABLE_SSE2__
|
#undef __DISABLE_SSE2__
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __DISABLE_SSE2__ */
|
#endif
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_adds_pi8(__m64 __m1, __m64 __m2) {
|
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_paddsb(__m64 __m1, __m64 __m2) {
|
_m_paddsb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_adds_pi8 (__m1, __m2);
|
return _mm_adds_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_adds_pi16(__m64 __m1, __m64 __m2) {
|
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_paddsw(__m64 __m1, __m64 __m2) {
|
_m_paddsw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_adds_pi16 (__m1, __m2);
|
return _mm_adds_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_adds_pu8(__m64 __m1, __m64 __m2) {
|
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_paddusb(__m64 __m1, __m64 __m2) {
|
_m_paddusb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_adds_pu8 (__m1, __m2);
|
return _mm_adds_pu8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_adds_pu16(__m64 __m1, __m64 __m2) {
|
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_paddusw(__m64 __m1, __m64 __m2) {
|
_m_paddusw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_adds_pu16 (__m1, __m2);
|
return _mm_adds_pu16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sub_pi8(__m64 __m1, __m64 __m2) {
|
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psubb(__m64 __m1, __m64 __m2) {
|
_m_psubb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_sub_pi8 (__m1, __m2);
|
return _mm_sub_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sub_pi16(__m64 __m1, __m64 __m2) {
|
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psubw(__m64 __m1, __m64 __m2) {
|
_m_psubw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_sub_pi16 (__m1, __m2);
|
return _mm_sub_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sub_pi32(__m64 __m1, __m64 __m2) {
|
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
|
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psubd(__m64 __m1, __m64 __m2) {
|
_m_psubd (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_sub_pi32 (__m1, __m2);
|
return _mm_sub_pi32 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __SSE2__
|
#ifndef __SSE2__
|
||||||
#pragma GCC push_options
|
#pragma GCC push_options
|
||||||
|
#ifdef __MMX_WITH_SSE__
|
||||||
|
#pragma GCC target("sse2")
|
||||||
|
#else
|
||||||
#pragma GCC target("sse2,mmx")
|
#pragma GCC target("sse2,mmx")
|
||||||
|
#endif
|
||||||
#define __DISABLE_SSE2__
|
#define __DISABLE_SSE2__
|
||||||
#endif /* __SSE2__ */
|
#endif
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sub_si64(__m64 __m1, __m64 __m2) {
|
_mm_sub_si64 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
|
return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
|
||||||
}
|
}
|
||||||
#ifdef __DISABLE_SSE2__
|
#ifdef __DISABLE_SSE2__
|
||||||
#undef __DISABLE_SSE2__
|
#undef __DISABLE_SSE2__
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __DISABLE_SSE2__ */
|
#endif
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_subs_pi8(__m64 __m1, __m64 __m2) {
|
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psubsb(__m64 __m1, __m64 __m2) {
|
_m_psubsb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_subs_pi8 (__m1, __m2);
|
return _mm_subs_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_subs_pi16(__m64 __m1, __m64 __m2) {
|
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psubsw(__m64 __m1, __m64 __m2) {
|
_m_psubsw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_subs_pi16 (__m1, __m2);
|
return _mm_subs_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_subs_pu8(__m64 __m1, __m64 __m2) {
|
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psubusb(__m64 __m1, __m64 __m2) {
|
_m_psubusb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_subs_pu8 (__m1, __m2);
|
return _mm_subs_pu8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_subs_pu16(__m64 __m1, __m64 __m2) {
|
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psubusw(__m64 __m1, __m64 __m2) {
|
_m_psubusw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_subs_pu16 (__m1, __m2);
|
return _mm_subs_pu16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_madd_pi16(__m64 __m1, __m64 __m2) {
|
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pmaddwd(__m64 __m1, __m64 __m2) {
|
_m_pmaddwd (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_madd_pi16 (__m1, __m2);
|
return _mm_madd_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) {
|
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pmulhw(__m64 __m1, __m64 __m2) {
|
_m_pmulhw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_mulhi_pi16 (__m1, __m2);
|
return _mm_mulhi_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_mullo_pi16(__m64 __m1, __m64 __m2) {
|
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pmullw(__m64 __m1, __m64 __m2) {
|
_m_pmullw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_mullo_pi16 (__m1, __m2);
|
return _mm_mullo_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sll_pi16(__m64 __m, __m64 __count) {
|
_mm_sll_pi16 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
|
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psllw(__m64 __m, __m64 __count) {
|
_m_psllw (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_sll_pi16 (__m, __count);
|
return _mm_sll_pi16 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_slli_pi16(__m64 __m, int __count) {
|
_mm_slli_pi16 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
|
return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psllwi(__m64 __m, int __count) {
|
_m_psllwi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_slli_pi16 (__m, __count);
|
return _mm_slli_pi16 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sll_pi32(__m64 __m, __m64 __count) {
|
_mm_sll_pi32 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
|
return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pslld(__m64 __m, __m64 __count) {
|
_m_pslld (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_sll_pi32 (__m, __count);
|
return _mm_sll_pi32 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_slli_pi32(__m64 __m, int __count) {
|
_mm_slli_pi32 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
|
return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pslldi(__m64 __m, int __count) {
|
_m_pslldi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_slli_pi32 (__m, __count);
|
return _mm_slli_pi32 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sll_si64(__m64 __m, __m64 __count) {
|
_mm_sll_si64 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
|
return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psllq(__m64 __m, __m64 __count) {
|
_m_psllq (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_sll_si64 (__m, __count);
|
return _mm_sll_si64 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_slli_si64(__m64 __m, int __count) {
|
_mm_slli_si64 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
|
return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psllqi(__m64 __m, int __count) {
|
_m_psllqi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_slli_si64 (__m, __count);
|
return _mm_slli_si64 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sra_pi16(__m64 __m, __m64 __count) {
|
_mm_sra_pi16 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
|
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psraw(__m64 __m, __m64 __count) {
|
_m_psraw (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_sra_pi16 (__m, __count);
|
return _mm_sra_pi16 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srai_pi16(__m64 __m, int __count) {
|
_mm_srai_pi16 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
|
return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrawi(__m64 __m, int __count) {
|
_m_psrawi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_srai_pi16 (__m, __count);
|
return _mm_srai_pi16 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_sra_pi32(__m64 __m, __m64 __count) {
|
_mm_sra_pi32 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
|
return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrad(__m64 __m, __m64 __count) {
|
_m_psrad (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_sra_pi32 (__m, __count);
|
return _mm_sra_pi32 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srai_pi32(__m64 __m, int __count) {
|
_mm_srai_pi32 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
|
return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psradi(__m64 __m, int __count) {
|
_m_psradi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_srai_pi32 (__m, __count);
|
return _mm_srai_pi32 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srl_pi16(__m64 __m, __m64 __count) {
|
_mm_srl_pi16 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
|
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrlw(__m64 __m, __m64 __count) {
|
_m_psrlw (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_srl_pi16 (__m, __count);
|
return _mm_srl_pi16 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srli_pi16(__m64 __m, int __count) {
|
_mm_srli_pi16 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
|
return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrlwi(__m64 __m, int __count) {
|
_m_psrlwi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_srli_pi16 (__m, __count);
|
return _mm_srli_pi16 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srl_pi32(__m64 __m, __m64 __count) {
|
_mm_srl_pi32 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
|
return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrld(__m64 __m, __m64 __count) {
|
_m_psrld (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_srl_pi32 (__m, __count);
|
return _mm_srl_pi32 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srli_pi32(__m64 __m, int __count) {
|
_mm_srli_pi32 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
|
return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrldi(__m64 __m, int __count) {
|
_m_psrldi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_srli_pi32 (__m, __count);
|
return _mm_srli_pi32 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srl_si64(__m64 __m, __m64 __count) {
|
_mm_srl_si64 (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
|
return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrlq(__m64 __m, __m64 __count) {
|
_m_psrlq (__m64 __m, __m64 __count)
|
||||||
|
{
|
||||||
return _mm_srl_si64 (__m, __count);
|
return _mm_srl_si64 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_srli_si64(__m64 __m, int __count) {
|
_mm_srli_si64 (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
|
return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_psrlqi(__m64 __m, int __count) {
|
_m_psrlqi (__m64 __m, int __count)
|
||||||
|
{
|
||||||
return _mm_srli_si64 (__m, __count);
|
return _mm_srli_si64 (__m, __count);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_and_si64(__m64 __m1, __m64 __m2) {
|
_mm_and_si64 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return __builtin_ia32_pand (__m1, __m2);
|
return __builtin_ia32_pand (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pand(__m64 __m1, __m64 __m2) {
|
_m_pand (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_and_si64 (__m1, __m2);
|
return _mm_and_si64 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_andnot_si64(__m64 __m1, __m64 __m2) {
|
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return __builtin_ia32_pandn (__m1, __m2);
|
return __builtin_ia32_pandn (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pandn(__m64 __m1, __m64 __m2) {
|
_m_pandn (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_andnot_si64 (__m1, __m2);
|
return _mm_andnot_si64 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_or_si64(__m64 __m1, __m64 __m2) {
|
_mm_or_si64 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return __builtin_ia32_por (__m1, __m2);
|
return __builtin_ia32_por (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_por(__m64 __m1, __m64 __m2) {
|
_m_por (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_or_si64 (__m1, __m2);
|
return _mm_or_si64 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_xor_si64(__m64 __m1, __m64 __m2) {
|
_mm_xor_si64 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return __builtin_ia32_pxor (__m1, __m2);
|
return __builtin_ia32_pxor (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pxor(__m64 __m1, __m64 __m2) {
|
_m_pxor (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_xor_si64 (__m1, __m2);
|
return _mm_xor_si64 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) {
|
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pcmpeqb(__m64 __m1, __m64 __m2) {
|
_m_pcmpeqb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_cmpeq_pi8 (__m1, __m2);
|
return _mm_cmpeq_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) {
|
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
|
return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pcmpgtb(__m64 __m1, __m64 __m2) {
|
_m_pcmpgtb (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_cmpgt_pi8 (__m1, __m2);
|
return _mm_cmpgt_pi8 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) {
|
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pcmpeqw(__m64 __m1, __m64 __m2) {
|
_m_pcmpeqw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_cmpeq_pi16 (__m1, __m2);
|
return _mm_cmpeq_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) {
|
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
|
return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pcmpgtw(__m64 __m1, __m64 __m2) {
|
_m_pcmpgtw (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_cmpgt_pi16 (__m1, __m2);
|
return _mm_cmpgt_pi16 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) {
|
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
|
return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pcmpeqd(__m64 __m1, __m64 __m2) {
|
_m_pcmpeqd (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_cmpeq_pi32 (__m1, __m2);
|
return _mm_cmpeq_pi32 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) {
|
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
|
return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _m_pcmpgtd(__m64 __m1, __m64 __m2) {
|
_m_pcmpgtd (__m64 __m1, __m64 __m2)
|
||||||
|
{
|
||||||
return _mm_cmpgt_pi32 (__m1, __m2);
|
return _mm_cmpgt_pi32 (__m1, __m2);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_setzero_si64(void) {
|
_mm_setzero_si64 (void)
|
||||||
|
{
|
||||||
return (__m64)0LL;
|
return (__m64)0LL;
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_set_pi32(int __i1, int __i0) {
|
_mm_set_pi32 (int __i1, int __i0)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
|
return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) {
|
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
|
||||||
|
{
|
||||||
return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
|
return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3,
|
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
|
||||||
char __b2, char __b1, char __b0) {
|
char __b3, char __b2, char __b1, char __b0)
|
||||||
return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, __b4, __b5,
|
{
|
||||||
__b6, __b7);
|
return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
|
||||||
|
__b4, __b5, __b6, __b7);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_setr_pi32(int __i0, int __i1) {
|
_mm_setr_pi32 (int __i0, int __i1)
|
||||||
|
{
|
||||||
return _mm_set_pi32 (__i1, __i0);
|
return _mm_set_pi32 (__i1, __i0);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
|
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
|
||||||
|
{
|
||||||
return _mm_set_pi16 (__w3, __w2, __w1, __w0);
|
return _mm_set_pi16 (__w3, __w2, __w1, __w0);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3,
|
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
|
||||||
char __b4, char __b5, char __b6, char __b7) {
|
char __b4, char __b5, char __b6, char __b7)
|
||||||
|
{
|
||||||
return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
|
return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_set1_pi32(int __i) {
|
_mm_set1_pi32 (int __i)
|
||||||
|
{
|
||||||
return _mm_set_pi32 (__i, __i);
|
return _mm_set_pi32 (__i, __i);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_set1_pi16(short __w) {
|
_mm_set1_pi16 (short __w)
|
||||||
|
{
|
||||||
return _mm_set_pi16 (__w, __w, __w, __w);
|
return _mm_set_pi16 (__w, __w, __w, __w);
|
||||||
}
|
}
|
||||||
|
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
__funline __m64 _mm_set1_pi8(char __b) {
|
_mm_set1_pi8 (char __b)
|
||||||
|
{
|
||||||
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
|
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
|
||||||
}
|
}
|
||||||
#ifdef __DISABLE_MMX__
|
#ifdef __DISABLE_MMX__
|
||||||
#undef __DISABLE_MMX__
|
#undef __DISABLE_MMX__
|
||||||
#pragma GCC pop_options
|
#pragma GCC pop_options
|
||||||
#endif /* __DISABLE_MMX__ */
|
#endif
|
||||||
|
#endif
|
||||||
#endif /* __x86_64__ */
|
#endif
|
||||||
#endif /* _MMINTRIN_H_INCLUDED */
|
|
||||||
|
|
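The hunk above mostly reflows mmintrin.internal.h into GCC's stock formatting: each condensed `__funline` declaration becomes the three-line `extern __inline ... __attribute__((__gnu_inline__, __always_inline__, __artificial__))` form, and the new copy also gains the `__m32`/`__m16` helper typedefs and the `__MMX_WITH_SSE__` target handling. The intrinsics themselves keep their names and signatures, so callers are unaffected. A small usage sketch of a few of them (illustrative only; on x86-64 GCC compiles these through SSE registers by default, otherwise build with -mmmx):

#include <stdio.h>
#include <mmintrin.h>

int main(void) {
  __m64 a = _mm_set_pi16(4, 3, 2, 1);    /* lanes (low..high): 1, 2, 3, 4 */
  __m64 b = _mm_set1_pi16(10);           /* broadcast 10 into all 4 lanes */
  __m64 c = _mm_add_pi16(a, b);          /* lane-wise 16-bit addition     */
  long long bits = _mm_cvtm64_si64(c);   /* view the result as 64 bits    */
  _mm_empty();                           /* clear MMX state (EMMS)        */
  for (int i = 0; i < 4; ++i)
    printf("lane %d = %d\n", i, (short)(bits >> (16 * i)));
  return 0;
}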
Some files were not shown because too many files have changed in this diff