mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-05-23 05:42:29 +00:00
Upgrade to Cosmopolitan GCC 11.2.0 for x86_64
This commit is contained in:
parent
682b74ed88
commit
39f20dbb13
137 changed files with 48523 additions and 34001 deletions
1
Makefile
1
Makefile
|
@ -94,6 +94,7 @@ o/$(MODE): \
|
|||
rwc:/dev/shm \
|
||||
rx:build/bootstrap \
|
||||
rx:o/third_party/gcc \
|
||||
r:build/portcosmo.h \
|
||||
/proc/stat \
|
||||
rw:/dev/null \
|
||||
w:o/stack.log \
|
||||
|
|
|
@ -88,11 +88,15 @@ ARCH = x86_64
|
|||
HOSTS ?= freebsd openbsd netbsd rhel7 rhel5 xnu win10
|
||||
endif
|
||||
|
||||
PORTCOSMO_CCFLAGS = -fportcosmo -include build/portcosmo.h
|
||||
|
||||
ifneq ("$(wildcard o/third_party/gcc/bin/x86_64-pc-linux-gnu-*)","")
|
||||
PREFIX = o/third_party/gcc/bin/x86_64-pc-linux-gnu-
|
||||
DEFAULT_CPPFLAGS += $(PORTCOSMO_CCFLAGS)
|
||||
else
|
||||
IGNORE := $(shell build/bootstrap/unbundle.com)
|
||||
PREFIX = o/third_party/gcc/bin/x86_64-linux-musl-
|
||||
DEFAULT_CPPFLAGS += $(PORTCOSMO_CCFLAGS)
|
||||
endif
|
||||
ifeq ($(ARCH), aarch64)
|
||||
PREFIX = o/third_party/gcc/bin/aarch64-linux-musl-
|
||||
|
@ -163,7 +167,7 @@ TRADITIONAL = \
|
|||
-Wno-return-type \
|
||||
-Wno-pointer-sign
|
||||
|
||||
DEFAULT_CCFLAGS = \
|
||||
DEFAULT_CCFLAGS += \
|
||||
-Wall \
|
||||
-Werror \
|
||||
-fdebug-prefix-map='$(PWD)'= \
|
||||
|
@ -206,7 +210,7 @@ MATHEMATICAL = \
|
|||
-O3 \
|
||||
-fwrapv
|
||||
|
||||
DEFAULT_CPPFLAGS = \
|
||||
DEFAULT_CPPFLAGS += \
|
||||
-DCOSMO \
|
||||
-DMODE='"$(MODE)"' \
|
||||
-DIMAGE_BASE_VIRTUAL=$(IMAGE_BASE_VIRTUAL) \
|
||||
|
|
361
build/portcosmo.h
Normal file
361
build/portcosmo.h
Normal file
|
@ -0,0 +1,361 @@
|
|||
/*
 * Placeholder constants used together with the -fportcosmo compiler flag.
 *
 * The Makefile's PORTCOSMO_CCFLAGS passes `-fportcosmo -include
 * build/portcosmo.h`, so this header is pulled in ahead of the sources
 * compiled with those flags. It declares one `__tmpcosmo_<NAME>` constant
 * per system magic number (e.g. AF_*, E*, F_*, IFF_*, SIG*, SOL_*, SO_*,
 * B*).
 *
 * NOTE(review): the values look like arbitrary sentinel numbers rather
 * than real ABI values; presumably the patched GCC recognizes them so that
 * code such as `switch (errno) { case EINVAL: ... }` compiles -- confirm
 * against third_party/gcc/portcosmo.patch.
 */
#ifndef ACTUALLY_MODS
|
||||
#define ACTUALLY_MODS
|
||||
/* C declarations only: skipped when __ASSEMBLER__ or __LINKER__ is defined
   to a nonzero value (i.e. when preprocessing non-C inputs). */
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
static const int __tmpcosmo_AF_ALG = -15823936;
|
||||
static const int __tmpcosmo_AF_APPLETALK = -15823820;
|
||||
static const int __tmpcosmo_AF_ASH = -15823924;
|
||||
static const int __tmpcosmo_AF_ATMPVC = -15824070;
|
||||
static const int __tmpcosmo_AF_ATMSVC = -15824056;
|
||||
static const int __tmpcosmo_AF_AX25 = -15824014;
|
||||
static const int __tmpcosmo_AF_BLUETOOTH = -15823992;
|
||||
static const int __tmpcosmo_AF_BRIDGE = -15823812;
|
||||
static const int __tmpcosmo_AF_CAIF = -15823850;
|
||||
static const int __tmpcosmo_AF_CAN = -15823868;
|
||||
static const int __tmpcosmo_AF_ECONET = -15823852;
|
||||
static const int __tmpcosmo_AF_FILE = -15824118;
|
||||
static const int __tmpcosmo_AF_IB = -15823966;
|
||||
static const int __tmpcosmo_AF_IEEE802154 = -15823906;
|
||||
static const int __tmpcosmo_AF_IPX = -15824002;
|
||||
static const int __tmpcosmo_AF_IRDA = -15823860;
|
||||
static const int __tmpcosmo_AF_ISDN = -15823978;
|
||||
static const int __tmpcosmo_AF_IUCV = -15824106;
|
||||
static const int __tmpcosmo_AF_KCM = -15824024;
|
||||
static const int __tmpcosmo_AF_KEY = -15823948;
|
||||
static const int __tmpcosmo_AF_LINK = -15823878;
|
||||
static const int __tmpcosmo_AF_LLC = -15823824;
|
||||
static const int __tmpcosmo_AF_LOCAL = -15823928;
|
||||
static const int __tmpcosmo_AF_MAX = -15824082;
|
||||
static const int __tmpcosmo_AF_MPLS = -15824026;
|
||||
static const int __tmpcosmo_AF_NETBEUI = -15824124;
|
||||
static const int __tmpcosmo_AF_NETLINK = -15824004;
|
||||
static const int __tmpcosmo_AF_NETROM = -15823886;
|
||||
static const int __tmpcosmo_AF_NFC = -15824142;
|
||||
static const int __tmpcosmo_AF_PACKET = -15824028;
|
||||
static const int __tmpcosmo_AF_PHONET = -15823830;
|
||||
static const int __tmpcosmo_AF_PPPOX = -15823876;
|
||||
static const int __tmpcosmo_AF_ROSE = -15824016;
|
||||
static const int __tmpcosmo_AF_ROUTE = -15824100;
|
||||
static const int __tmpcosmo_AF_RXRPC = -15823926;
|
||||
static const int __tmpcosmo_AF_SECURITY = -15824136;
|
||||
static const int __tmpcosmo_AF_SNA = -15823950;
|
||||
static const int __tmpcosmo_AF_TIPC = -15824034;
|
||||
static const int __tmpcosmo_AF_VSOCK = -15824146;
|
||||
static const int __tmpcosmo_AF_WANPIPE = -15823960;
|
||||
static const int __tmpcosmo_AF_X25 = -15823864;
|
||||
static const int __tmpcosmo_E2BIG = -15823698;
|
||||
static const int __tmpcosmo_EACCES = -15823580;
|
||||
static const int __tmpcosmo_EADDRINUSE = -15823756;
|
||||
static const int __tmpcosmo_EADDRNOTAVAIL = -15823592;
|
||||
static const int __tmpcosmo_EADV = -15823574;
|
||||
static const int __tmpcosmo_EAFNOSUPPORT = -15823748;
|
||||
static const int __tmpcosmo_EAGAIN = -15823506;
|
||||
static const int __tmpcosmo_EALREADY = -15823530;
|
||||
static const int __tmpcosmo_EAUTH = -15823702;
|
||||
static const int __tmpcosmo_EBADARCH = -15823738;
|
||||
static const int __tmpcosmo_EBADE = -15823740;
|
||||
static const int __tmpcosmo_EBADEXEC = -15823684;
|
||||
static const int __tmpcosmo_EBADF = -15823744;
|
||||
static const int __tmpcosmo_EBADFD = -15823554;
|
||||
static const int __tmpcosmo_EBADMACHO = -15823618;
|
||||
static const int __tmpcosmo_EBADMSG = -15823650;
|
||||
static const int __tmpcosmo_EBADR = -15823570;
|
||||
static const int __tmpcosmo_EBADRPC = -15823626;
|
||||
static const int __tmpcosmo_EBADRQC = -15823688;
|
||||
static const int __tmpcosmo_EBADSLT = -15823788;
|
||||
static const int __tmpcosmo_EBUSY = -15823550;
|
||||
static const int __tmpcosmo_ECANCELED = -15823676;
|
||||
static const int __tmpcosmo_ECHILD = -15823662;
|
||||
static const int __tmpcosmo_ECHRNG = -15823722;
|
||||
static const int __tmpcosmo_ECOMM = -15823634;
|
||||
static const int __tmpcosmo_ECONNABORTED = -15823616;
|
||||
static const int __tmpcosmo_ECONNREFUSED = -15823556;
|
||||
static const int __tmpcosmo_ECONNRESET = -15823548;
|
||||
static const int __tmpcosmo_EDEADLK = -15823718;
|
||||
static const int __tmpcosmo_EDESTADDRREQ = -15823658;
|
||||
static const int __tmpcosmo_EDEVERR = -15823518;
|
||||
static const int __tmpcosmo_EDOM = -15823798;
|
||||
static const int __tmpcosmo_EDOTDOT = -15823726;
|
||||
static const int __tmpcosmo_EDQUOT = -15823620;
|
||||
static const int __tmpcosmo_EEXIST = -15823594;
|
||||
static const int __tmpcosmo_EFAULT = -15823686;
|
||||
static const int __tmpcosmo_EFBIG = -15823768;
|
||||
static const int __tmpcosmo_EFTYPE = -15823568;
|
||||
static const int __tmpcosmo_EHOSTDOWN = -15823596;
|
||||
static const int __tmpcosmo_EHOSTUNREACH = -15823742;
|
||||
static const int __tmpcosmo_EHWPOISON = -15823680;
|
||||
static const int __tmpcosmo_EIDRM = -15823644;
|
||||
static const int __tmpcosmo_EILSEQ = -15823540;
|
||||
static const int __tmpcosmo_EINPROGRESS = -15823720;
|
||||
static const int __tmpcosmo_EINTR = -15823710;
|
||||
static const int __tmpcosmo_EINVAL = -15823624;
|
||||
static const int __tmpcosmo_EIO = -15823544;
|
||||
static const int __tmpcosmo_EISCONN = -15823704;
|
||||
static const int __tmpcosmo_EISDIR = -15823758;
|
||||
static const int __tmpcosmo_EISNAM = -15823682;
|
||||
static const int __tmpcosmo_EKEYEXPIRED = -15823520;
|
||||
static const int __tmpcosmo_EKEYREJECTED = -15823712;
|
||||
static const int __tmpcosmo_EKEYREVOKED = -15823780;
|
||||
static const int __tmpcosmo_EL2HLT = -15823510;
|
||||
static const int __tmpcosmo_EL2NSYNC = -15823670;
|
||||
static const int __tmpcosmo_EL3HLT = -15823792;
|
||||
static const int __tmpcosmo_EL3RST = -15823654;
|
||||
static const int __tmpcosmo_ELIBACC = -15823708;
|
||||
static const int __tmpcosmo_ELIBBAD = -15823564;
|
||||
static const int __tmpcosmo_ELIBEXEC = -15823696;
|
||||
static const int __tmpcosmo_ELIBMAX = -15823724;
|
||||
static const int __tmpcosmo_ELIBSCN = -15823786;
|
||||
static const int __tmpcosmo_ELNRNG = -15823732;
|
||||
static const int __tmpcosmo_ELOOP = -15823672;
|
||||
static const int __tmpcosmo_EMEDIUMTYPE = -15823508;
|
||||
static const int __tmpcosmo_EMFILE = -15823762;
|
||||
static const int __tmpcosmo_EMLINK = -15823694;
|
||||
static const int __tmpcosmo_EMSGSIZE = -15823536;
|
||||
static const int __tmpcosmo_EMULTIHOP = -15823750;
|
||||
static const int __tmpcosmo_ENAMETOOLONG = -15823600;
|
||||
static const int __tmpcosmo_ENAVAIL = -15823656;
|
||||
static const int __tmpcosmo_ENEEDAUTH = -15823766;
|
||||
static const int __tmpcosmo_ENETDOWN = -15823730;
|
||||
static const int __tmpcosmo_ENETRESET = -15823604;
|
||||
static const int __tmpcosmo_ENETUNREACH = -15823524;
|
||||
static const int __tmpcosmo_ENFILE = -15823700;
|
||||
static const int __tmpcosmo_ENOANO = -15823734;
|
||||
static const int __tmpcosmo_ENOATTR = -15823606;
|
||||
static const int __tmpcosmo_ENOBUFS = -15823628;
|
||||
static const int __tmpcosmo_ENOCSI = -15823760;
|
||||
static const int __tmpcosmo_ENODATA = -15823516;
|
||||
static const int __tmpcosmo_ENODEV = -15823774;
|
||||
static const int __tmpcosmo_ENOENT = -15823590;
|
||||
static const int __tmpcosmo_ENOEXEC = -15823512;
|
||||
static const int __tmpcosmo_ENOKEY = -15823764;
|
||||
static const int __tmpcosmo_ENOLCK = -15823782;
|
||||
static const int __tmpcosmo_ENOLINK = -15823538;
|
||||
static const int __tmpcosmo_ENOMEDIUM = -15823598;
|
||||
static const int __tmpcosmo_ENOMEM = -15823514;
|
||||
static const int __tmpcosmo_ENOMSG = -15823796;
|
||||
static const int __tmpcosmo_ENONET = -15823642;
|
||||
static const int __tmpcosmo_ENOPKG = -15823664;
|
||||
static const int __tmpcosmo_ENOPOLICY = -15823716;
|
||||
static const int __tmpcosmo_ENOPROTOOPT = -15823608;
|
||||
static const int __tmpcosmo_ENOSPC = -15823646;
|
||||
static const int __tmpcosmo_ENOSR = -15823558;
|
||||
static const int __tmpcosmo_ENOSTR = -15823706;
|
||||
static const int __tmpcosmo_ENOSYS = -15823636;
|
||||
static const int __tmpcosmo_ENOTBLK = -15823640;
|
||||
static const int __tmpcosmo_ENOTCONN = -15823778;
|
||||
static const int __tmpcosmo_ENOTDIR = -15823648;
|
||||
static const int __tmpcosmo_ENOTEMPTY = -15823552;
|
||||
static const int __tmpcosmo_ENOTNAM = -15823532;
|
||||
static const int __tmpcosmo_ENOTRECOVERABLE = -15823746;
|
||||
static const int __tmpcosmo_ENOTSOCK = -15823582;
|
||||
static const int __tmpcosmo_ENOTSUP = -15823602;
|
||||
static const int __tmpcosmo_ENOTTY = -15823528;
|
||||
static const int __tmpcosmo_ENOTUNIQ = -15823790;
|
||||
static const int __tmpcosmo_ENXIO = -15823622;
|
||||
static const int __tmpcosmo_EOPNOTSUPP = -15823588;
|
||||
static const int __tmpcosmo_EOVERFLOW = -15823736;
|
||||
static const int __tmpcosmo_EOWNERDEAD = -15823562;
|
||||
static const int __tmpcosmo_EPERM = -15823754;
|
||||
static const int __tmpcosmo_EPFNOSUPPORT = -15823690;
|
||||
static const int __tmpcosmo_EPIPE = -15823534;
|
||||
static const int __tmpcosmo_EPROCLIM = -15823610;
|
||||
static const int __tmpcosmo_EPROCUNAVAIL = -15823546;
|
||||
static const int __tmpcosmo_EPROGMISMATCH = -15823572;
|
||||
static const int __tmpcosmo_EPROGUNAVAIL = -15823526;
|
||||
static const int __tmpcosmo_EPROTO = -15823678;
|
||||
static const int __tmpcosmo_EPROTONOSUPPORT = -15823576;
|
||||
static const int __tmpcosmo_EPROTOTYPE = -15823614;
|
||||
static const int __tmpcosmo_EPWROFF = -15823692;
|
||||
static const int __tmpcosmo_ERANGE = -15823772;
|
||||
static const int __tmpcosmo_EREMCHG = -15823666;
|
||||
static const int __tmpcosmo_EREMOTE = -15823560;
|
||||
static const int __tmpcosmo_EREMOTEIO = -15823794;
|
||||
static const int __tmpcosmo_ERESTART = -15823728;
|
||||
static const int __tmpcosmo_ERFKILL = -15823612;
|
||||
static const int __tmpcosmo_EROFS = -15823566;
|
||||
static const int __tmpcosmo_ERPCMISMATCH = -15823542;
|
||||
static const int __tmpcosmo_ESHLIBVERS = -15823584;
|
||||
static const int __tmpcosmo_ESHUTDOWN = -15823660;
|
||||
static const int __tmpcosmo_ESOCKTNOSUPPORT = -15823776;
|
||||
static const int __tmpcosmo_ESPIPE = -15823652;
|
||||
static const int __tmpcosmo_ESRCH = -15823674;
|
||||
static const int __tmpcosmo_ESRMNT = -15823714;
|
||||
static const int __tmpcosmo_ESTALE = -15823632;
|
||||
static const int __tmpcosmo_ESTRPIPE = -15823770;
|
||||
static const int __tmpcosmo_ETIME = -15823630;
|
||||
static const int __tmpcosmo_ETIMEDOUT = -15823522;
|
||||
static const int __tmpcosmo_ETOOMANYREFS = -15823586;
|
||||
static const int __tmpcosmo_ETXTBSY = -15823638;
|
||||
static const int __tmpcosmo_EUCLEAN = -15823578;
|
||||
static const int __tmpcosmo_EUNATCH = -15823504;
|
||||
static const int __tmpcosmo_EUSERS = -15823668;
|
||||
static const int __tmpcosmo_EXDEV = -15823752;
|
||||
static const int __tmpcosmo_EXFULL = -15823784;
|
||||
static const int __tmpcosmo_F_DUPFD_CLOEXEC = -15823938;
|
||||
static const int __tmpcosmo_F_GETLEASE = -15823862;
|
||||
static const int __tmpcosmo_F_GETLK = -15823916;
|
||||
static const int __tmpcosmo_F_GETLK64 = -15823846;
|
||||
static const int __tmpcosmo_F_GETOWN = -15824116;
|
||||
static const int __tmpcosmo_F_GETPATH = -15824128;
|
||||
static const int __tmpcosmo_F_GETPIPE_SZ = -15824006;
|
||||
static const int __tmpcosmo_F_GETSIG = -15824112;
|
||||
static const int __tmpcosmo_F_MAXFD = -15823896;
|
||||
static const int __tmpcosmo_F_NOCACHE = -15824048;
|
||||
static const int __tmpcosmo_F_NOTIFY = -15823898;
|
||||
static const int __tmpcosmo_F_RDLCK = -15823826;
|
||||
static const int __tmpcosmo_F_SETLEASE = -15823884;
|
||||
static const int __tmpcosmo_F_SETLK = -15824088;
|
||||
static const int __tmpcosmo_F_SETLK64 = -15824154;
|
||||
static const int __tmpcosmo_F_SETLKW = -15824096;
|
||||
static const int __tmpcosmo_F_SETLKW64 = -15824104;
|
||||
static const int __tmpcosmo_F_SETOWN = -15823874;
|
||||
static const int __tmpcosmo_F_SETPIPE_SZ = -15823958;
|
||||
static const int __tmpcosmo_F_SETSIG = -15823832;
|
||||
static const int __tmpcosmo_F_UNLCK = -15824148;
|
||||
static const int __tmpcosmo_F_WRLCK = -15824058;
|
||||
static const int __tmpcosmo_IFF_ALLMULTI = -15824140;
|
||||
static const int __tmpcosmo_IFF_AUTOMEDIA = -15823962;
|
||||
static const int __tmpcosmo_IFF_DYNAMIC = -15823848;
|
||||
static const int __tmpcosmo_IFF_MASTER = -15823900;
|
||||
static const int __tmpcosmo_IFF_MULTICAST = -15824000;
|
||||
static const int __tmpcosmo_IFF_NOARP = -15823802;
|
||||
static const int __tmpcosmo_IFF_NOTRAILERS = -15824130;
|
||||
static const int __tmpcosmo_IFF_POINTOPOINT = -15824138;
|
||||
static const int __tmpcosmo_IFF_PORTSEL = -15824150;
|
||||
static const int __tmpcosmo_IFF_PROMISC = -15824010;
|
||||
static const int __tmpcosmo_IFF_RUNNING = -15824080;
|
||||
static const int __tmpcosmo_IFF_SLAVE = -15824022;
|
||||
static const int __tmpcosmo_LOCAL_PEERCRED = -15823986;
|
||||
static const int __tmpcosmo_SIGBUS = -15824132;
|
||||
static const int __tmpcosmo_SIGCHLD = -15824036;
|
||||
static const int __tmpcosmo_SIGCONT = -15823836;
|
||||
static const int __tmpcosmo_SIGEMT = -15823972;
|
||||
static const int __tmpcosmo_SIGINFO = -15824086;
|
||||
static const int __tmpcosmo_SIGIO = -15823912;
|
||||
static const int __tmpcosmo_SIGPOLL = -15823854;
|
||||
static const int __tmpcosmo_SIGPWR = -15824114;
|
||||
static const int __tmpcosmo_SIGRTMAX = -15824040;
|
||||
static const int __tmpcosmo_SIGRTMIN = -15824134;
|
||||
static const int __tmpcosmo_SIGSTKFLT = -15823934;
|
||||
static const int __tmpcosmo_SIGSTOP = -15824158;
|
||||
static const int __tmpcosmo_SIGSYS = -15823922;
|
||||
static const int __tmpcosmo_SIGTHR = -15823902;
|
||||
static const int __tmpcosmo_SIGTSTP = -15823988;
|
||||
static const int __tmpcosmo_SIGUNUSED = -15823970;
|
||||
static const int __tmpcosmo_SIGURG = -15823952;
|
||||
static const int __tmpcosmo_SIGUSR1 = -15824018;
|
||||
static const int __tmpcosmo_SIGUSR2 = -15823998;
|
||||
static const int __tmpcosmo_SIG_BLOCK = -15823800;
|
||||
static const int __tmpcosmo_SIG_SETMASK = -15824090;
|
||||
static const int __tmpcosmo_SIG_UNBLOCK = -15824078;
|
||||
static const int __tmpcosmo_SOL_AAL = -15823976;
|
||||
static const int __tmpcosmo_SOL_ALG = -15823956;
|
||||
static const int __tmpcosmo_SOL_ATM = -15823914;
|
||||
static const int __tmpcosmo_SOL_BLUETOOTH = -15824062;
|
||||
static const int __tmpcosmo_SOL_CAIF = -15823904;
|
||||
static const int __tmpcosmo_SOL_DCCP = -15823814;
|
||||
static const int __tmpcosmo_SOL_DECNET = -15823842;
|
||||
static const int __tmpcosmo_SOL_ICMPV6 = -15823908;
|
||||
static const int __tmpcosmo_SOL_IPV6 = -15823808;
|
||||
static const int __tmpcosmo_SOL_IRDA = -15823880;
|
||||
static const int __tmpcosmo_SOL_IUCV = -15824156;
|
||||
static const int __tmpcosmo_SOL_KCM = -15824092;
|
||||
static const int __tmpcosmo_SOL_LLC = -15823930;
|
||||
static const int __tmpcosmo_SOL_NETBEUI = -15823894;
|
||||
static const int __tmpcosmo_SOL_NETLINK = -15824012;
|
||||
static const int __tmpcosmo_SOL_NFC = -15823942;
|
||||
static const int __tmpcosmo_SOL_PACKET = -15823806;
|
||||
static const int __tmpcosmo_SOL_PNPIPE = -15823968;
|
||||
static const int __tmpcosmo_SOL_PPPOL2TP = -15823816;
|
||||
static const int __tmpcosmo_SOL_RAW = -15824044;
|
||||
static const int __tmpcosmo_SOL_RDS = -15824020;
|
||||
static const int __tmpcosmo_SOL_RXRPC = -15823984;
|
||||
static const int __tmpcosmo_SOL_SOCKET = -15824050;
|
||||
static const int __tmpcosmo_SOL_TIPC = -15823940;
|
||||
static const int __tmpcosmo_SOL_X25 = -15823856;
|
||||
static const int __tmpcosmo_SO_ACCEPTCONN = -15823872;
|
||||
static const int __tmpcosmo_SO_ATTACH_BPF = -15824072;
|
||||
static const int __tmpcosmo_SO_ATTACH_FILTER = -15824094;
|
||||
static const int __tmpcosmo_SO_ATTACH_REUSEPORT_CBPF = -15823964;
|
||||
static const int __tmpcosmo_SO_ATTACH_REUSEPORT_EBPF = -15824060;
|
||||
static const int __tmpcosmo_SO_BINDTODEVICE = -15823990;
|
||||
static const int __tmpcosmo_SO_BPF_EXTENSIONS = -15824030;
|
||||
static const int __tmpcosmo_SO_BROADCAST = -15823882;
|
||||
static const int __tmpcosmo_SO_BSDCOMPAT = -15824038;
|
||||
static const int __tmpcosmo_SO_BUSY_POLL = -15823944;
|
||||
static const int __tmpcosmo_SO_CNX_ADVICE = -15823828;
|
||||
static const int __tmpcosmo_SO_DETACH_BPF = -15824068;
|
||||
static const int __tmpcosmo_SO_DETACH_FILTER = -15824032;
|
||||
static const int __tmpcosmo_SO_DOMAIN = -15823980;
|
||||
static const int __tmpcosmo_SO_DONTROUTE = -15823918;
|
||||
static const int __tmpcosmo_SO_ERROR = -15823892;
|
||||
static const int __tmpcosmo_SO_EXCLUSIVEADDRUSE = -15823858;
|
||||
static const int __tmpcosmo_SO_GET_FILTER = -15823834;
|
||||
static const int __tmpcosmo_SO_INCOMING_CPU = -15824074;
|
||||
static const int __tmpcosmo_SO_KEEPALIVE = -15823890;
|
||||
static const int __tmpcosmo_SO_LINGER = -15824084;
|
||||
static const int __tmpcosmo_SO_LOCK_FILTER = -15823804;
|
||||
static const int __tmpcosmo_SO_MARK = -15824008;
|
||||
static const int __tmpcosmo_SO_MAX_PACING_RATE = -15824120;
|
||||
static const int __tmpcosmo_SO_NOFCS = -15823818;
|
||||
static const int __tmpcosmo_SO_NO_CHECK = -15824152;
|
||||
static const int __tmpcosmo_SO_OOBINLINE = -15823838;
|
||||
static const int __tmpcosmo_SO_PASSCRED = -15823888;
|
||||
static const int __tmpcosmo_SO_PASSSEC = -15823866;
|
||||
static const int __tmpcosmo_SO_PEEK_OFF = -15823870;
|
||||
static const int __tmpcosmo_SO_PEERCRED = -15823954;
|
||||
static const int __tmpcosmo_SO_PEERNAME = -15824042;
|
||||
static const int __tmpcosmo_SO_PEERSEC = -15823844;
|
||||
static const int __tmpcosmo_SO_PRIORITY = -15824122;
|
||||
static const int __tmpcosmo_SO_PROTOCOL = -15823982;
|
||||
static const int __tmpcosmo_SO_RCVBUF = -15823974;
|
||||
static const int __tmpcosmo_SO_RCVBUFFORCE = -15823994;
|
||||
static const int __tmpcosmo_SO_RCVLOWAT = -15824076;
|
||||
static const int __tmpcosmo_SO_RCVTIMEO = -15824046;
|
||||
static const int __tmpcosmo_SO_REUSEADDR = -15823810;
|
||||
static const int __tmpcosmo_SO_REUSEPORT = -15823822;
|
||||
static const int __tmpcosmo_SO_RXQ_OVFL = -15824066;
|
||||
static const int __tmpcosmo_SO_SECURITY_AUTHENTICATION = -15824098;
|
||||
static const int __tmpcosmo_SO_SECURITY_ENCRYPTION_NETWORK = -15824126;
|
||||
static const int __tmpcosmo_SO_SELECT_ERR_QUEUE = -15824052;
|
||||
static const int __tmpcosmo_SO_SETFIB = -15823920;
|
||||
static const int __tmpcosmo_SO_SNDBUF = -15824102;
|
||||
static const int __tmpcosmo_SO_SNDBUFFORCE = -15823840;
|
||||
static const int __tmpcosmo_SO_SNDLOWAT = -15823946;
|
||||
static const int __tmpcosmo_SO_SNDTIMEO = -15824064;
|
||||
static const int __tmpcosmo_SO_TIMESTAMP = -15823932;
|
||||
static const int __tmpcosmo_SO_TIMESTAMPING = -15824054;
|
||||
static const int __tmpcosmo_SO_TIMESTAMPNS = -15823910;
|
||||
static const int __tmpcosmo_SO_TYPE = -15824144;
|
||||
static const int __tmpcosmo_SO_USELOOPBACK = -15824110;
|
||||
static const int __tmpcosmo_SO_WIFI_STATUS = -15824108;
|
||||
static const unsigned int __tmpcosmo_B1000000 = 15823512;
|
||||
static const unsigned int __tmpcosmo_B110 = 15823518;
|
||||
static const unsigned int __tmpcosmo_B115200 = 15823540;
|
||||
static const unsigned int __tmpcosmo_B1152000 = 15823538;
|
||||
static const unsigned int __tmpcosmo_B1200 = 15823548;
|
||||
static const unsigned int __tmpcosmo_B134 = 15823510;
|
||||
static const unsigned int __tmpcosmo_B150 = 15823542;
|
||||
static const unsigned int __tmpcosmo_B1500000 = 15823508;
|
||||
static const unsigned int __tmpcosmo_B1800 = 15823522;
|
||||
static const unsigned int __tmpcosmo_B19200 = 15823546;
|
||||
static const unsigned int __tmpcosmo_B200 = 15823528;
|
||||
static const unsigned int __tmpcosmo_B2000000 = 15823524;
|
||||
static const unsigned int __tmpcosmo_B230400 = 15823516;
|
||||
static const unsigned int __tmpcosmo_B2400 = 15823526;
|
||||
static const unsigned int __tmpcosmo_B2500000 = 15823558;
|
||||
static const unsigned int __tmpcosmo_B300 = 15823534;
|
||||
static const unsigned int __tmpcosmo_B3000000 = 15823530;
|
||||
static const unsigned int __tmpcosmo_B3500000 = 15823544;
|
||||
static const unsigned int __tmpcosmo_B38400 = 15823514;
|
||||
static const unsigned int __tmpcosmo_B4000000 = 15823520;
|
||||
static const unsigned int __tmpcosmo_B4800 = 15823556;
|
||||
static const unsigned int __tmpcosmo_B50 = 15823532;
|
||||
static const unsigned int __tmpcosmo_B500000 = 15823550;
|
||||
static const unsigned int __tmpcosmo_B57600 = 15823552;
|
||||
static const unsigned int __tmpcosmo_B576000 = 15823506;
|
||||
static const unsigned int __tmpcosmo_B600 = 15823554;
|
||||
static const unsigned int __tmpcosmo_B75 = 15823536;
|
||||
static const unsigned int __tmpcosmo_B9600 = 15823504;
|
||||
static const unsigned short __tmpcosmo_AF_INET6 = 58236;
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* ACTUALLY_MODS */
|
|
@ -1,11 +1,27 @@
|
|||
#ifndef COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_
|
||||
#define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_
|
||||
|
||||
#ifdef COSMO
|
||||
#define timespec_get __timespec_get
|
||||
#define timespec_getres __timespec_getres
|
||||
#define timespec_cmp __timespec_cmp
|
||||
#define timespec_tomicros __timespec_tomicros
|
||||
#define timespec_tomillis __timespec_tomillis
|
||||
#define timespec_tonanos __timespec_tonanos
|
||||
#define timespec_add __timespec_add
|
||||
#define timespec_fromnanos __timespec_fromnanos
|
||||
#define timespec_frommicros __timespec_frommicros
|
||||
#define timespec_frommillis __timespec_frommillis
|
||||
#define timespec_real __timespec_real
|
||||
#define timespec_mono __timespec_mono
|
||||
#define timespec_sleep __timespec_sleep
|
||||
#define timespec_sleep_until __timespec_sleep_until
|
||||
#define timespec_sub __timespec_sub
|
||||
#endif /* COSMO */
|
||||
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
#define timespec_zero ((struct timespec){0})
|
||||
#define timespec_max ((struct timespec){0x7fffffffffffffff, 999999999})
|
||||
|
||||
struct timespec {
|
||||
int64_t tv_sec;
|
||||
int64_t tv_nsec; /* nanoseconds */
|
||||
|
@ -18,9 +34,14 @@ int futimens(int, const struct timespec[2]);
|
|||
int nanosleep(const struct timespec *, struct timespec *);
|
||||
int sys_futex(int *, int, int, const struct timespec *, int *);
|
||||
int utimensat(int, const char *, const struct timespec[2], int);
|
||||
|
||||
#ifdef COSMO
|
||||
/* cosmopolitan libc's non-posix timespec library
|
||||
hidden unless COSMO is defined, because these names clash with the emacs codebase */
|
||||
#define timespec_zero ((struct timespec){0})
|
||||
#define timespec_max ((struct timespec){0x7fffffffffffffff, 999999999})
|
||||
int timespec_get(struct timespec *, int);
|
||||
int timespec_getres(struct timespec *, int);
|
||||
|
||||
int timespec_cmp(struct timespec, struct timespec) pureconst;
|
||||
int64_t timespec_tomicros(struct timespec) pureconst;
|
||||
int64_t timespec_tomillis(struct timespec) pureconst;
|
||||
|
@ -34,6 +55,7 @@ struct timespec timespec_mono(void);
|
|||
struct timespec timespec_sleep(struct timespec);
|
||||
int timespec_sleep_until(struct timespec);
|
||||
struct timespec timespec_sub(struct timespec, struct timespec) pureconst;
|
||||
#endif /* COSMO */
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -2,6 +2,16 @@
|
|||
#define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMEVAL_H_
|
||||
#include "libc/calls/struct/timespec.h"
|
||||
#include "libc/time/struct/timezone.h"
|
||||
|
||||
#ifdef COSMO
|
||||
#define timeval_cmp __timeval_cmp
|
||||
#define timeval_frommicros __timeval_frommicros
|
||||
#define timeval_frommillis __timeval_frommillis
|
||||
#define timeval_add __timeval_add
|
||||
#define timeval_sub __timeval_sub
|
||||
#define timeval_totimespec __timeval_totimespec
|
||||
#endif /* COSMO */
|
||||
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
COSMOPOLITAN_C_START_
|
||||
|
||||
|
@ -16,6 +26,9 @@ int gettimeofday(struct timeval *, struct timezone *);
|
|||
int lutimes(const char *, const struct timeval[2]);
|
||||
int utimes(const char *, const struct timeval[2]);
|
||||
|
||||
#ifdef COSMO
|
||||
/* cosmopolitan libc's non-posix timevals library
|
||||
hidden unless COSMO is defined, because these names clash with the emacs codebase */
|
||||
int timeval_cmp(struct timeval, struct timeval) pureconst;
|
||||
struct timeval timeval_frommicros(int64_t) pureconst;
|
||||
struct timeval timeval_frommillis(int64_t) pureconst;
|
||||
|
@ -23,6 +36,7 @@ struct timeval timeval_add(struct timeval, struct timeval) pureconst;
|
|||
struct timeval timeval_sub(struct timeval, struct timeval) pureconst;
|
||||
struct timeval timespec_totimeval(struct timespec) pureconst;
|
||||
struct timespec timeval_totimespec(struct timeval) pureconst;
|
||||
#endif /* COSMO */
|
||||
|
||||
COSMOPOLITAN_C_END_
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
|
|
|
@ -725,6 +725,9 @@ void abort(void) wontreturn;
|
|||
#endif /* GCC8+ */
|
||||
#if __GNUC__ + 0 >= 9
|
||||
#pragma GCC diagnostic ignored /* "always true" breaks dce */ "-Waddress"
|
||||
#if __GNUC__ >= 11
|
||||
#pragma GCC diagnostic ignored /* orwellian */ "-Wold-style-definition"
|
||||
#endif /* GCC11+ */
|
||||
#endif /* GCC9+ */
|
||||
#endif /* !C++ */
|
||||
#endif /* GCC && !LLVM */
|
||||
|
|
238
third_party/gcc/README.cosmo
vendored
238
third_party/gcc/README.cosmo
vendored
|
@ -1,232 +1,28 @@
|
|||
This is a modern statically-linked GNU C2X toolchain.
|
||||
DESCRIPTION
|
||||
|
||||
You have the freedom to obtain the original sources to these binaries,
|
||||
and build ones just like them, by visiting:
|
||||
Cosmopolitan GCC
|
||||
Prebuilt x86_64-linux binaries
|
||||
An APE-friendly C/C++ compiler
|
||||
|
||||
https://www.gnu.org/
|
||||
https://github.com/richfelker/musl-cross-make
|
||||
LICENSE
|
||||
|
||||
The musl-cross-make tool also produces libraries and header files. We've
|
||||
only vendored the statically-linked executable files, since Cosmopolitan
|
||||
won't depend on GPL-licensed headers / runtime libraries.
|
||||
GPLv3 and other licenses (see LICENSE.txt)
|
||||
|
||||
We haven't made any modifications to the original software. The versions
|
||||
we chose are documented in $PKG/LICENSE.txt. Here's our Musl
|
||||
build config for maximum transparency:
|
||||
ORIGIN
|
||||
|
||||
commit 38e52db8358c043ae82b346a2e6e66bc86a53bc1
|
||||
Author: Rich Felker <dalias@aerifal.cx>
|
||||
Date: Wed Dec 18 14:29:07 2019 -0500
|
||||
@ahgamut's musl-cross-make fork
|
||||
https://github.com/ahgamut/musl-cross-make/
|
||||
d0f33e2162cf5e5b30cdf3b3accc0d0f7756830c
|
||||
|
||||
switch linux kernel headers to 4.19.88 by default
|
||||
MODIFICATIONS
|
||||
|
||||
using slim headers-only version. this change is needed to support all
|
||||
future versions of musl on 32-bit archs, since prior to 4.16 the
|
||||
kernel headers had incompatibility with userspace time_t not matching
|
||||
the kernel's old (32-bit) time_t. support for older headers will be
|
||||
dropped entirely soon.
|
||||
ahgamut's musl-cross-make fork includes a 2kLOC patch that modifies
|
||||
GCC so it'll compile C code like `switch(errno){case EINVAL: etc.}`
|
||||
|
||||
TARGET = x86_64-linux-musl
|
||||
OUTPUT = /opt/cross9
|
||||
GCC_VER = 9.2.0
|
||||
export LANG=en_US.UTF-8
|
||||
export LC_CTYPE=en_US.UTF-8
|
||||
COMMON_CONFIG += CC="/opt/cross9/bin/x86_64-linux-musl-cc -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
|
||||
COMMON_CONFIG += CXX="/opt/cross9/bin/x86_64-linux-musl-c++ -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
|
||||
COMMON_CONFIG += LD="/opt/cross9/bin/x86_64-linux-musl-ld --build-id=none"
|
||||
COMMON_CONFIG += NM="/opt/cross9/bin/x86_64-linux-musl-nm"
|
||||
COMMON_CONFIG += LDFLAGS="-Wl,--build-id=none"
|
||||
COMMON_CONFIG += OBJCOPY="/opt/cross9/bin/x86_64-linux-musl-objcopy"
|
||||
COMMON_CONFIG += --disable-nls --disable-lto
|
||||
GCC_CONFIG += --enable-languages=c,c++
|
||||
GCC_CONFIG += --disable-multilib
|
||||
GCC_CONFIG += --with-gnu-as
|
||||
GCC_CONFIG += --with-gnu-ld
|
||||
GCC_CONFIG += --disable-multilib
|
||||
GCC_CONFIG += --enable-sjlj-exceptions
|
||||
GCC_CONFIG += --disable-threads
|
||||
GCC_CONFIG += --disable-tls
|
||||
COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=
|
||||
SEE ALSO
|
||||
|
||||
#!/bin/sh
|
||||
set -e
|
||||
export LC_ALL=C
|
||||
export GUNZ="/bin/gzip --rsyncable -9 -c"
|
||||
BASE=/opt/cross9
|
||||
PKG=third_party/gcc
|
||||
VERS=9.2.0
|
||||
third_party/gcc/portcosmo.patch
|
||||
|
||||
if [ ! -d $BASE ]; then
|
||||
echo error: run make install >&2
|
||||
exit 1
|
||||
fi
|
||||
NOTES
|
||||
|
||||
if [ -d $BASE/$PKG ]; then
|
||||
rm -rf $BASE/$PKG
|
||||
fi
|
||||
|
||||
mkdir -p $BASE/$PKG/bin
|
||||
mkdir -p $BASE/$PKG/libexec/gcc/x86_64-linux-musl/$VERS
|
||||
mkdir -p $BASE/$PKG/x86_64-linux-musl/bin
|
||||
|
||||
cp $BASE/bin/x86_64-linux-musl-gcov-dump $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
||||
cp $BASE/bin/x86_64-linux-musl-cc $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
||||
cp $BASE/bin/x86_64-linux-musl-addr2line $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
||||
cp $BASE/bin/x86_64-linux-musl-ar $BASE/$PKG/bin/x86_64-linux-musl-ar
|
||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
||||
cp $BASE/bin/x86_64-linux-musl-c++ $BASE/$PKG/bin/x86_64-linux-musl-g++
|
||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
||||
cp $BASE/bin/x86_64-linux-musl-gcc-nm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
||||
cp $BASE/bin/x86_64-linux-musl-c++filt $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
||||
cp $BASE/bin/x86_64-linux-musl-elfedit $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
||||
cp $BASE/bin/x86_64-linux-musl-ld $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
||||
cp $BASE/bin/x86_64-linux-musl-size $BASE/$PKG/bin/x86_64-linux-musl-size
|
||||
cp $BASE/bin/x86_64-linux-musl-strings $BASE/$PKG/bin/x86_64-linux-musl-strings
|
||||
cp $BASE/bin/x86_64-linux-musl-objcopy $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
||||
cp $BASE/bin/x86_64-linux-musl-nm $BASE/$PKG/bin/x86_64-linux-musl-nm
|
||||
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
||||
cp $BASE/bin/x86_64-linux-musl-readelf $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
||||
cp $BASE/bin/x86_64-linux-musl-objdump $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
||||
cp $BASE/bin/x86_64-linux-musl-gcc-ar $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
||||
cp $BASE/bin/x86_64-linux-musl-gcov $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
||||
cp $BASE/bin/x86_64-linux-musl-ranlib $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
||||
cp $BASE/bin/x86_64-linux-musl-as $BASE/$PKG/bin/x86_64-linux-musl-as
|
||||
cp $BASE/bin/x86_64-linux-musl-gcc-ranlib $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
||||
cp $BASE/bin/x86_64-linux-musl-cpp $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
||||
cp $BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
|
||||
cp $BASE/bin/x86_64-linux-musl-gprof $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
||||
cp $BASE/bin/x86_64-linux-musl-gcov-tool $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
||||
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ar
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-g++
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-size
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strings
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-nm
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-as
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
||||
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
||||
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump >$BASE/$PKG/bin/x86_64-linux-musl-gcov-dump.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc >$BASE/$PKG/bin/x86_64-linux-musl-gcc.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-addr2line >$BASE/$PKG/bin/x86_64-linux-musl-addr2line.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ar >$BASE/$PKG/bin/x86_64-linux-musl-ar.gz
|
||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-g++ >$BASE/$PKG/bin/x86_64-linux-musl-g++.gz
|
||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm >$BASE/$PKG/bin/x86_64-linux-musl-gcc-nm.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-c++filt >$BASE/$PKG/bin/x86_64-linux-musl-c++filt.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-elfedit >$BASE/$PKG/bin/x86_64-linux-musl-elfedit.gz
|
||||
$GUNZ $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd >$BASE/$PKG/x86_64-linux-musl/bin/ld.bfd.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-size >$BASE/$PKG/bin/x86_64-linux-musl-size.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strings >$BASE/$PKG/bin/x86_64-linux-musl-strings.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objcopy >$BASE/$PKG/bin/x86_64-linux-musl-objcopy.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-nm >$BASE/$PKG/bin/x86_64-linux-musl-nm.gz
|
||||
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-readelf >$BASE/$PKG/bin/x86_64-linux-musl-readelf.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objdump >$BASE/$PKG/bin/x86_64-linux-musl-objdump.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ar.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov >$BASE/$PKG/bin/x86_64-linux-musl-gcov.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-ranlib.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-as >$BASE/$PKG/bin/x86_64-linux-musl-as.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-cpp >$BASE/$PKG/bin/x86_64-linux-musl-cpp.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strip >$BASE/$PKG/bin/x86_64-linux-musl-strip.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gprof >$BASE/$PKG/bin/x86_64-linux-musl-gprof.gz
|
||||
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool >$BASE/$PKG/bin/x86_64-linux-musl-gcov-tool.gz
|
||||
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-addr2line
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-ar
|
||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-g++
|
||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-c++filt
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-elfedit
|
||||
rm $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-size
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-strings
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-objcopy
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-nm
|
||||
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-readelf
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-objdump
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-ranlib
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-as
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-cpp
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-strip
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gprof
|
||||
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
|
||||
|
||||
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-cc
|
||||
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-gcc-9.2.0
|
||||
ln -s ../../bin/x86_64-linux-musl-ar $BASE/$PKG/x86_64-linux-musl/bin/ar
|
||||
ln -s x86_64-linux-musl-g++ $BASE/$PKG/bin/x86_64-linux-musl-c++
|
||||
ln -s ld.bfd $BASE/$PKG/x86_64-linux-musl/bin/ld
|
||||
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld.bfd
|
||||
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld
|
||||
ln -s ../../bin/x86_64-linux-musl-objcopy $BASE/$PKG/x86_64-linux-musl/bin/objcopy
|
||||
ln -s ../../bin/x86_64-linux-musl-nm $BASE/$PKG/x86_64-linux-musl/bin/nm
|
||||
ln -s ../../bin/x86_64-linux-musl-readelf $BASE/$PKG/x86_64-linux-musl/bin/readelf
|
||||
ln -s ../../bin/x86_64-linux-musl-objdump $BASE/$PKG/x86_64-linux-musl/bin/objdump
|
||||
ln -s ../../bin/x86_64-linux-musl-ranlib $BASE/$PKG/x86_64-linux-musl/bin/ranlib
|
||||
ln -s ../../bin/x86_64-linux-musl-as $BASE/$PKG/x86_64-linux-musl/bin/as
|
||||
ln -s ../../bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/strip
|
||||
|
||||
{
|
||||
cat <<'EOF'
|
||||
This is a modern statically-linked GNU C2X toolchain.
|
||||
|
||||
You have the freedom to obtain the original sources to these binaries,
|
||||
and build ones just like them, by visiting:
|
||||
|
||||
https://www.gnu.org/
|
||||
https://github.com/richfelker/musl-cross-make
|
||||
|
||||
The musl-cross-make tool also produces libraries and header files. We've
|
||||
only vendored the statically-linked executable files, since Cosmopolitan
|
||||
won't depend on GPL-licensed headers / runtime libraries.
|
||||
|
||||
We haven't made any modifications to the original software. The versions
|
||||
we chose are documented in $PKG/LICENSE.txt. Here's our Musl
|
||||
build config for maximum transparency:
|
||||
|
||||
EOF
|
||||
git show --quiet
|
||||
echo
|
||||
cat config.mak
|
||||
echo
|
||||
cat bundle.sh
|
||||
} >$BASE/$PKG/README.cosmo
|
||||
|
||||
{
|
||||
for f in $(find . -iname \*copying\* -or -iname \*license\* | sort); do
|
||||
printf '\n'
|
||||
printf '%s\n' "$f"
|
||||
printf '========================================================================\n'
|
||||
cat "$f"
|
||||
done
|
||||
} >$BASE/$PKG/LICENSE.txt
|
||||
My name is Justine Tunney and I approve of these binaries.
|
||||
|
|
BIN
third_party/gcc/bin/x86_64-linux-musl-addr2line.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-addr2line.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-ar.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-ar.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-as.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-as.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-c++filt.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-c++filt.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-cpp.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-cpp.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-elfedit.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-elfedit.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-g++.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-g++.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ar.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ar.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-nm.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-nm.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ranlib.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc-ranlib.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcc.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-dump.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-dump.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-tool.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov-tool.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gcov.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-gprof.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-gprof.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-nm.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-nm.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-objcopy.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-objcopy.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-objdump.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-objdump.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-ranlib.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-ranlib.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-readelf.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-readelf.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-size.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-size.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-strings.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-strings.gz
vendored
Binary file not shown.
BIN
third_party/gcc/bin/x86_64-linux-musl-strip.gz
vendored
BIN
third_party/gcc/bin/x86_64-linux-musl-strip.gz
vendored
Binary file not shown.
97
third_party/gcc/config.mak
vendored
Normal file
97
third_party/gcc/config.mak
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
#
|
||||
# config.mak.dist - sample musl-cross-make configuration
|
||||
#
|
||||
# Copy to config.mak and edit as desired.
|
||||
#
|
||||
|
||||
# There is no default TARGET; you must select one here or on the make
|
||||
# command line. Some examples:
|
||||
|
||||
# TARGET = i486-linux-musl
|
||||
TARGET = aarch64-linux-musl
|
||||
# TARGET = arm-linux-musleabi
|
||||
# TARGET = arm-linux-musleabihf
|
||||
# TARGET = sh2eb-linux-muslfdpic
|
||||
# TARGET = powerpc64le-linux-musl
|
||||
# TARGET = aarch64-linux-musl
|
||||
|
||||
# By default, cross compilers are installed to ./output under the top-level
|
||||
# musl-cross-make directory and can later be moved wherever you want them.
|
||||
# To install directly to a specific location, set it here. Multiple targets
|
||||
# can safely be installed in the same location. Some examples:
|
||||
|
||||
OUTPUT = /opt/cross11portcosmo
|
||||
# OUTPUT = /usr/local
|
||||
|
||||
# By default, latest supported release versions of musl and the toolchain
|
||||
# components are used. You can override those here, but the version selected
|
||||
# must be supported (under hashes/ and patches/) to work. For musl, you
|
||||
# can use "git-refname" (e.g. git-master) instead of a release. Setting a
|
||||
# blank version for gmp, mpc, mpfr and isl will suppress download and
|
||||
# in-tree build of these libraries and instead depend on pre-installed
|
||||
# libraries when available (isl is optional and not set by default).
|
||||
# Setting a blank version for linux will suppress installation of kernel
|
||||
# headers, which are not needed unless compiling programs that use them.
|
||||
|
||||
# BINUTILS_VER = 2.25.1
|
||||
GCC_VER = 11.2.0
|
||||
# MUSL_VER = git-master
|
||||
# GMP_VER =
|
||||
# MPC_VER =
|
||||
# MPFR_VER =
|
||||
# ISL_VER =
|
||||
# LINUX_VER =
|
||||
|
||||
# By default source archives are downloaded with wget. curl is also an option.
|
||||
|
||||
# DL_CMD = wget -c -O
|
||||
# DL_CMD = curl -C - -L -o
|
||||
|
||||
# Check sha-1 hashes of downloaded source archives. On gnu systems this is
|
||||
# usually done with sha1sum.
|
||||
|
||||
# SHA1_CMD = sha1sum -c
|
||||
# SHA1_CMD = sha1 -c
|
||||
# SHA1_CMD = shasum -a 1 -c
|
||||
|
||||
# Something like the following can be used to produce a static-linked
|
||||
# toolchain that's deployable to any system with matching arch, using
|
||||
# an existing musl-targeted cross compiler. This only works if the
|
||||
# system you build on can natively (or via binfmt_misc and qemu) run
|
||||
# binaries produced by the existing toolchain (in this example, i486).
|
||||
|
||||
# MUSL_CONFIG += --enable-debug
|
||||
# MUSL_CONFIG += CFLAGS="-Os -fno-omit-frame-pointer -fno-optimize-sibling-calls -mno-omit-leaf-frame-pointer"
|
||||
MUSL_CONFIG += CFLAGS="-Os"
|
||||
|
||||
COMMON_CONFIG += CC="/opt/cross/bin/x86_64-linux-musl-gcc -static --static"
|
||||
COMMON_CONFIG += CXX="/opt/cross/bin/x86_64-linux-musl-g++ -static --static"
|
||||
# COMMON_CONFIG += CC="gcc -static --static"
|
||||
# COMMON_CONFIG += CXX="g++ -static --static"
|
||||
|
||||
# Recommended options for smaller build for deploying binaries:
|
||||
|
||||
COMMON_CONFIG += CFLAGS="-Os -g0"
|
||||
COMMON_CONFIG += CXXFLAGS="-Os -g0"
|
||||
COMMON_CONFIG += LDFLAGS="-s"
|
||||
|
||||
# Options you can add for faster/simpler build at the expense of features:
|
||||
|
||||
COMMON_CONFIG += --disable-nls
|
||||
GCC_CONFIG += --disable-libquadmath --disable-decimal-float
|
||||
GCC_CONFIG += --disable-libitm
|
||||
GCC_CONFIG += --disable-fixed-point
|
||||
GCC_CONFIG += --disable-lto
|
||||
|
||||
# By default C and C++ are the only languages enabled, and these are
|
||||
# the only ones tested and known to be supported. You can uncomment the
|
||||
# following and add other languages if you want to try getting them to
|
||||
# work too.
|
||||
|
||||
GCC_CONFIG += --enable-languages=c,c++ #--enable-plugin
|
||||
|
||||
# You can keep the local build path out of your toolchain binaries and
|
||||
# target libraries with the following, but then gdb needs to be told
|
||||
# where to look for source files.
|
||||
|
||||
# COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=
|
0
third_party/gcc/lib/gcc/x86_64-linux-musl/11.2.0/specs
vendored
Normal file
0
third_party/gcc/lib/gcc/x86_64-linux-musl/11.2.0/specs
vendored
Normal file
|
@ -1,141 +0,0 @@
|
|||
*asm:
|
||||
%{m16|m32:--32} %{m16|m32:;:--64} %{msse2avx:%{!mavx:-msse2avx}}
|
||||
|
||||
*asm_debug:
|
||||
%{%:debug-level-gt(0):%{gstabs*:--gstabs}%{!gstabs*:%{g*:--gdwarf2}}} %{fdebug-prefix-map=*:--debug-prefix-map %*}
|
||||
|
||||
*asm_final:
|
||||
%{gsplit-dwarf:
|
||||
objcopy --extract-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} %{c:%{o*:%:replace-extension(%{o*:%*} .dwo)}%{!o*:%b.dwo}}%{!c:%b.dwo}
|
||||
objcopy --strip-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} }
|
||||
|
||||
*asm_options:
|
||||
%{-target-help:%:print-asm-header()} %{v} %{w:-W} %{I*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %a %Y %{c:%W{o*}%{!o*:-o %w%b%O}}%{!c:-o %d%w%u%O}
|
||||
|
||||
*invoke_as:
|
||||
%{!fwpa*: %{fcompare-debug=*|fdump-final-insns=*:%:compare-debug-dump-opt()} %{!S:-o %|.s |
|
||||
as %(asm_options) %m.s %A } }
|
||||
|
||||
*cpp:
|
||||
%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}
|
||||
|
||||
*cpp_options:
|
||||
%(cpp_unique_options) %1 %{m*} %{std*&ansi&trigraphs} %{W*&pedantic*} %{w} %{f*} %{g*:%{%:debug-level-gt(0):%{g*} %{!fno-working-directory:-fworking-directory}}} %{O*} %{undef} %{save-temps*:-fpch-preprocess}
|
||||
|
||||
*cpp_debug_options:
|
||||
%{d*}
|
||||
|
||||
*cpp_unique_options:
|
||||
%{!Q:-quiet} %{nostdinc*} %{C} %{CC} %{v} %@{I*&F*} %{P} %I %{MD:-MD %{!o:%b.d}%{o*:%.d%*}} %{MMD:-MMD %{!o:%b.d}%{o*:%.d%*}} %{M} %{MM} %{MF*} %{MG} %{MP} %{MQ*} %{MT*} %{!E:%{!M:%{!MM:%{!MT:%{!MQ:%{MD|MMD:%{o*:-MQ %*}}}}}}} %{remap} %{g3|ggdb3|gstabs3|gxcoff3|gvms3:-dD} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %{H} %C %{D*&U*&A*} %{i*} %Z %i %{E|M|MM:%W{o*}}
|
||||
|
||||
*trad_capable_cpp:
|
||||
cc1 -E %{traditional|traditional-cpp:-traditional-cpp}
|
||||
|
||||
*cc1:
|
||||
%{!mandroid|tno-android-cc:%(cc1_cpu) %{profile:-p};:%(cc1_cpu) %{profile:-p} %{!fno-pic:%{!fno-PIC:%{!fpic:%{!fPIC: -fPIC}}}}}
|
||||
|
||||
*cc1_options:
|
||||
%{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %1 %{!Q:-quiet} %{!dumpbase:-dumpbase %B} %{d*} %{m*} %{aux-info*} %{fcompare-debug-second:%:compare-debug-auxbase-opt(%b)} %{!fcompare-debug-second:%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}}%{!c:%{!S:-auxbase %b}} %{g*} %{O*} %{W*&pedantic*} %{w} %{std*&ansi&trigraphs} %{v:-version} %{pg:-p} %{p} %{f*} %{undef} %{Qn:-fno-ident} %{Qy:} %{-help:--help} %{-target-help:--target-help} %{-version:--version} %{-help=*:--help=%*} %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}} %{fsyntax-only:-o %j} %{-param*} %{coverage:-fprofile-arcs -ftest-coverage} %{fprofile-arcs|fprofile-generate*|coverage: %{!fprofile-update=single: %{pthread:-fprofile-update=prefer-atomic}}}
|
||||
|
||||
*cc1plus:
|
||||
|
||||
|
||||
*link_gcc_c_sequence:
|
||||
%{static|static-pie:--start-group} %G %{!nolibc:%L} %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}
|
||||
|
||||
*link_ssp:
|
||||
%{fstack-protector|fstack-protector-all|fstack-protector-strong|fstack-protector-explicit:-lssp_nonshared}
|
||||
|
||||
*endfile:
|
||||
--push-state --pop-state
|
||||
|
||||
*link:
|
||||
%{!mandroid|tno-android-ld:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}};:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{mx32:-m elf32_x86_64} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} %{m16|m32:-dynamic-linker } %{m16|m32:;:-dynamic-linker} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}} %{shared: -Bsymbolic}}
|
||||
|
||||
*lib:
|
||||
--push-state --pop-state
|
||||
|
||||
*link_gomp:
|
||||
|
||||
|
||||
*libgcc:
|
||||
--push-state --pop-state
|
||||
|
||||
*startfile:
|
||||
--push-state --pop-state
|
||||
|
||||
*cross_compile:
|
||||
1
|
||||
|
||||
*version:
|
||||
9.2.0
|
||||
|
||||
*multilib:
|
||||
. ;
|
||||
|
||||
*multilib_defaults:
|
||||
m64
|
||||
|
||||
*multilib_extra:
|
||||
|
||||
|
||||
*multilib_matches:
|
||||
|
||||
|
||||
*multilib_exclusions:
|
||||
|
||||
|
||||
*multilib_options:
|
||||
|
||||
|
||||
*multilib_reuse:
|
||||
|
||||
|
||||
*linker:
|
||||
collect2
|
||||
|
||||
*linker_plugin_file:
|
||||
|
||||
|
||||
*lto_wrapper:
|
||||
|
||||
|
||||
*lto_gcc:
|
||||
|
||||
|
||||
*post_link:
|
||||
|
||||
|
||||
*link_libgcc:
|
||||
%D
|
||||
|
||||
*md_exec_prefix:
|
||||
|
||||
|
||||
*md_startfile_prefix:
|
||||
|
||||
|
||||
*md_startfile_prefix_1:
|
||||
|
||||
|
||||
*startfile_prefix_spec:
|
||||
|
||||
|
||||
*sysroot_spec:
|
||||
--sysroot=%R
|
||||
|
||||
*sysroot_suffix_spec:
|
||||
|
||||
|
||||
*sysroot_hdrs_suffix_spec:
|
||||
|
||||
|
||||
*self_spec:
|
||||
|
||||
|
||||
*cc1_cpu:
|
||||
%{march=native:%>march=native %:local_cpu_detect(arch) %{!mtune=*:%>mtune=native %:local_cpu_detect(tune)}} %{mtune=native:%>mtune=native %:local_cpu_detect(tune)}
|
||||
|
||||
*link_command:
|
||||
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S: %(linker) %{fuse-linker-plugin: %e-fuse-linker-plugin is not supported in this configuration}%{flto|flto=*:%<fcompare-debug*} %{flto} %{fno-lto} %{flto=*} %l %{shared|r:;pie|static-pie:-pie %{static|static-pie:--no-dynamic-linker -z text -Bsymbolic}} %{fuse-ld=*:-fuse-ld=%*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %X %{o*} %{e*} %{N} %{n} %{r} %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!r:%{!nostartfiles:%S}}} %{static|no-pie|static-pie:} %@{L*} %(mfwrap) %(link_libgcc) %{fvtable-verify=none:} %{fvtable-verify=std: %e-fvtable-verify=std is not supported in this configuration} %{fvtable-verify=preinit: %e-fvtable-verify=preinit is not supported in this configuration} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address):%{!shared:libasan_preinit%O%s} %{static-libasan:%{!shared:-Bstatic --whole-archive -lasan --no-whole-archive -Bdynamic}}%{!static-libasan:-lasan}} %{%:sanitize(thread):%{!shared:libtsan_preinit%O%s} %{static-libtsan:%{!shared:-Bstatic --whole-archive -ltsan --no-whole-archive -Bdynamic}}%{!static-libtsan:-ltsan}} %{%:sanitize(leak):%{!shared:liblsan_preinit%O%s} %{static-liblsan:%{!shared:-Bstatic --whole-archive -llsan --no-whole-archive -Bdynamic}}%{!static-liblsan:-llsan}}}}} %o %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*:%*} 1): %:include(libgomp.spec)%(link_gomp)} %{fgnu-tm:%:include(libitm.spec)%(link_itm)} %(mflib) %{fsplit-stack: --wrap=pthread_create} %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address): %{static-libasan|static:%:include(libsanitizer.spec)%(link_libasan)} %{static:%ecannot specify -static with -fsanitize=address}} %{%:sanitize(thread): %{static-libtsan|static:%:include(libsanitizer.spec)%(link_libtsan)} %{static:%ecannot specify -static with -fsanitize=thread}} %{%:sanitize(undefined):%{static-libubsan:-Bstatic} -lubsan 
%{static-libubsan:-Bdynamic} %{static-libubsan|static:%:include(libsanitizer.spec)%(link_libubsan)}} %{%:sanitize(leak): %{static-liblsan|static:%:include(libsanitizer.spec)%(link_liblsan)}}}}} %{!nostdlib:%{!r:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}} %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*}
|
||||
%(post_link) }}}}}}
|
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/cc1.gz
vendored
Normal file
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/cc1.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/collect2.gz
vendored
Normal file
BIN
third_party/gcc/libexec/gcc/x86_64-linux-musl/11.2.0/collect2.gz
vendored
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
1869
third_party/gcc/portcosmo.patch
vendored
Normal file
1869
third_party/gcc/portcosmo.patch
vendored
Normal file
File diff suppressed because it is too large
Load diff
46
third_party/gcc/upgrade-cosmo-gcc.sh
vendored
Executable file
46
third_party/gcc/upgrade-cosmo-gcc.sh
vendored
Executable file
|
@ -0,0 +1,46 @@
|
|||
#!/bin/sh
|
||||
|
||||
ARCH=${1:-x86_64}
|
||||
IMPORT=${2:-/opt/cross11portcosmo}
|
||||
PREFIX=third_party/gcc/
|
||||
OLDVERSION=9.2.0
|
||||
NEWVERSION=11.2.0
|
||||
|
||||
rm -rf o/third_party/gcc
|
||||
mv $PREFIX/libexec/gcc/$ARCH-linux-musl/$OLDVERSION $PREFIX/libexec/gcc/$ARCH-linux-musl/$NEWVERSION
|
||||
mv $PREFIX/lib/gcc/$ARCH-linux-musl/$OLDVERSION $PREFIX/lib/gcc/$ARCH-linux-musl/$NEWVERSION
|
||||
sed -i -e "s/$OLDVERSION/$NEWVERSION/g" $(find $PREFIX -name \*.sym | grep $ARCH)
|
||||
|
||||
FILES="
|
||||
$ARCH-linux-musl/bin/ld.bfd
|
||||
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/collect2
|
||||
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1
|
||||
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1plus
|
||||
bin/$ARCH-linux-musl-elfedit
|
||||
bin/$ARCH-linux-musl-nm
|
||||
bin/$ARCH-linux-musl-objcopy
|
||||
bin/$ARCH-linux-musl-gcc
|
||||
bin/$ARCH-linux-musl-c++filt
|
||||
bin/$ARCH-linux-musl-gcc-ranlib
|
||||
bin/$ARCH-linux-musl-addr2line
|
||||
bin/$ARCH-linux-musl-objdump
|
||||
bin/$ARCH-linux-musl-gcov
|
||||
bin/$ARCH-linux-musl-ranlib
|
||||
bin/$ARCH-linux-musl-gcc-nm
|
||||
bin/$ARCH-linux-musl-strip
|
||||
bin/$ARCH-linux-musl-gcov-tool
|
||||
bin/$ARCH-linux-musl-gprof
|
||||
bin/$ARCH-linux-musl-strings
|
||||
bin/$ARCH-linux-musl-gcov-dump
|
||||
bin/$ARCH-linux-musl-cpp
|
||||
bin/$ARCH-linux-musl-ar
|
||||
bin/$ARCH-linux-musl-readelf
|
||||
bin/$ARCH-linux-musl-size
|
||||
bin/$ARCH-linux-musl-as
|
||||
bin/$ARCH-linux-musl-g++
|
||||
bin/$ARCH-linux-musl-gcc-ar
|
||||
"
|
||||
|
||||
for f in $FILES; do
|
||||
gzip -9 <$IMPORT/$f >$PREFIX/$f.gz || exit
|
||||
done
|
BIN
third_party/gcc/x86_64-linux-musl/bin/ld.bfd.gz
vendored
BIN
third_party/gcc/x86_64-linux-musl/bin/ld.bfd.gz
vendored
Binary file not shown.
74
third_party/intel/adxintrin.internal.h
vendored
74
third_party/intel/adxintrin.internal.h
vendored
|
@ -1,43 +1,53 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <adxintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <adxintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _ADXINTRIN_H_INCLUDED
|
||||
#define _ADXINTRIN_H_INCLUDED
|
||||
|
||||
__funline unsigned char _subborrow_u32(unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P) {
|
||||
return __builtin_ia32_sbb_u32(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_subborrow_u32 (unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
return __builtin_ia32_sbb_u32 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarry_u32(unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P) {
|
||||
return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarry_u32 (unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarryx_u32(unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P) {
|
||||
return __builtin_ia32_addcarryx_u32(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarryx_u32 (unsigned char __CF, unsigned int __X,
|
||||
unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned char _subborrow_u64(unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
return __builtin_ia32_sbb_u64(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_subborrow_u64 (unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y, unsigned long long *__P)
|
||||
{
|
||||
return __builtin_ia32_sbb_u64 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarry_u64(unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarry_u64 (unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y, unsigned long long *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
|
||||
}
|
||||
|
||||
__funline unsigned char _addcarryx_u64(unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
return __builtin_ia32_addcarryx_u64(__CF, __X, __Y, __P);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_addcarryx_u64 (unsigned char __CF, unsigned long long __X,
|
||||
unsigned long long __Y, unsigned long long *__P)
|
||||
{
|
||||
return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ADXINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
|
|
68
third_party/intel/ammintrin.internal.h
vendored
68
third_party/intel/ammintrin.internal.h
vendored
|
@ -1,58 +1,54 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _AMMINTRIN_H_INCLUDED
|
||||
#define _AMMINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "third_party/intel/pmmintrin.internal.h"
|
||||
|
||||
#ifndef __SSE4A__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4a")
|
||||
#define __DISABLE_SSE4A__
|
||||
#endif /* __SSE4A__ */
|
||||
|
||||
__funline void _mm_stream_sd(double* __P, __m128d __Y) {
|
||||
__builtin_ia32_movntsd(__P, (__v2df)__Y);
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_stream_sd (double * __P, __m128d __Y)
|
||||
{
|
||||
__builtin_ia32_movntsd (__P, (__v2df) __Y);
|
||||
}
|
||||
|
||||
__funline void _mm_stream_ss(float* __P, __m128 __Y) {
|
||||
__builtin_ia32_movntss(__P, (__v4sf)__Y);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_stream_ss (float * __P, __m128 __Y)
|
||||
{
|
||||
__builtin_ia32_movntss (__P, (__v4sf) __Y);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_extract_si64(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_extrq((__v2di)__X, (__v16qi)__Y);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extract_si64 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_extracti_si64(__m128i __X, unsigned const int __I,
|
||||
unsigned const int __L) {
|
||||
return (__m128i)__builtin_ia32_extrqi((__v2di)__X, __I, __L);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
|
||||
}
|
||||
#else
|
||||
#define _mm_extracti_si64(X, I, L) \
|
||||
((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(X), (unsigned int)(I), \
|
||||
(unsigned int)(L)))
|
||||
#define _mm_extracti_si64(X, I, L) ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), (unsigned int)(I), (unsigned int)(L)))
|
||||
#endif
|
||||
|
||||
__funline __m128i _mm_insert_si64(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_insertq((__v2di)__X, (__v2di)__Y);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_insert_si64 (__m128i __X,__m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_inserti_si64(__m128i __X, __m128i __Y,
|
||||
unsigned const int __I,
|
||||
unsigned const int __L) {
|
||||
return (__m128i)__builtin_ia32_insertqi((__v2di)__X, (__v2di)__Y, __I, __L);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
|
||||
}
|
||||
#else
|
||||
#define _mm_inserti_si64(X, Y, I, L) \
|
||||
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(X), \
|
||||
(__v2di)(__m128i)(Y), (unsigned int)(I), \
|
||||
(unsigned int)(L)))
|
||||
#define _mm_inserti_si64(X, Y, I, L) ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), (__v2di)(__m128i)(Y), (unsigned int)(I), (unsigned int)(L)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_SSE4A__
|
||||
#undef __DISABLE_SSE4A__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE4A__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _AMMINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
22
third_party/intel/amxbf16intrin.internal.h
vendored
Normal file
22
third_party/intel/amxbf16intrin.internal.h
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AMXBF16INTRIN_H_INCLUDED
|
||||
#define _AMXBF16INTRIN_H_INCLUDED
|
||||
#if !defined(__AMX_BF16__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-bf16")
|
||||
#define __DISABLE_AMX_BF16__
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__AMX_BF16__)
|
||||
#define _tile_dpbf16ps_internal(dst,src1,src2) __asm__ volatile ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
#define _tile_dpbf16ps(dst,src1,src2) _tile_dpbf16ps_internal (dst, src1, src2)
|
||||
#endif
|
||||
#ifdef __DISABLE_AMX_BF16__
|
||||
#undef __DISABLE_AMX_BF16__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
25
third_party/intel/amxint8intrin.internal.h
vendored
Normal file
25
third_party/intel/amxint8intrin.internal.h
vendored
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AMXINT8INTRIN_H_INCLUDED
|
||||
#define _AMXINT8INTRIN_H_INCLUDED
|
||||
#if !defined(__AMX_INT8__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-int8")
|
||||
#define __DISABLE_AMX_INT8__
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__AMX_INT8__)
|
||||
#define _tile_int8_dp_internal(name,dst,src1,src2) __asm__ volatile ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
|
||||
#define _tile_dpbssd(dst,src1,src2) _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
|
||||
#define _tile_dpbsud(dst,src1,src2) _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
|
||||
#define _tile_dpbusd(dst,src1,src2) _tile_int8_dp_internal (tdpbusd, dst, src1, src2)
|
||||
#define _tile_dpbuud(dst,src1,src2) _tile_int8_dp_internal (tdpbuud, dst, src1, src2)
|
||||
#endif
|
||||
#ifdef __DISABLE_AMX_INT8__
|
||||
#undef __DISABLE_AMX_INT8__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
46
third_party/intel/amxtileintrin.internal.h
vendored
Normal file
46
third_party/intel/amxtileintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AMXTILEINTRIN_H_INCLUDED
|
||||
#define _AMXTILEINTRIN_H_INCLUDED
|
||||
#if !defined(__AMX_TILE__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("amx-tile")
|
||||
#define __DISABLE_AMX_TILE__
|
||||
#endif
|
||||
#if defined(__x86_64__) && defined(__AMX_TILE__)
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_loadconfig (const void *__config)
|
||||
{
|
||||
__asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_storeconfig (void *__config)
|
||||
{
|
||||
__asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tile_release (void)
|
||||
{
|
||||
__asm__ volatile ("tilerelease" ::);
|
||||
}
|
||||
#define _tile_loadd(dst,base,stride) _tile_loadd_internal (dst, base, stride)
|
||||
#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
|
||||
#define _tile_stream_loadd(dst,base,stride) _tile_stream_loadd_internal (dst, base, stride)
|
||||
#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
|
||||
#define _tile_stored(dst,base,stride) _tile_stored_internal (dst, base, stride)
|
||||
#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) base), "r" ((long) stride) : "memory")
|
||||
#define _tile_zero(dst) _tile_zero_internal (dst)
|
||||
#define _tile_zero_internal(dst) __asm__ volatile ("tilezero\t%%tmm"#dst ::)
|
||||
#endif
|
||||
#ifdef __DISABLE_AMX_TILE__
|
||||
#undef __DISABLE_AMX_TILE__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
2090
third_party/intel/avx2intrin.internal.h
vendored
2090
third_party/intel/avx2intrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
246
third_party/intel/avx5124fmapsintrin.internal.h
vendored
246
third_party/intel/avx5124fmapsintrin.internal.h
vendored
|
@ -1,112 +1,180 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
#define _AVX5124FMAPSINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124FMAPS__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124fmaps")
|
||||
#define __DISABLE_AVX5124FMAPS__
|
||||
#endif /* __AVX5124FMAPS__ */
|
||||
|
||||
__funline __m512 _mm512_4fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
|
||||
__m512 __E, __m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fmaddps((__v16sf)__B, (__v16sf)__C,
|
||||
(__v16sf)__D, (__v16sf)__E,
|
||||
(__v16sf)__A, (const __v4sf *)__F);
|
||||
#endif
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_4fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_4fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_4fmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
|
||||
__m128 __E, __m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
|
||||
(__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_mask_4fmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_maskz_4fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_4fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D,
|
||||
__m512 __E, __m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fnmaddps((__v16sf)__B, (__v16sf)__C,
|
||||
(__v16sf)__D, (__v16sf)__E,
|
||||
(__v16sf)__A, (const __v4sf *)__F);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4fnmadd_ps (__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_4fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fnmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_4fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
|
||||
__m512 __C, __m512 __D, __m512 __E,
|
||||
__m128 *__F) {
|
||||
return (__m512)__builtin_ia32_4fnmaddps_mask(
|
||||
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
|
||||
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4fnmadd_ps (__mmask16 __U,
|
||||
__m512 __A, __m512 __B, __m512 __C,
|
||||
__m512 __D, __m512 __E, __m128 *__F)
|
||||
{
|
||||
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
|
||||
(__v16sf) __C,
|
||||
(__v16sf) __D,
|
||||
(__v16sf) __E,
|
||||
(__v16sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_4fnmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
|
||||
__m128 __E, __m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fnmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
|
||||
(__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_4fnmadd_ss (__m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_mask_4fnmadd_ss(__m128 __A, __mmask8 __U, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fnmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_4fnmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) __A,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_maskz_4fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B,
|
||||
__m128 __C, __m128 __D, __m128 __E,
|
||||
__m128 *__F) {
|
||||
return (__m128)__builtin_ia32_4fnmaddss_mask(
|
||||
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A,
|
||||
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U);
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_4fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
|
||||
__m128 __D, __m128 __E, __m128 *__F)
|
||||
{
|
||||
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
|
||||
(__v4sf) __C,
|
||||
(__v4sf) __D,
|
||||
(__v4sf) __E,
|
||||
(__v4sf) __A,
|
||||
(const __v4sf *) __F,
|
||||
(__v4sf) _mm_setzero_ps (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124FMAPS__
|
||||
#undef __DISABLE_AVX5124FMAPS__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124FMAPS__ */
|
||||
|
||||
#endif /* _AVX5124FMAPSINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
137
third_party/intel/avx5124vnniwintrin.internal.h
vendored
137
third_party/intel/avx5124vnniwintrin.internal.h
vendored
|
@ -1,69 +1,102 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
#define _AVX5124VNNIWINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX5124VNNIW__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx5124vnniw")
|
||||
#define __DISABLE_AVX5124VNNIW__
|
||||
#endif /* __AVX5124VNNIW__ */
|
||||
|
||||
__funline __m512i _mm512_4dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssd((__v16si)__B, (__v16si)__C,
|
||||
(__v16si)__D, (__v16si)__E,
|
||||
(__v16si)__A, (const __v4si *)__F);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_4dpwssd_epi32(__m512i __A, __mmask16 __U,
|
||||
__m512i __B, __m512i __C, __m512i __D,
|
||||
__m512i __E, __m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssd_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssd_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_4dpwssd_epi32(__mmask16 __U, __m512i __A,
|
||||
__m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E,
|
||||
__m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssd_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssd_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_4dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssds((__v16si)__B, (__v16si)__C,
|
||||
(__v16si)__D, (__v16si)__E,
|
||||
(__v16si)__A, (const __v4si *)__F);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_4dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E, __m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_4dpwssds_epi32(__m512i __A, __mmask16 __U,
|
||||
__m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E,
|
||||
__m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssds_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_4dpwssds_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) __A,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_4dpwssds_epi32(__mmask16 __U, __m512i __A,
|
||||
__m512i __B, __m512i __C,
|
||||
__m512i __D, __m512i __E,
|
||||
__m128i *__F) {
|
||||
return (__m512i)__builtin_ia32_vp4dpwssds_mask(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A,
|
||||
(const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_4dpwssds_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
|
||||
__m512i __C, __m512i __D, __m512i __E,
|
||||
__m128i *__F)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
|
||||
(__v16si) __C,
|
||||
(__v16si) __D,
|
||||
(__v16si) __E,
|
||||
(__v16si) __A,
|
||||
(const __v4si *) __F,
|
||||
(__v16si) _mm512_setzero_ps (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX5124VNNIW__
|
||||
#undef __DISABLE_AVX5124VNNIW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX5124VNNIW__ */
|
||||
|
||||
#endif /* _AVX5124VNNIWINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
74
third_party/intel/avx512bf16intrin.internal.h
vendored
Normal file
74
third_party/intel/avx512bf16intrin.internal.h
vendored
Normal file
|
@ -0,0 +1,74 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512BF16INTRIN_H_INCLUDED
|
||||
#define _AVX512BF16INTRIN_H_INCLUDED
|
||||
#ifndef __AVX512BF16__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bf16")
|
||||
#define __DISABLE_AVX512BF16__
|
||||
#endif
|
||||
typedef short __v32bh __attribute__ ((__vector_size__ (64)));
|
||||
typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
|
||||
}
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
|
||||
}
|
||||
extern __inline __m512bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
|
||||
{
|
||||
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_cvtneps_pbh (__m512 __A)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
|
||||
}
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
|
||||
}
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
extern __inline __m512
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
|
||||
{
|
||||
return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512BF16__
|
||||
#undef __DISABLE_AVX512BF16__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
130
third_party/intel/avx512bf16vlintrin.internal.h
vendored
Normal file
130
third_party/intel/avx512bf16vlintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,130 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
|
||||
#define _AVX512BF16VLINTRIN_H_INCLUDED
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bf16,avx512vl")
|
||||
#define __DISABLE_AVX512BF16VL__
|
||||
#endif
|
||||
/* 32-byte and 16-byte vectors of short used by the BF16 VL wrappers.  */
typedef short __v16bh __attribute__((__vector_size__(32)));
typedef short __v8bh __attribute__((__vector_size__(16)));
/* Same sizes, additionally marked __may_alias__.  */
typedef short __m256bh __attribute__((__vector_size__(32), __may_alias__));
typedef short __m128bh __attribute__((__vector_size__(16), __may_alias__));
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
|
||||
}
|
||||
extern __inline __m256bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtneps_pbh (__m256 __A)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtneps_pbh (__m128 __A)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
|
||||
}
|
||||
extern __inline __m128bh
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
|
||||
{
|
||||
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
|
||||
{
|
||||
return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
|
||||
{
|
||||
return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512BF16VL__
|
||||
#undef __DISABLE_AVX512BF16VL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
293
third_party/intel/avx512bitalgintrin.internal.h
vendored
293
third_party/intel/avx512bitalgintrin.internal.h
vendored
|
@ -1,172 +1,231 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512BITALGINTRIN_H_INCLUDED
|
||||
#define _AVX512BITALGINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512BITALG__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg")
|
||||
#define __DISABLE_AVX512BITALG__
|
||||
#endif /* __AVX512BITALG__ */
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi8(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountb_v64qi((__v64qi)__A);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi8 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi16(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountw_v32hi((__v32hi)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi16 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALG__
|
||||
#undef __DISABLE_AVX512BITALG__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALG__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512bw")
|
||||
#define __DISABLE_AVX512BITALGBW__
|
||||
#endif /* __AVX512VLBW__ */
|
||||
|
||||
__funline __m512i _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask(
|
||||
(__v64qi)__A, (__v64qi)_mm512_setzero_si512(), (__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask64) __U);
|
||||
}
|
||||
__funline __m512i _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)__B, (__mmask32)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
|
||||
(__v32hi) __W,
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)_mm512_setzero_si512(), (__mmask32)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
|
||||
(__v32hi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __mmask64 _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) {
|
||||
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
|
||||
extern __inline __mmask64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __mmask64 _mm512_mask_bitshuffle_epi64_mask(__mmask64 __M, __m512i __A,
|
||||
__m512i __B) {
|
||||
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask(
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__M);
|
||||
extern __inline __mmask64
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
|
||||
{
|
||||
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALGBW__
|
||||
#undef __DISABLE_AVX512BITALGBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGBW__ */
|
||||
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || \
|
||||
!defined(__AVX512BW__)
|
||||
#endif
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512vl,avx512bw")
|
||||
#define __DISABLE_AVX512BITALGVLBW__
|
||||
#endif /* __AVX512VLBW__ */
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask(
|
||||
(__v32qi)__A, (__v32qi)_mm256_setzero_si256(), (__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __U);
|
||||
}
|
||||
|
||||
__funline __mmask32 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) {
|
||||
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
|
||||
extern __inline __mmask32
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __mmask32 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __M, __m256i __A,
|
||||
__m256i __B) {
|
||||
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask(
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__M);
|
||||
extern __inline __mmask32
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512BITALGVLBW__
|
||||
#undef __DISABLE_AVX512BITALGVLBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGVLBW__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512bitalg,avx512vl")
|
||||
#define __DISABLE_AVX512BITALGVL__
|
||||
#endif /* __AVX512VLBW__ */
|
||||
|
||||
__funline __mmask16 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) {
|
||||
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
|
||||
#endif
|
||||
extern __inline __mmask16
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A,
|
||||
__m128i __B) {
|
||||
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask(
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__M);
|
||||
extern __inline __mmask16
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi8(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountb_v32qi((__v32qi)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi8 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi16(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi((__v16hi)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi16 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_popcnt_epi8(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi((__v16qi)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi8 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_popcnt_epi16(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi((__v8hi)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi16 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
|
||||
(__v16hi)__A, (__v16hi)__B, (__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||
(__v16hi) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask(
|
||||
(__v16hi)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
|
||||
(__v16hi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask(
|
||||
(__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
__funline __m128i _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask((__v8hi)__A, (__v8hi)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||
(__v8hi) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask(
|
||||
(__v8hi)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
|
||||
(__v8hi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512BITALGVL__
|
||||
#undef __DISABLE_AVX512BITALGVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512BITALGBW__ */
|
||||
|
||||
#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
4770
third_party/intel/avx512bwintrin.internal.h
vendored
4770
third_party/intel/avx512bwintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
184
third_party/intel/avx512cdintrin.internal.h
vendored
184
third_party/intel/avx512cdintrin.internal.h
vendored
|
@ -1,100 +1,140 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512CDINTRIN_H_INCLUDED
|
||||
#define _AVX512CDINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512CD__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512cd")
|
||||
#define __DISABLE_AVX512CD__
|
||||
#endif /* __AVX512CD__ */
|
||||
|
||||
typedef long long __v8di __attribute__((__vector_size__(64)));
|
||||
typedef int __v16si __attribute__((__vector_size__(64)));
|
||||
|
||||
typedef long long __m512i __attribute__((__vector_size__(64), __may_alias__));
|
||||
typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
|
||||
|
||||
#endif
|
||||
/* 64-byte element-typed vectors used for the builtin casts below.  */
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));
/* 64-byte vector types marked __may_alias__.  */
typedef long long __m512i __attribute__((__vector_size__(64), __may_alias__));
typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
/* Integer types used for the mask parameters.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
|
||||
|
||||
__funline __m512i _mm512_conflict_epi32(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_conflict_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
|
||||
(__v16si)__A, (__v16si)__W, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_conflict_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_conflict_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictdi_512_mask((__v8di)__A, (__v8di)__W,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_lzcnt_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_lzcnt_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntq_512_mask((__v8di)__A, (__v8di)__W,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
|
||||
(__v8di) _mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_lzcnt_epi32(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntd_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_lzcnt_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U,
|
||||
__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntd_512_mask((__v16si)__A, (__v16si)__W,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vplzcntd_512_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i)
|
||||
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
|
||||
(__v16si) _mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_broadcastmb_epi64(__mmask8 __A) {
|
||||
return (__m512i)__builtin_ia32_broadcastmb512(__A);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_broadcastmb_epi64 (__mmask8 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmb512 (__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_broadcastmw_epi32(__mmask16 __A) {
|
||||
return (__m512i)__builtin_ia32_broadcastmw512(__A);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_broadcastmw_epi32 (__mmask16 __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_broadcastmw512 (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512CD__
|
||||
#undef __DISABLE_AVX512CD__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512CD__ */
|
||||
|
||||
#endif /* _AVX512CDINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
3894
third_party/intel/avx512dqintrin.internal.h
vendored
3894
third_party/intel/avx512dqintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
536
third_party/intel/avx512erintrin.internal.h
vendored
536
third_party/intel/avx512erintrin.internal.h
vendored
|
@ -1,281 +1,357 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512ERINTRIN_H_INCLUDED
|
||||
#define _AVX512ERINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512ER__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512er")
|
||||
#define __DISABLE_AVX512ER__
|
||||
#endif /* __AVX512ER__ */
|
||||
|
||||
typedef double __v8df __attribute__((__vector_size__(64)));
|
||||
typedef float __v16sf __attribute__((__vector_size__(64)));
|
||||
|
||||
typedef float __m512 __attribute__((__vector_size__(64), __may_alias__));
|
||||
typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
|
||||
|
||||
#endif
|
||||
/* 64-byte element-typed vectors used for the builtin casts below.  */
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
/* 64-byte vector types marked __may_alias__.  */
typedef float __m512 __attribute__((__vector_size__(64), __may_alias__));
typedef double __m512d __attribute__((__vector_size__(64), __may_alias__));
/* Integer types used for the mask parameters.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512d _mm512_exp2a23_round_pd(__m512d __A, int __R) {
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
__m512d __W;
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)-1, __R);
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_mask_exp2a23_round_pd(__m512d __W, __mmask8 __U,
|
||||
__m512d __A, int __R) {
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_maskz_exp2a23_round_pd(__mmask8 __U, __m512d __A,
|
||||
int __R) {
|
||||
return (__m512d)__builtin_ia32_exp2pd_mask(
|
||||
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_exp2a23_round_ps(__m512 __A, int __R) {
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_exp2a23_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
__m512 __W;
|
||||
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)-1, __R);
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_exp2a23_round_ps(__m512 __W, __mmask16 __U,
|
||||
__m512 __A, int __R) {
|
||||
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_exp2a23_round_ps(__mmask16 __U, __m512 __A,
|
||||
int __R) {
|
||||
return (__m512)__builtin_ia32_exp2ps_mask(
|
||||
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_rcp28_round_pd(__m512d __A, int __R) {
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
__m512d __W;
|
||||
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)-1, __R);
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_mask_rcp28_round_pd(__m512d __W, __mmask8 __U,
|
||||
__m512d __A, int __R) {
|
||||
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_maskz_rcp28_round_pd(__mmask8 __U, __m512d __A,
|
||||
int __R) {
|
||||
return (__m512d)__builtin_ia32_rcp28pd_mask(
|
||||
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_rcp28_round_ps(__m512 __A, int __R) {
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rcp28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
__m512 __W;
|
||||
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)-1, __R);
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_rcp28_round_ps(__m512 __W, __mmask16 __U, __m512 __A,
|
||||
int __R) {
|
||||
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_rcp28_round_ps(__mmask16 __U, __m512 __A, int __R) {
|
||||
return (__m512)__builtin_ia32_rcp28ps_mask(
|
||||
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_rcp28_round_sd(__m128d __A, __m128d __B, int __R) {
|
||||
return (__m128d)__builtin_ia32_rcp28sd_round((__v2df)__B, (__v2df)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_rcp28_round_ss(__m128 __A, __m128 __B, int __R) {
|
||||
return (__m128)__builtin_ia32_rcp28ss_round((__v4sf)__B, (__v4sf)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_rsqrt28_round_pd(__m512d __A, int __R) {
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
|
||||
{
|
||||
__m512d __W;
|
||||
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)-1, __R);
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_mask_rsqrt28_round_pd(__m512d __W, __mmask8 __U,
|
||||
__m512d __A, int __R) {
|
||||
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W,
|
||||
(__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) __W,
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512d _mm512_maskz_rsqrt28_round_pd(__mmask8 __U, __m512d __A,
|
||||
int __R) {
|
||||
return (__m512d)__builtin_ia32_rsqrt28pd_mask(
|
||||
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R);
|
||||
extern __inline __m512d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
|
||||
{
|
||||
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
|
||||
(__v8df) _mm512_setzero_pd (),
|
||||
(__mmask8) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_rsqrt28_round_ps(__m512 __A, int __R) {
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
|
||||
{
|
||||
__m512 __W;
|
||||
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)-1, __R);
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) -1, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_mask_rsqrt28_round_ps(__m512 __W, __mmask16 __U,
|
||||
__m512 __A, int __R) {
|
||||
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W,
|
||||
(__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) __W,
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m512 _mm512_maskz_rsqrt28_round_ps(__mmask16 __U, __m512 __A,
|
||||
int __R) {
|
||||
return (__m512)__builtin_ia32_rsqrt28ps_mask(
|
||||
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R);
|
||||
extern __inline __m512
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
|
||||
{
|
||||
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
|
||||
(__v16sf) _mm512_setzero_ps (),
|
||||
(__mmask16) __U, __R);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_rsqrt28_round_sd(__m128d __A, __m128d __B, int __R) {
|
||||
return (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)__B, (__v2df)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
__R);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) {
|
||||
return (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)__B, (__v4sf)__A, __R);
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
|
||||
__m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
|
||||
(__v2df) __A,
|
||||
(__v2df)
|
||||
_mm_setzero_pd (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
|
||||
__m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf) __W,
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
extern __inline __m128
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
|
||||
{
|
||||
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
|
||||
(__v4sf) __A,
|
||||
(__v4sf)
|
||||
_mm_setzero_ps (),
|
||||
__U,
|
||||
__R);
|
||||
}
|
||||
|
||||
#else
|
||||
#define _mm512_exp2a23_round_pd(A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_pd(U, A, C) \
|
||||
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_exp2a23_round_ps(A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_ps(U, A, C) \
|
||||
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_rcp28_round_pd(A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rcp28_round_pd(U, A, C) \
|
||||
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_rcp28_round_ps(A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rcp28_round_ps(U, A, C) \
|
||||
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_rsqrt28_round_pd(A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
|
||||
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
|
||||
#define _mm512_rsqrt28_round_ps(A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, W, U, C)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
|
||||
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
|
||||
#define _mm512_exp2a23_round_pd(A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
#define _mm512_mask_exp2a23_round_pd(W, U, A, C) __builtin_ia32_exp2pd_mask(A, W, U, C)
|
||||
#define _mm512_maskz_exp2a23_round_pd(U, A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
#define _mm512_exp2a23_round_ps(A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
#define _mm512_mask_exp2a23_round_ps(W, U, A, C) __builtin_ia32_exp2ps_mask(A, W, U, C)
|
||||
#define _mm512_maskz_exp2a23_round_ps(U, A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
#define _mm512_rcp28_round_pd(A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
#define _mm512_mask_rcp28_round_pd(W, U, A, C) __builtin_ia32_rcp28pd_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rcp28_round_pd(U, A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
#define _mm512_rcp28_round_ps(A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
#define _mm512_mask_rcp28_round_ps(W, U, A, C) __builtin_ia32_rcp28ps_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rcp28_round_ps(U, A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
#define _mm512_rsqrt28_round_pd(A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
|
||||
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
|
||||
#define _mm512_rsqrt28_round_ps(A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
|
||||
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
|
||||
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
|
||||
#define _mm_rcp28_round_sd(A, B, R) __builtin_ia32_rcp28sd_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rcp28_round_sd(W, U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rcp28_round_sd(U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
|
||||
#define _mm_rcp28_round_ss(A, B, R) __builtin_ia32_rcp28ss_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rcp28_round_ss(W, U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rcp28_round_ss(U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
|
||||
#define _mm_rsqrt28_round_sd(A, B, R) __builtin_ia32_rsqrt28sd_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
|
||||
#define _mm_rsqrt28_round_ss(A, B, R) __builtin_ia32_rsqrt28ss_round(A, B, R)
|
||||
|
||||
#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))
|
||||
#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
|
||||
#endif
|
||||
|
||||
#define _mm512_exp2a23_pd(A) \
|
||||
_mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_pd(W, U, A) \
|
||||
_mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_pd(U, A) \
|
||||
_mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_exp2a23_ps(A) \
|
||||
_mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_ps(W, U, A) \
|
||||
_mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_ps(U, A) \
|
||||
_mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rcp28_sd(W, U, A, B) _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rcp28_sd(U, A, B) _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_mask_rcp28_ss(W, U, A, B) _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rcp28_ss(U, A, B) _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_mask_rsqrt28_sd(W, U, A, B) _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rsqrt28_sd(U, A, B) _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_mask_rsqrt28_ss(W, U, A, B) _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_maskz_rsqrt28_ss(U, A, B) _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_exp2a23_pd(A) _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_exp2a23_pd(W, U, A) _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_exp2a23_pd(U, A) _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_exp2a23_ps(A) _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_exp2a23_ps(W, U, A) _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_exp2a23_ps(U, A) _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rcp28_pd(A) _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_pd(W, U, A) \
|
||||
_mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_pd(U, A) \
|
||||
_mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_pd(W, U, A) _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rcp28_pd(U, A) _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rcp28_ps(A) _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_ps(W, U, A) \
|
||||
_mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_ps(U, A) \
|
||||
_mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_pd(A) \
|
||||
_mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_pd(W, U, A) \
|
||||
_mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_pd(U, A) \
|
||||
_mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_ps(A) \
|
||||
_mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_ps(W, U, A) \
|
||||
_mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_ps(U, A) \
|
||||
_mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_sd(A, B) \
|
||||
__builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_ss(A, B) \
|
||||
__builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_sd(A, B) \
|
||||
__builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_ss(A, B) \
|
||||
__builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_ps(W, U, A) _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rcp28_ps(U, A) _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rsqrt28_pd(A) _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_rsqrt28_pd(W, U, A) _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rsqrt28_pd(U, A) _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_rsqrt28_ps(A) _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_mask_rsqrt28_ps(W, U, A) _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm512_maskz_rsqrt28_ps(U, A) _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rcp28_sd(A, B) __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rcp28_ss(A, B) __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rsqrt28_sd(A, B) __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#define _mm_rsqrt28_ss(A, B) __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
|
||||
#ifdef __DISABLE_AVX512ER__
|
||||
#undef __DISABLE_AVX512ER__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512ER__ */
|
||||
|
||||
#endif /* _AVX512ERINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
21702
third_party/intel/avx512fintrin.internal.h
vendored
21702
third_party/intel/avx512fintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
91
third_party/intel/avx512ifmaintrin.internal.h
vendored
91
third_party/intel/avx512ifmaintrin.internal.h
vendored
|
@ -1,53 +1,74 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
|
||||
#define _AVX512IFMAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512IFMA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512ifma")
|
||||
#define __DISABLE_AVX512IFMA__
|
||||
#endif /* __AVX512IFMA__ */
|
||||
|
||||
__funline __m512i _mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52luq512_mask((__v8di)__X, (__v8di)__Y,
|
||||
(__v8di)__Z, (__mmask8)-1);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52huq512_mask((__v8di)__X, (__v8di)__Y,
|
||||
(__v8di)__Z, (__mmask8)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M,
|
||||
__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52luq512_mask(
|
||||
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M,
|
||||
__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52huq512_mask(
|
||||
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
|
||||
__m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
|
||||
(__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X,
|
||||
__m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52luq512_maskz(
|
||||
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X,
|
||||
__m512i __Y, __m512i __Z) {
|
||||
return (__m512i)__builtin_ia32_vpmadd52huq512_maskz(
|
||||
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
|
||||
(__v8di) __Y,
|
||||
(__v8di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512IFMA__
|
||||
#undef __DISABLE_AVX512IFMA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512IFMA__ */
|
||||
|
||||
#endif /* _AVX512IFMAINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
170
third_party/intel/avx512ifmavlintrin.internal.h
vendored
170
third_party/intel/avx512ifmavlintrin.internal.h
vendored
|
@ -1,88 +1,128 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
|
||||
#define _AVX512IFMAVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512ifma,avx512vl")
|
||||
#define __DISABLE_AVX512IFMAVL__
|
||||
#endif /* __AVX512IFMAVL__ */
|
||||
|
||||
__funline __m128i _mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52luq128_mask((__v2di)__X, (__v2di)__Y,
|
||||
(__v2di)__Z, (__mmask8)-1);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52huq128_mask((__v2di)__X, (__v2di)__Y,
|
||||
(__v2di)__Z, (__mmask8)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52luq256_mask((__v4di)__X, (__v4di)__Y,
|
||||
(__v4di)__Z, (__mmask8)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52huq256_mask((__v4di)__X, (__v4di)__Y,
|
||||
(__v4di)__Z, (__mmask8)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X,
|
||||
__m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52luq128_mask(
|
||||
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X,
|
||||
__m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52huq128_mask(
|
||||
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
|
||||
(__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M,
|
||||
__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52luq256_mask(
|
||||
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M,
|
||||
__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52huq256_mask(
|
||||
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
|
||||
__m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
|
||||
(__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
|
||||
__m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52luq128_maskz(
|
||||
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y,
|
||||
__m128i __Z) {
|
||||
return (__m128i)__builtin_ia32_vpmadd52huq128_maskz(
|
||||
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
|
||||
(__v2di) __Y,
|
||||
(__v2di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X,
|
||||
__m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52luq256_maskz(
|
||||
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X,
|
||||
__m256i __Y, __m256i __Z) {
|
||||
return (__m256i)__builtin_ia32_vpmadd52huq256_maskz(
|
||||
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
|
||||
(__v4di) __Y,
|
||||
(__v4di) __Z,
|
||||
(__mmask8) __M);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512IFMAVL__
|
||||
#undef __DISABLE_AVX512IFMAVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512IFMAVL__ */
|
||||
|
||||
#endif /* _AVX512IFMAVLINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
302
third_party/intel/avx512pfintrin.internal.h
vendored
302
third_party/intel/avx512pfintrin.internal.h
vendored
|
@ -1,190 +1,170 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512PFINTRIN_H_INCLUDED
|
||||
#define _AVX512PFINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512PF__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512pf")
|
||||
#define __DISABLE_AVX512PF__
|
||||
#endif /* __AVX512PF__ */
|
||||
|
||||
typedef long long __v8di __attribute__((__vector_size__(64)));
|
||||
typedef int __v16si __attribute__((__vector_size__(64)));
|
||||
typedef long long __m512i __attribute__((__vector_size__(64), __may_alias__));
|
||||
#endif
|
||||
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
|
||||
typedef int __v16si __attribute__ ((__vector_size__ (64)));
|
||||
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
|
||||
typedef unsigned char __mmask8;
|
||||
typedef unsigned short __mmask16;
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline void _mm512_prefetch_i32gather_pd(__m256i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i32gather_ps(__m512i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32gather_pd(__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32gather_ps(__m512i __index,
|
||||
__mmask16 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfdps(__mask, (__v16si)__index, __addr, __scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64gather_pd(__m512i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64gather_ps(__m512i __index, void const *__addr,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64gather_pd(__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64gather_ps(__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_gatherpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i32scatter_pd(void *__addr, __m256i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i32scatter_ps(void *__addr, __m512i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfdps((__mmask16)0xFFFF, (__v16si)__index, __addr,
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32scatter_pd(void *__addr, __mmask8 __mask,
|
||||
__m256i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i32scatter_ps(void *__addr, __mmask16 __mask,
|
||||
__m512i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfdps(__mask, (__v16si)__index, __addr, __scale,
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64scatter_pd(void *__addr, __m512i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_prefetch_i64scatter_ps(void *__addr, __m512i __index,
|
||||
int __scale, int __hint) {
|
||||
__builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
|
||||
int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64scatter_pd(void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
|
||||
void const *__addr, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_prefetch_i64scatter_ps(void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale,
|
||||
int __hint) {
|
||||
__builtin_ia32_scatterpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m256i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
|
||||
int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
|
||||
__scale, __hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask8 __mask,
|
||||
__m512i __index, int __scale, int __hint)
|
||||
{
|
||||
__builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
|
||||
__hint);
|
||||
}
|
||||
|
||||
#else
|
||||
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)MASK, (__v8si)(__m256i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
|
||||
(void const *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)MASK, (__v8si)(__m256i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
|
||||
(void *)ADDR, (int)SCALE, (int)HINT)
|
||||
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_AVX512PF__
|
||||
#undef __DISABLE_AVX512PF__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512PF__ */
|
||||
|
||||
#endif /* _AVX512PFINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
680
third_party/intel/avx512vbmi2intrin.internal.h
vendored
680
third_party/intel/avx512vbmi2intrin.internal.h
vendored
|
@ -1,381 +1,407 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
|
||||
#define __AVX512VBMI2INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VBMI2__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi2")
|
||||
#define __DISABLE_AVX512VBMI2__
|
||||
#endif /* __AVX512VBMI2__ */
|
||||
|
||||
#endif
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512i _mm512_shrdi_epi16(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi((__v32hi)__A, (__v32hi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdi_epi32(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si((__v16si)__A, (__v16si)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
|
||||
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__C,
|
||||
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdi_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask(
|
||||
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
|
||||
(__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__B,
|
||||
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdi_epi64(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di((__v8di)__A, (__v8di)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask((__v8di)__C, (__v8di)__D, __E,
|
||||
(__v8di)__A, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||
__E, (__v8di) __A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask(
|
||||
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
|
||||
(__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldi_epi16(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi((__v32hi)__A, (__v32hi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi16 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldi_epi32(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si((__v16si)__A, (__v16si)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi32 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v16si ((__v16si)__A, (__v16si) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldi_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
|
||||
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__C,
|
||||
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldi_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask(
|
||||
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(),
|
||||
(__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__B,
|
||||
(__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldi_epi64(__m512i __A, __m512i __B, int __C) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di((__v8di)__A, (__v8di)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldi_epi64 (__m512i __A, __m512i __B, int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldi_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask((__v8di)__C, (__v8di)__D, __E,
|
||||
(__v8di)__A, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__C, (__v8di) __D,
|
||||
__E, (__v8di) __A, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldi_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask(
|
||||
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(),
|
||||
(__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__B, (__v8di) __C,
|
||||
__D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
|
||||
}
|
||||
#else
|
||||
#define _mm512_shrdi_epi16(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), \
|
||||
(__v32hi)(__m512i)(B),(int)(C))
|
||||
#define _mm512_shrdi_epi32(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), \
|
||||
(__v16si)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), \
|
||||
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B))
|
||||
#define _mm512_maskz_shrdi_epi32(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), \
|
||||
(__v16si)(__m512i)(C),(int)(D), \
|
||||
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A))
|
||||
#define _mm512_shrdi_epi64(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), \
|
||||
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
|
||||
#define _mm512_maskz_shrdi_epi64(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), \
|
||||
(__v8di)(__m512i)(C),(int)(D), \
|
||||
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
|
||||
#define _mm512_shldi_epi16(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), \
|
||||
(__v32hi)(__m512i)(B),(int)(C))
|
||||
#define _mm512_shldi_epi32(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), \
|
||||
(__v16si)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shldi_epi32(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), \
|
||||
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B))
|
||||
#define _mm512_maskz_shldi_epi32(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), \
|
||||
(__v16si)(__m512i)(C),(int)(D), \
|
||||
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A))
|
||||
#define _mm512_shldi_epi64(A, B, C) \
|
||||
((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), \
|
||||
(__v8di)(__m512i)(B),(int)(C))
|
||||
#define _mm512_mask_shldi_epi64(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), \
|
||||
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
|
||||
#define _mm512_maskz_shldi_epi64(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), \
|
||||
(__v8di)(__m512i)(C),(int)(D), \
|
||||
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
|
||||
#define _mm512_shrdi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_shrdi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
|
||||
#define _mm512_maskz_shrdi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
|
||||
#define _mm512_shrdi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
|
||||
#define _mm512_maskz_shrdi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
|
||||
#define _mm512_shldi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_shldi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
|
||||
#define _mm512_mask_shldi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
|
||||
#define _mm512_maskz_shldi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
|
||||
#define _mm512_shldi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_shldi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
|
||||
#define _mm512_maskz_shldi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
|
||||
#endif
|
||||
|
||||
__funline __m512i _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi((__v32hi)__A, (__v32hi)__B,
|
||||
(__v32hi)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
(__v32hi) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdv_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di((__v8di)__A, (__v8di)__B,
|
||||
(__v8di)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shrdv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B,
|
||||
(__v8di) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask((__v8di)__A, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz((__v8di)__B, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__A);
|
||||
}
|
||||
__funline __m512i _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi((__v32hi)__A, (__v32hi)__B,
|
||||
(__v32hi)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi16 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B,
|
||||
(__v32hi) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldv_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di((__v8di)__A, (__v8di)__B,
|
||||
(__v8di)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_shldv_epi64 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B,
|
||||
(__v8di) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_mask((__v8di)__A, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz((__v8di)__B, (__v8di)__C,
|
||||
(__v8di)__D, (__mmask8)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C,
|
||||
(__v8di) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI2__
|
||||
#undef __DISABLE_AVX512VBMI2__
|
||||
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI2__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi2,avx512bw")
|
||||
#define __DISABLE_AVX512VBMI2BW__
|
||||
#endif /* __AVX512VBMI2BW__ */
|
||||
|
||||
__funline __m512i _mm512_mask_compress_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_compressqi512_mask((__v64qi)__C, (__v64qi)__A,
|
||||
(__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_compress_epi8(__mmask64 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_compressqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_compressstoreu_epi8(void *__A, __mmask64 __B,
|
||||
__m512i __C) {
|
||||
__builtin_ia32_compressstoreuqi512_mask((__v64qi *)__A, (__v64qi)__C,
|
||||
(__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_compress_epi16(__m512i __A, __mmask32 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_compresshi512_mask((__v32hi)__C, (__v32hi)__A,
|
||||
(__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_compress_epi16(__mmask32 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_compresshi512_mask(
|
||||
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
||||
}
|
||||
|
||||
__funline void _mm512_mask_compressstoreu_epi16(void *__A, __mmask32 __B,
|
||||
__m512i __C) {
|
||||
__builtin_ia32_compressstoreuhi512_mask((__v32hi *)__A, (__v32hi)__C,
|
||||
(__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expand_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_expandqi512_mask((__v64qi)__C, (__v64qi)__A,
|
||||
(__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expand_epi8(__mmask64 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_expandqi512_maskz(
|
||||
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expandloadu_epi8(__m512i __A, __mmask64 __B,
|
||||
const void *__C) {
|
||||
return (__m512i)__builtin_ia32_expandloadqi512_mask(
|
||||
(const __v64qi *)__C, (__v64qi)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expandloadu_epi8(__mmask64 __A, const void *__B) {
|
||||
return (__m512i)__builtin_ia32_expandloadqi512_maskz(
|
||||
(const __v64qi *)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expand_epi16(__m512i __A, __mmask32 __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_expandhi512_mask((__v32hi)__C, (__v32hi)__A,
|
||||
(__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expand_epi16(__mmask32 __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_expandhi512_maskz(
|
||||
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_expandloadu_epi16(__m512i __A, __mmask32 __B,
|
||||
const void *__C) {
|
||||
return (__m512i)__builtin_ia32_expandloadhi512_mask(
|
||||
(const __v32hi *)__C, (__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_expandloadu_epi16(__mmask32 __A, const void *__B) {
|
||||
return (__m512i)__builtin_ia32_expandloadhi512_maskz(
|
||||
(const __v32hi *)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512i _mm512_mask_shrdi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
|
||||
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdi_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask(
|
||||
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
|
||||
(__mmask32)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldi_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D, int __E) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
|
||||
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldi_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, int __D) {
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask(
|
||||
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(),
|
||||
(__mmask32)__A);
|
||||
}
|
||||
|
||||
#else
|
||||
#define _mm512_mask_shrdi_epi16(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), \
|
||||
(__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A),(__mmask32)(B))
|
||||
#define _mm512_maskz_shrdi_epi16(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), \
|
||||
(__v32hi)(__m512i)(C),(int)(D), \
|
||||
(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
|
||||
#define _mm512_mask_shldi_epi16(A, B, C, D, E) \
|
||||
((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), \
|
||||
(__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A),(__mmask32)(B))
|
||||
#define _mm512_maskz_shldi_epi16(A, B, C, D) \
|
||||
((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), \
|
||||
(__v32hi)(__m512i)(C),(int)(D), \
|
||||
(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
|
||||
#endif
|
||||
|
||||
__funline __m512i _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compress_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__C,
|
||||
(__v64qi)__A, (__mmask64)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shrdv_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz(
|
||||
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_compress_epi8 (__mmask64 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__B,
|
||||
(__v64qi)_mm512_setzero_si512 (), (__mmask64)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __B, __m512i __C,
|
||||
__m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_mask(
|
||||
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compressstoreu_epi8 (void * __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
__builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_shldv_epi16(__mmask32 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz(
|
||||
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compress_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__C,
|
||||
(__v32hi)__A, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_compress_epi16 (__mmask32 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__B,
|
||||
(__v32hi)_mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_compressstoreu_epi16 (void * __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
__builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C,
|
||||
(__mmask32) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expand_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __C,
|
||||
(__v64qi) __A,
|
||||
(__mmask64) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expand_epi8 (__mmask64 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandqi512_maskz ((__v64qi) __B,
|
||||
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expandloadu_epi8 (__m512i __A, __mmask64 __B, const void * __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *) __C,
|
||||
(__v64qi) __A, (__mmask64) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expandloadu_epi8 (__mmask64 __A, const void * __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadqi512_maskz ((const __v64qi *) __B,
|
||||
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expand_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __C,
|
||||
(__v32hi) __A,
|
||||
(__mmask32) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expand_epi16 (__mmask32 __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandhi512_maskz ((__v32hi) __B,
|
||||
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_expandloadu_epi16 (__m512i __A, __mmask32 __B, const void * __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *) __C,
|
||||
(__v32hi) __A, (__mmask32) __B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_expandloadu_epi16 (__mmask32 __A, const void * __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_expandloadhi512_maskz ((const __v32hi *) __B,
|
||||
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
|
||||
}
|
||||
#ifdef __OPTIMIZE__
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__C,
|
||||
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__B,
|
||||
(__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
|
||||
int __E)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__C,
|
||||
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__B,
|
||||
(__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
|
||||
}
|
||||
#else
|
||||
#define _mm512_mask_shrdi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
|
||||
#define _mm512_maskz_shrdi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C),(int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
|
||||
#define _mm512_mask_shldi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
|
||||
#define _mm512_maskz_shldi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C),(int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shrdv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask ((__v32hi)__A,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shrdv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz ((__v32hi)__B,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_shldv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_mask ((__v32hi)__A,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
|
||||
}
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_shldv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz ((__v32hi)__B,
|
||||
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI2BW__
|
||||
#undef __DISABLE_AVX512VBMI2BW__
|
||||
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI2BW__ */
|
||||
|
||||
#endif /* __AVX512VBMI2INTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
1246
third_party/intel/avx512vbmi2vlintrin.internal.h
vendored
1246
third_party/intel/avx512vbmi2vlintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
170
third_party/intel/avx512vbmiintrin.internal.h
vendored
170
third_party/intel/avx512vbmiintrin.internal.h
vendored
|
@ -1,90 +1,124 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
|
||||
#define _AVX512VBMIINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512VBMI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi")
|
||||
#define __DISABLE_AVX512VBMI__
|
||||
#endif /* __AVX512VBMI__ */
|
||||
|
||||
__funline __m512i _mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M,
|
||||
__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
|
||||
(__v64qi)__X, (__v64qi)__Y, (__v64qi)__W, (__mmask64)__M);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X,
|
||||
__m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
|
||||
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_setzero_si512(),
|
||||
(__mmask64)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
|
||||
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
|
||||
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_undefined_epi32(),
|
||||
(__mmask64)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
|
||||
(__v64qi) __Y,
|
||||
(__v64qi)
|
||||
_mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_permvarqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_undefined_epi32(),
|
||||
(__mmask64)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_undefined_epi32 (),
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_permvarqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_setzero_si512(),
|
||||
(__mmask64)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi)
|
||||
_mm512_setzero_si512(),
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M,
|
||||
__m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_permvarqi512_mask(
|
||||
(__v64qi)__B, (__v64qi)__A, (__v64qi)__W, (__mmask64)__M);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
|
||||
__m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __W,
|
||||
(__mmask64) __M);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_permutex2var_epi8(__m512i __A, __m512i __I,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64) -1);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U,
|
||||
__m512i __I, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermt2varqi512_mask(
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
|
||||
__m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
|
||||
,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I,
|
||||
__mmask64 __U, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermi2varqi512_mask((__v64qi)__A,
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__B,
|
||||
(__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
|
||||
__mmask64 __U, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
|
||||
(__v64qi) __I
|
||||
,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A,
|
||||
__m512i __I, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpermt2varqi512_maskz(
|
||||
(__v64qi)__I
|
||||
/* idx */,
|
||||
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
|
||||
__m512i __I, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
|
||||
,
|
||||
(__v64qi) __A,
|
||||
(__v64qi) __B,
|
||||
(__mmask64)
|
||||
__U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMI__
|
||||
#undef __DISABLE_AVX512VBMI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMI__ */
|
||||
|
||||
#endif /* _AVX512VBMIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
326
third_party/intel/avx512vbmivlintrin.internal.h
vendored
326
third_party/intel/avx512vbmivlintrin.internal.h
vendored
|
@ -1,159 +1,229 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VBMIVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vbmi,avx512vl")
|
||||
#define __DISABLE_AVX512VBMIVL__
|
||||
#endif /* __AVX512VBMIVL__ */
|
||||
|
||||
__funline __m256i _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M,
|
||||
__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
|
||||
(__v32qi)__X, (__v32qi)__Y, (__v32qi)__W, (__mmask32)__M);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X,
|
||||
__m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
|
||||
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_setzero_si256(),
|
||||
(__mmask32)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
|
||||
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
|
||||
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_undefined_si256(),
|
||||
(__mmask32)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
|
||||
(__v32qi) __Y,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M,
|
||||
__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
|
||||
(__v16qi)__X, (__v16qi)__Y, (__v16qi)__W, (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X,
|
||||
__m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
|
||||
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
|
||||
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
|
||||
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_undefined_si128(),
|
||||
(__mmask16)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
|
||||
(__v16qi) __Y,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_permvarqi256_mask(
|
||||
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_undefined_si256(),
|
||||
(__mmask32)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_undefined_si256 (),
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_permvarqi256_mask(
|
||||
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_setzero_si256(),
|
||||
(__mmask32)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M,
|
||||
__m256i __A, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_permvarqi256_mask(
|
||||
(__v32qi)__B, (__v32qi)__A, (__v32qi)__W, (__mmask32)__M);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
|
||||
__m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __W,
|
||||
(__mmask32) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_permutexvar_epi8(__m128i __A, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_permvarqi128_mask(
|
||||
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_undefined_si128(),
|
||||
(__mmask16)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_undefined_si128 (),
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A,
|
||||
__m128i __B) {
|
||||
return (__m128i)__builtin_ia32_permvarqi128_mask(
|
||||
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M,
|
||||
__m128i __A, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_permvarqi128_mask(
|
||||
(__v16qi)__B, (__v16qi)__A, (__v16qi)__W, (__mmask16)__M);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __W,
|
||||
(__mmask16) __M);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_permutex2var_epi8(__m256i __A, __m256i __I,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32) -1);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U,
|
||||
__m256i __I, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermt2varqi256_mask(
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
|
||||
__m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
|
||||
,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I,
|
||||
__mmask32 __U, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermi2varqi256_mask((__v32qi)__A,
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__B,
|
||||
(__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
|
||||
__mmask32 __U, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
|
||||
(__v32qi) __I
|
||||
,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A,
|
||||
__m256i __I, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpermt2varqi256_maskz(
|
||||
(__v32qi)__I
|
||||
/* idx */,
|
||||
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
|
||||
__m256i __I, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
|
||||
,
|
||||
(__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
(__mmask32)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16) -1);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U,
|
||||
__m128i __I, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermt2varqi128_mask(
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
|
||||
,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I,
|
||||
__mmask16 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermi2varqi128_mask((__v16qi)__A,
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__B,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
|
||||
(__v16qi) __I
|
||||
,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A,
|
||||
__m128i __I, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpermt2varqi128_maskz(
|
||||
(__v16qi)__I
|
||||
/* idx */,
|
||||
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
|
||||
__m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
|
||||
,
|
||||
(__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
(__mmask16)
|
||||
__U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VBMIVL__
|
||||
#undef __DISABLE_AVX512VBMIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VBMIVL__ */
|
||||
|
||||
#endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
6705
third_party/intel/avx512vlbwintrin.internal.h
vendored
6705
third_party/intel/avx512vlbwintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
2791
third_party/intel/avx512vldqintrin.internal.h
vendored
2791
third_party/intel/avx512vldqintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
19034
third_party/intel/avx512vlintrin.internal.h
vendored
19034
third_party/intel/avx512vlintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
147
third_party/intel/avx512vnniintrin.internal.h
vendored
147
third_party/intel/avx512vnniintrin.internal.h
vendored
|
@ -1,87 +1,108 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
|
||||
#define __AVX512VNNIINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vnni")
|
||||
#define __DISABLE_AVX512VNNI__
|
||||
#endif /* __AVX512VNNI__ */
|
||||
|
||||
__funline __m512i _mm512_dpbusd_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbusd_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpbusd_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbusd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpbusd_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbusd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_dpbusds_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpbusds_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpbusds_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpbusds_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpbusds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpbusds_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpbusds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssd_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpwssd_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpwssd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpwssd_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpwssd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si((__v16si)__A, (__v16si)__B,
|
||||
(__v16si)__C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpdpwssds_v16si ((__v16si)__A, (__v16si) __B,
|
||||
(__v16si) __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_dpwssds_epi32(__m512i __A, __mmask16 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_dpwssds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask ((__v16si)__A,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_dpwssds_epi32(__mmask16 __A, __m512i __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz(
|
||||
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_dpwssds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz ((__v16si)__B,
|
||||
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VNNI__
|
||||
#undef __DISABLE_AVX512VNNI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VNNI__ */
|
||||
|
||||
#endif /* __AVX512VNNIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
240
third_party/intel/avx512vnnivlintrin.internal.h
vendored
240
third_party/intel/avx512vnnivlintrin.internal.h
vendored
|
@ -1,154 +1,140 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#error "Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VNNIVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vnni,avx512vl")
|
||||
#define __DISABLE_AVX512VNNIVL__
|
||||
#endif /* __AVX512VNNIVL__ */
|
||||
|
||||
__funline __m256i _mm256_dpbusd_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
#endif
|
||||
#define _mm256_dpbusd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C,
|
||||
(__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpbusd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
|
||||
__m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask((__v8si)__A, (__v8si)__C,
|
||||
(__v8si)__D, (__mmask8)__B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpbusd_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
#define _mm_dpbusd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C,
|
||||
(__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpbusd_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpbusd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask((__v4si)__A, (__v4si)__C,
|
||||
(__v4si)__D, (__mmask8)__B);
|
||||
#define _mm256_dpbusds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask ((__v8si)__A,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpbusd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_dpbusds_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
#define _mm_dpbusds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask ((__v4si)__A,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpbusds_epi32(__m256i __A, __mmask8 __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask(
|
||||
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpbusds_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
#define _mm256_dpwssd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C,
|
||||
(__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpbusds_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpbusds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask(
|
||||
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
|
||||
#define _mm_dpwssd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C,
|
||||
(__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpbusds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_dpwssd_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
#define _mm256_dpwssds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask ((__v8si)__A,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpwssd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
|
||||
__m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask((__v8si)__A, (__v8si)__C,
|
||||
(__v8si)__D, (__mmask8)__B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz ((__v8si)__B,
|
||||
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpwssd_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
#define _mm_dpwssds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask ((__v4si)__A,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpwssd_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpwssd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask((__v4si)__A, (__v4si)__C,
|
||||
(__v4si)__D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpwssd_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_dpwssds_epi32(__m256i __A, __m256i __B, __m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si((__v8si)__A, (__v8si)__B,
|
||||
(__v8si)__C);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_dpwssds_epi32(__m256i __A, __mmask8 __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask(
|
||||
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_dpwssds_epi32(__mmask8 __A, __m256i __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz(
|
||||
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_dpwssds_epi32(__m128i __A, __m128i __B, __m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si((__v4si)__A, (__v4si)__B,
|
||||
(__v4si)__C);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_dpwssds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask(
|
||||
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_dpwssds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz(
|
||||
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_dpwssds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz ((__v4si)__B,
|
||||
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512VNNIVL__
|
||||
#undef __DISABLE_AVX512VNNIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VNNIVL__ */
|
||||
#endif /* __DISABLE_AVX512VNNIVL__ */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
32
third_party/intel/avx512vp2intersectintrin.internal.h
vendored
Normal file
32
third_party/intel/avx512vp2intersectintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
|
||||
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
|
||||
#if !defined(__AVX512VP2INTERSECT__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vp2intersect")
|
||||
#define __DISABLE_AVX512VP2INTERSECT__
|
||||
#endif
|
||||
/* _mm512_2intersect_epi32: void wrapper around
   __builtin_ia32_2intersectd512.  The two __mmask16 pointers __U and __M
   are passed to the builtin first, followed by __A and __B cast to
   __v16si; nothing is returned to the caller.
   NOTE(review): presumably the builtin stores a pair of result masks
   through __U and __M (VP2INTERSECTD) -- confirm against the Intel
   intrinsics guide.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_2intersect_epi32 (__m512i __A, __m512i __B, __mmask16 *__U,
|
||||
__mmask16 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd512 (__U, __M, (__v16si) __A, (__v16si) __B);
|
||||
}
|
||||
/* _mm512_2intersect_epi64: void wrapper around
   __builtin_ia32_2intersectq512.  The two __mmask8 pointers __U and __M
   are passed to the builtin first, followed by __A and __B cast to
   __v8di; nothing is returned to the caller.
   NOTE(review): presumably the builtin stores a pair of result masks
   through __U and __M (VP2INTERSECTQ) -- confirm against the Intel
   intrinsics guide.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_2intersect_epi64 (__m512i __A, __m512i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq512 (__U, __M, (__v8di) __A, (__v8di) __B);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512VP2INTERSECT__
|
||||
#undef __DISABLE_AVX512VP2INTERSECT__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
44
third_party/intel/avx512vp2intersectvlintrin.internal.h
vendored
Normal file
44
third_party/intel/avx512vp2intersectvlintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
|
||||
#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vp2intersect,avx512vl")
|
||||
#define __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#endif
|
||||
/* _mm_2intersect_epi32: void wrapper around
   __builtin_ia32_2intersectd128.  The two __mmask8 pointers __U and __M
   are passed to the builtin first, followed by __A and __B cast to
   __v4si; nothing is returned to the caller.
   NOTE(review): presumably the builtin stores a pair of result masks
   through __U and __M -- confirm against the Intel intrinsics guide.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_2intersect_epi32 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd128 (__U, __M, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
/* _mm256_2intersect_epi32: void wrapper around
   __builtin_ia32_2intersectd256.  The two __mmask8 pointers __U and __M
   are passed to the builtin first, followed by __A and __B cast to
   __v8si; nothing is returned to the caller.
   NOTE(review): presumably the builtin stores a pair of result masks
   through __U and __M -- confirm against the Intel intrinsics guide.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_2intersect_epi32 (__m256i __A, __m256i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectd256 (__U, __M, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
/* _mm_2intersect_epi64: void wrapper around
   __builtin_ia32_2intersectq128.  The two __mmask8 pointers __U and __M
   are passed to the builtin first, followed by __A and __B cast to
   __v2di; nothing is returned to the caller.
   NOTE(review): presumably the builtin stores a pair of result masks
   through __U and __M -- confirm against the Intel intrinsics guide.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_2intersect_epi64 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq128 (__U, __M, (__v2di) __A, (__v2di) __B);
|
||||
}
|
||||
/* _mm256_2intersect_epi64: void wrapper around
   __builtin_ia32_2intersectq256.  The two __mmask8 pointers __U and __M
   are passed to the builtin first, followed by __A and __B cast to
   __v4di; nothing is returned to the caller.
   NOTE(review): presumably the builtin stores a pair of result masks
   through __U and __M -- confirm against the Intel intrinsics guide.  */
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_2intersect_epi64 (__m256i __A, __m256i __B, __mmask8 *__U,
|
||||
__mmask8 *__M)
|
||||
{
|
||||
__builtin_ia32_2intersectq256 (__U, __M, (__v4di) __A, (__v4di) __B);
|
||||
}
|
||||
#ifdef __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#undef __DISABLE_AVX512VP2INTERSECTVL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
|
@ -1,50 +1,64 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
|
||||
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __AVX512VPOPCNTDQ__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vpopcntdq")
|
||||
#define __DISABLE_AVX512VPOPCNTDQ__
|
||||
#endif /* __AVX512VPOPCNTDQ__ */
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi32(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si((__v16si)__A);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi32 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si ((__v16si) __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_popcnt_epi32(__m512i __A, __mmask16 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)__B, (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
|
||||
(__v16si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask(
|
||||
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi32 (__mmask16 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
|
||||
(__v16si)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_popcnt_epi64(__m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di((__v8di)__A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_popcnt_epi64 (__m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di ((__v8di) __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_popcnt_epi64(__m512i __A, __mmask8 __U,
|
||||
__m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask((__v8di)__A, (__v8di)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_popcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
|
||||
(__v8di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) {
|
||||
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask(
|
||||
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_popcnt_epi64 (__mmask8 __U, __m512i __A)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
|
||||
(__v8di)
|
||||
_mm512_setzero_si512 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VPOPCNTDQ__
|
||||
#undef __DISABLE_AVX512VPOPCNTDQ__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VPOPCNTDQ__ */
|
||||
|
||||
#endif /* _AVX512VPOPCNTDQINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
142
third_party/intel/avx512vpopcntdqvlintrin.internal.h
vendored
142
third_party/intel/avx512vpopcntdqvlintrin.internal.h
vendored
|
@ -1,78 +1,110 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
|
||||
# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
|
||||
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx512vpopcntdq,avx512vl")
|
||||
#define __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#endif /* __AVX512VPOPCNTDQVL__ */
|
||||
|
||||
__funline __m128i _mm_popcnt_epi32(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountd_v4si((__v4si)__A);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi32 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_popcnt_epi32(__m128i __A, __mmask16 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask((__v4si)__A, (__v4si)__B,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi32 (__m128i __W, __mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
|
||||
(__v4si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi32(__mmask16 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask(
|
||||
(__v4si)__A, (__v4si)_mm_setzero_si128(), (__mmask16)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
|
||||
(__v4si)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi32(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountd_v8si((__v8si)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi32 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi32(__m256i __A, __mmask16 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask((__v8si)__A, (__v8si)__B,
|
||||
(__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi32 (__m256i __W, __mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
|
||||
(__v8si) __W,
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi32(__mmask16 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask(
|
||||
(__v8si)__A, (__v8si)_mm256_setzero_si256(), (__mmask16)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
|
||||
(__v8si)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask16) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_popcnt_epi64(__m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountq_v2di((__v2di)__A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_popcnt_epi64 (__m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_popcnt_epi64(__m128i __A, __mmask8 __U, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask((__v2di)__A, (__v2di)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_popcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||
(__v2di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
|
||||
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask(
|
||||
(__v2di)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
|
||||
(__v2di)
|
||||
_mm_setzero_si128 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_popcnt_epi64(__m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountq_v4di((__v4di)__A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_popcnt_epi64 (__m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U,
|
||||
__m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask((__v4di)__A, (__v4di)__B,
|
||||
(__mmask8)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_popcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||
(__v4di) __W,
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
|
||||
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask(
|
||||
(__v4di)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
|
||||
(__v4di)
|
||||
_mm256_setzero_si256 (),
|
||||
(__mmask8) __U);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#undef __DISABLE_AVX512VPOPCNTDQVL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
|
||||
|
||||
#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
1630
third_party/intel/avxintrin.internal.h
vendored
1630
third_party/intel/avxintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
82
third_party/intel/avxvnniintrin.internal.h
vendored
Normal file
82
third_party/intel/avxvnniintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _AVXVNNIINTRIN_H_INCLUDED
|
||||
#define _AVXVNNIINTRIN_H_INCLUDED
|
||||
#if !defined(__AVXVNNI__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avxvnni")
|
||||
#define __DISABLE_AVXVNNIVL__
|
||||
#endif
|
||||
/* _mm256_dpbusd_avx_epi32: forwards __A, __B and __C (each cast to
   __v8si) to __builtin_ia32_vpdpbusd_v8si and returns the builtin's
   result cast to __m256i.
   NOTE(review): presumably the AVX-VNNI form of the VPDPBUSD byte
   dot-product accumulate, with __A as the accumulator -- confirm
   against the Intel intrinsics guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbusd_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* _mm_dpbusd_avx_epi32: forwards __A, __B and __C (each cast to
   __v4si) to __builtin_ia32_vpdpbusd_v4si and returns the builtin's
   result cast to __m128i.
   NOTE(review): presumably the 128-bit AVX-VNNI form of VPDPBUSD, with
   __A as the accumulator -- confirm against the Intel intrinsics
   guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbusd_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
/* _mm256_dpbusds_avx_epi32: forwards __A, __B and __C (each cast to
   __v8si) to __builtin_ia32_vpdpbusds_v8si and returns the builtin's
   result cast to __m256i.
   NOTE(review): presumably the saturating variant (VPDPBUSDS) of the
   byte dot-product accumulate -- confirm against the Intel intrinsics
   guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbusds_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* _mm_dpbusds_avx_epi32: forwards __A, __B and __C (each cast to
   __v4si) to __builtin_ia32_vpdpbusds_v4si and returns the builtin's
   result cast to __m128i.
   NOTE(review): presumably the 128-bit saturating variant (VPDPBUSDS)
   of the byte dot-product accumulate -- confirm against the Intel
   intrinsics guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbusds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
/* _mm256_dpwssd_avx_epi32: forwards __A, __B and __C (each cast to
   __v8si) to __builtin_ia32_vpdpwssd_v8si and returns the builtin's
   result cast to __m256i.
   NOTE(review): presumably the AVX-VNNI form of the VPDPWSSD word
   dot-product accumulate, with __A as the accumulator -- confirm
   against the Intel intrinsics guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpwssd_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* _mm_dpwssd_avx_epi32: forwards __A, __B and __C (each cast to
   __v4si) to __builtin_ia32_vpdpwssd_v4si and returns the builtin's
   result cast to __m128i.
   NOTE(review): presumably the 128-bit AVX-VNNI form of VPDPWSSD, with
   __A as the accumulator -- confirm against the Intel intrinsics
   guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpwssd_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
/* _mm256_dpwssds_avx_epi32: forwards __A, __B and __C (each cast to
   __v8si) to __builtin_ia32_vpdpwssds_v8si and returns the builtin's
   result cast to __m256i.
   NOTE(review): presumably the saturating variant (VPDPWSSDS) of the
   word dot-product accumulate -- confirm against the Intel intrinsics
   guide.  */
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpwssds_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) __A,
|
||||
(__v8si) __B,
|
||||
(__v8si) __C);
|
||||
}
|
||||
/* _mm_dpwssds_avx_epi32: forwards __A, __B and __C (each cast to
   __v4si) to __builtin_ia32_vpdpwssds_v4si and returns the builtin's
   result cast to __m128i.
   NOTE(review): presumably the 128-bit saturating variant (VPDPWSSDS)
   of the word dot-product accumulate -- confirm against the Intel
   intrinsics guide.  */
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpwssds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) __A,
|
||||
(__v4si) __B,
|
||||
(__v4si) __C);
|
||||
}
|
||||
#ifdef __DISABLE_AVXVNNIVL__
|
||||
#undef __DISABLE_AVXVNNIVL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
103
third_party/intel/bmi2intrin.internal.h
vendored
103
third_party/intel/bmi2intrin.internal.h
vendored
|
@ -1,67 +1,74 @@
|
|||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _BMI2INTRIN_H_INCLUDED
|
||||
#define _BMI2INTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __BMI2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("bmi2")
|
||||
#define __DISABLE_BMI2__
|
||||
#endif /* __BMI2__ */
|
||||
|
||||
__funline unsigned int _bzhi_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_bzhi_si(__X, __Y);
|
||||
#endif
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bzhi_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_bzhi_si (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned int _pdep_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_pdep_si(__X, __Y);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pdep_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_pdep_si (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned int _pext_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_pext_si(__X, __Y);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pext_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_pext_si (__X, __Y);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
__funline unsigned long long _bzhi_u64(unsigned long long __X,
|
||||
unsigned long long __Y) {
|
||||
return __builtin_ia32_bzhi_di(__X, __Y);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_bzhi_di (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned long long _pdep_u64(unsigned long long __X,
|
||||
unsigned long long __Y) {
|
||||
return __builtin_ia32_pdep_di(__X, __Y);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pdep_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_pdep_di (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned long long _pext_u64(unsigned long long __X,
|
||||
unsigned long long __Y) {
|
||||
return __builtin_ia32_pext_di(__X, __Y);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_pext_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_pext_di (__X, __Y);
|
||||
}
|
||||
|
||||
__funline unsigned long long _mulx_u64(unsigned long long __X,
|
||||
unsigned long long __Y,
|
||||
unsigned long long *__P) {
|
||||
unsigned __int128 __res = (unsigned __int128)__X * __Y;
|
||||
*__P = (unsigned long long)(__res >> 64);
|
||||
return (unsigned long long)__res;
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
|
||||
unsigned long long *__P)
|
||||
{
|
||||
unsigned __int128 __res = (unsigned __int128) __X * __Y;
|
||||
*__P = (unsigned long long) (__res >> 64);
|
||||
return (unsigned long long) __res;
|
||||
}
|
||||
|
||||
#else /* !__x86_64__ */
|
||||
|
||||
__funline unsigned int _mulx_u32(unsigned int __X, unsigned int __Y,
|
||||
unsigned int *__P) {
|
||||
unsigned long long __res = (unsigned long long)__X * __Y;
|
||||
*__P = (unsigned int)(__res >> 32);
|
||||
return (unsigned int)__res;
|
||||
#else
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
|
||||
{
|
||||
unsigned long long __res = (unsigned long long) __X * __Y;
|
||||
*__P = (unsigned int) (__res >> 32);
|
||||
return (unsigned int) __res;
|
||||
}
|
||||
|
||||
#endif /* !__x86_64__ */
|
||||
|
||||
#endif
|
||||
#ifdef __DISABLE_BMI2__
|
||||
#undef __DISABLE_BMI2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_BMI2__ */
|
||||
|
||||
#endif /* _BMI2INTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
211
third_party/intel/bmiintrin.internal.h
vendored
211
third_party/intel/bmiintrin.internal.h
vendored
|
@ -1,160 +1,135 @@
|
|||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _BMIINTRIN_H_INCLUDED
|
||||
#define _BMIINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __BMI__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("bmi")
|
||||
#define __DISABLE_BMI__
|
||||
#endif /* __BMI__ */
|
||||
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u16(unsigned short __X) {
|
||||
return __builtin_ia32_tzcnt_u16(__X);
|
||||
#endif
|
||||
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u16 (unsigned short __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u16 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u32(unsigned int __X, unsigned int __Y) {
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u32(unsigned int __X, unsigned int __Y) {
|
||||
return __builtin_ia32_bextr_u32(__X, __Y);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u32 (unsigned int __X, unsigned int __Y)
|
||||
{
|
||||
return __builtin_ia32_bextr_u32 (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u32(unsigned int __X, unsigned int __Y, unsigned __Z) {
|
||||
return __builtin_ia32_bextr_u32(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
|
||||
{
|
||||
return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u32(unsigned int __X) {
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u32 (unsigned int __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u32(unsigned int __X) {
|
||||
return __blsi_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u32 (unsigned int __X)
|
||||
{
|
||||
return __blsi_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u32(unsigned int __X) {
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u32 (unsigned int __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u32(unsigned int __X) {
|
||||
return __blsmsk_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u32 (unsigned int __X)
|
||||
{
|
||||
return __blsmsk_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u32(unsigned int __X) {
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u32 (unsigned int __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u32(unsigned int __X) {
|
||||
return __blsr_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u32 (unsigned int __X)
|
||||
{
|
||||
return __blsr_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u32(unsigned int __X) {
|
||||
return __builtin_ia32_tzcnt_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u32(unsigned int __X) {
|
||||
return __builtin_ia32_tzcnt_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u64(unsigned long long __X, unsigned long long __Y) {
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__andn_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return ~__X & __Y;
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u64(unsigned long long __X, unsigned long long __Y) {
|
||||
return __builtin_ia32_bextr_u64(__X, __Y);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bextr_u64 (unsigned long long __X, unsigned long long __Y)
|
||||
{
|
||||
return __builtin_ia32_bextr_u64 (__X, __Y);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) {
|
||||
return __builtin_ia32_bextr_u64(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
|
||||
{
|
||||
return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u64(unsigned long long __X) {
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsi_u64 (unsigned long long __X)
|
||||
{
|
||||
return __X & -__X;
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u64(unsigned long long __X) {
|
||||
return __blsi_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsi_u64 (unsigned long long __X)
|
||||
{
|
||||
return __blsi_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u64(unsigned long long __X) {
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsmsk_u64 (unsigned long long __X)
|
||||
{
|
||||
return __X ^ (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u64(unsigned long long __X) {
|
||||
return __blsmsk_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsmsk_u64 (unsigned long long __X)
|
||||
{
|
||||
return __blsmsk_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u64(unsigned long long __X) {
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__blsr_u64 (unsigned long long __X)
|
||||
{
|
||||
return __X & (__X - 1);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u64(unsigned long long __X) {
|
||||
return __blsr_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_blsr_u64 (unsigned long long __X)
|
||||
{
|
||||
return __blsr_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u64(unsigned long long __X) {
|
||||
return __builtin_ia32_tzcnt_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__tzcnt_u64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u64 (__X);
|
||||
}
|
||||
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u64(unsigned long long __X) {
|
||||
return __builtin_ia32_tzcnt_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_tzcnt_u64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_tzcnt_u64 (__X);
|
||||
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#endif
|
||||
#ifdef __DISABLE_BMI__
|
||||
#undef __DISABLE_BMI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_BMI__ */
|
||||
|
||||
#endif /* _BMIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
108
third_party/intel/cetintrin.internal.h
vendored
108
third_party/intel/cetintrin.internal.h
vendored
|
@ -1,73 +1,95 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <cetintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <cetintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CETINTRIN_H_INCLUDED
|
||||
#define _CETINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SHSTK__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("shstk")
|
||||
#pragma GCC target ("shstk")
|
||||
#define __DISABLE_SHSTK__
|
||||
#endif /* __SHSTK__ */
|
||||
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned long long _get_ssp(void) {
|
||||
return __builtin_ia32_rdsspq();
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_get_ssp (void)
|
||||
{
|
||||
return __builtin_ia32_rdsspq ();
|
||||
}
|
||||
#else
|
||||
__funline unsigned int _get_ssp(void) {
|
||||
return __builtin_ia32_rdsspd();
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_get_ssp (void)
|
||||
{
|
||||
return __builtin_ia32_rdsspd ();
|
||||
}
|
||||
#endif
|
||||
|
||||
__funline void _inc_ssp(unsigned int __B) {
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_inc_ssp (unsigned int __B)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
__builtin_ia32_incsspq((unsigned long long)__B);
|
||||
__builtin_ia32_incsspq ((unsigned long long) __B);
|
||||
#else
|
||||
__builtin_ia32_incsspd(__B);
|
||||
__builtin_ia32_incsspd (__B);
|
||||
#endif
|
||||
}
|
||||
|
||||
__funline void _saveprevssp(void) {
|
||||
__builtin_ia32_saveprevssp();
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_saveprevssp (void)
|
||||
{
|
||||
__builtin_ia32_saveprevssp ();
|
||||
}
|
||||
|
||||
__funline void _rstorssp(void *__B) {
|
||||
__builtin_ia32_rstorssp(__B);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_rstorssp (void *__B)
|
||||
{
|
||||
__builtin_ia32_rstorssp (__B);
|
||||
}
|
||||
|
||||
__funline void _wrssd(unsigned int __B, void *__C) {
|
||||
__builtin_ia32_wrssd(__B, __C);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrssd (unsigned int __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrssd (__B, __C);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _wrssq(unsigned long long __B, void *__C) {
|
||||
__builtin_ia32_wrssq(__B, __C);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrssq (unsigned long long __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrssq (__B, __C);
|
||||
}
|
||||
#endif
|
||||
|
||||
__funline void _wrussd(unsigned int __B, void *__C) {
|
||||
__builtin_ia32_wrussd(__B, __C);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrussd (unsigned int __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrussd (__B, __C);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _wrussq(unsigned long long __B, void *__C) {
|
||||
__builtin_ia32_wrussq(__B, __C);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_wrussq (unsigned long long __B, void *__C)
|
||||
{
|
||||
__builtin_ia32_wrussq (__B, __C);
|
||||
}
|
||||
#endif
|
||||
|
||||
__funline void _setssbsy(void) {
|
||||
__builtin_ia32_setssbsy();
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_setssbsy (void)
|
||||
{
|
||||
__builtin_ia32_setssbsy ();
|
||||
}
|
||||
|
||||
__funline void _clrssbsy(void *__B) {
|
||||
__builtin_ia32_clrssbsy(__B);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_clrssbsy (void *__B)
|
||||
{
|
||||
__builtin_ia32_clrssbsy (__B);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SHSTK__
|
||||
#undef __DISABLE_SHSTK__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SHSTK__ */
|
||||
|
||||
#endif /* _CETINTRIN_H_INCLUDED. */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
23
third_party/intel/cldemoteintrin.internal.h
vendored
23
third_party/intel/cldemoteintrin.internal.h
vendored
|
@ -1,21 +1,24 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <cldemoteintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <cldemoteintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLDEMOTE_H_INCLUDED
|
||||
#define _CLDEMOTE_H_INCLUDED
|
||||
|
||||
#ifndef __CLDEMOTE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("cldemote")
|
||||
#define __DISABLE_CLDEMOTE__
|
||||
#endif /* __CLDEMOTE__ */
|
||||
__funline void _cldemote(void *__A) {
|
||||
__builtin_ia32_cldemote(__A);
|
||||
#endif
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cldemote (void *__A)
|
||||
{
|
||||
__builtin_ia32_cldemote (__A);
|
||||
}
|
||||
#ifdef __DISABLE_CLDEMOTE__
|
||||
#undef __DISABLE_CLDEMOTE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLDEMOTE__ */
|
||||
|
||||
#endif /* _CLDEMOTE_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
25
third_party/intel/clflushoptintrin.internal.h
vendored
25
third_party/intel/clflushoptintrin.internal.h
vendored
|
@ -1,23 +1,24 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <clflushoptintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLFLUSHOPTINTRIN_H_INCLUDED
|
||||
#define _CLFLUSHOPTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __CLFLUSHOPT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clflushopt")
|
||||
#define __DISABLE_CLFLUSHOPT__
|
||||
#endif /* __CLFLUSHOPT__ */
|
||||
|
||||
__funline void _mm_clflushopt(void *__A) {
|
||||
__builtin_ia32_clflushopt(__A);
|
||||
#endif
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_clflushopt (void *__A)
|
||||
{
|
||||
__builtin_ia32_clflushopt (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_CLFLUSHOPT__
|
||||
#undef __DISABLE_CLFLUSHOPT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLFLUSHOPT__ */
|
||||
|
||||
#endif /* _CLFLUSHOPTINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
25
third_party/intel/clwbintrin.internal.h
vendored
25
third_party/intel/clwbintrin.internal.h
vendored
|
@ -1,23 +1,24 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <clwbintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _CLWBINTRIN_H_INCLUDED
|
||||
#define _CLWBINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __CLWB__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clwb")
|
||||
#define __DISABLE_CLWB__
|
||||
#endif /* __CLWB__ */
|
||||
|
||||
__funline void _mm_clwb(void *__A) {
|
||||
__builtin_ia32_clwb(__A);
|
||||
#endif
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_clwb (void *__A)
|
||||
{
|
||||
__builtin_ia32_clwb (__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_CLWB__
|
||||
#undef __DISABLE_CLWB__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLWB__ */
|
||||
|
||||
#endif /* _CLWBINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
21
third_party/intel/clzerointrin.internal.h
vendored
21
third_party/intel/clzerointrin.internal.h
vendored
|
@ -1,21 +1,20 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _CLZEROINTRIN_H_INCLUDED
|
||||
#define _CLZEROINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
|
||||
#ifndef __CLZERO__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("clzero")
|
||||
#define __DISABLE_CLZERO__
|
||||
#endif /* __CLZERO__ */
|
||||
|
||||
__funline void _mm_clzero(void* __I) {
|
||||
__builtin_ia32_clzero(__I);
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_clzero (void * __I)
|
||||
{
|
||||
__builtin_ia32_clzero (__I);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_CLZERO__
|
||||
#undef __DISABLE_CLZERO__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_CLZERO__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _CLZEROINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
143
third_party/intel/cpuid.internal.h
vendored
143
third_party/intel/cpuid.internal.h
vendored
|
@ -1,8 +1,10 @@
|
|||
#ifndef COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_
|
||||
#define COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_
|
||||
#ifdef __x86_64__
|
||||
#if !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _CPUID_H_INCLUDED
|
||||
#define _CPUID_H_INCLUDED
|
||||
#define bit_AVXVNNI (1 << 4)
|
||||
#define bit_AVX512BF16 (1 << 5)
|
||||
#define bit_HRESET (1 << 22)
|
||||
#define bit_SSE3 (1 << 0)
|
||||
#define bit_PCLMUL (1 << 1)
|
||||
#define bit_LZCNT (1 << 5)
|
||||
|
@ -19,14 +21,12 @@
|
|||
#define bit_AVX (1 << 28)
|
||||
#define bit_F16C (1 << 29)
|
||||
#define bit_RDRND (1 << 30)
|
||||
|
||||
#define bit_CMPXCHG8B (1 << 8)
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
#define bit_FXSAVE (1 << 24)
|
||||
#define bit_SSE (1 << 25)
|
||||
#define bit_SSE2 (1 << 26)
|
||||
|
||||
#define bit_LAHF_LM (1 << 0)
|
||||
#define bit_ABM (1 << 5)
|
||||
#define bit_SSE4a (1 << 6)
|
||||
|
@ -36,15 +36,12 @@
|
|||
#define bit_FMA4 (1 << 16)
|
||||
#define bit_TBM (1 << 21)
|
||||
#define bit_MWAITX (1 << 29)
|
||||
|
||||
#define bit_MMXEXT (1 << 22)
|
||||
#define bit_LM (1 << 29)
|
||||
#define bit_3DNOWP (1 << 30)
|
||||
#define bit_3DNOW (1u << 31)
|
||||
|
||||
#define bit_CLZERO (1 << 0)
|
||||
#define bit_WBNOINVD (1 << 9)
|
||||
|
||||
#define bit_FSGSBASE (1 << 0)
|
||||
#define bit_SGX (1 << 2)
|
||||
#define bit_BMI (1 << 3)
|
||||
|
@ -66,7 +63,6 @@
|
|||
#define bit_SHA (1 << 29)
|
||||
#define bit_AVX512BW (1 << 30)
|
||||
#define bit_AVX512VL (1u << 31)
|
||||
|
||||
#define bit_PREFETCHWT1 (1 << 0)
|
||||
#define bit_AVX512VBMI (1 << 1)
|
||||
#define bit_PKU (1 << 3)
|
||||
|
@ -83,101 +79,80 @@
|
|||
#define bit_RDPID (1 << 22)
|
||||
#define bit_MOVDIRI (1 << 27)
|
||||
#define bit_MOVDIR64B (1 << 28)
|
||||
#define bit_ENQCMD (1 << 29)
|
||||
#define bit_CLDEMOTE (1 << 25)
|
||||
|
||||
#define bit_KL (1 << 23)
|
||||
#define bit_AVX5124VNNIW (1 << 2)
|
||||
#define bit_AVX5124FMAPS (1 << 3)
|
||||
#define bit_AVX512VP2INTERSECT (1 << 8)
|
||||
#define bit_IBT (1 << 20)
|
||||
#define bit_UINTR (1 << 5)
|
||||
#define bit_PCONFIG (1 << 18)
|
||||
|
||||
#define bit_SERIALIZE (1 << 14)
|
||||
#define bit_TSXLDTRK (1 << 16)
|
||||
#define bit_AMX_BF16 (1 << 22)
|
||||
#define bit_AMX_TILE (1 << 24)
|
||||
#define bit_AMX_INT8 (1 << 25)
|
||||
#define bit_BNDREGS (1 << 3)
|
||||
#define bit_BNDCSR (1 << 4)
|
||||
|
||||
#define bit_XSAVEOPT (1 << 0)
|
||||
#define bit_XSAVEC (1 << 1)
|
||||
#define bit_XSAVES (1 << 3)
|
||||
|
||||
#define bit_PTWRITE (1 << 4)
|
||||
|
||||
#define bit_AESKLE ( 1<<0 )
|
||||
#define bit_WIDEKL ( 1<<2 )
|
||||
#define signature_AMD_ebx 0x68747541
|
||||
#define signature_AMD_ecx 0x444d4163
|
||||
#define signature_AMD_edx 0x69746e65
|
||||
|
||||
#define signature_CENTAUR_ebx 0x746e6543
|
||||
#define signature_CENTAUR_ecx 0x736c7561
|
||||
#define signature_CENTAUR_edx 0x48727561
|
||||
|
||||
#define signature_CYRIX_ebx 0x69727943
|
||||
#define signature_CYRIX_ecx 0x64616574
|
||||
#define signature_CYRIX_edx 0x736e4978
|
||||
|
||||
#define signature_INTEL_ebx 0x756e6547
|
||||
#define signature_INTEL_ecx 0x6c65746e
|
||||
#define signature_INTEL_edx 0x49656e69
|
||||
|
||||
#define signature_TM1_ebx 0x6e617254
|
||||
#define signature_TM1_ecx 0x55504361
|
||||
#define signature_TM1_edx 0x74656d73
|
||||
|
||||
#define signature_TM2_ebx 0x756e6547
|
||||
#define signature_TM2_ecx 0x3638784d
|
||||
#define signature_TM2_edx 0x54656e69
|
||||
|
||||
#define signature_NSC_ebx 0x646f6547
|
||||
#define signature_NSC_ecx 0x43534e20
|
||||
#define signature_NSC_edx 0x79622065
|
||||
|
||||
#define signature_NEXGEN_ebx 0x4778654e
|
||||
#define signature_NEXGEN_ecx 0x6e657669
|
||||
#define signature_NEXGEN_edx 0x72446e65
|
||||
|
||||
#define signature_RISE_ebx 0x65736952
|
||||
#define signature_RISE_ecx 0x65736952
|
||||
#define signature_RISE_edx 0x65736952
|
||||
|
||||
#define signature_SIS_ebx 0x20536953
|
||||
#define signature_SIS_ecx 0x20536953
|
||||
#define signature_SIS_edx 0x20536953
|
||||
|
||||
#define signature_UMC_ebx 0x20434d55
|
||||
#define signature_UMC_ecx 0x20434d55
|
||||
#define signature_UMC_edx 0x20434d55
|
||||
|
||||
#define signature_VIA_ebx 0x20414956
|
||||
#define signature_VIA_ecx 0x20414956
|
||||
#define signature_VIA_edx 0x20414956
|
||||
|
||||
#define signature_VORTEX_ebx 0x74726f56
|
||||
#define signature_VORTEX_ecx 0x436f5320
|
||||
#define signature_VORTEX_edx 0x36387865
|
||||
|
||||
#ifndef __x86_64__
|
||||
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
do { \
|
||||
if (__builtin_constant_p(level) && (level) != 1) \
|
||||
__asm__("cpuid\n\t" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(level)); \
|
||||
else \
|
||||
__asm__("cpuid\n\t" \
|
||||
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \
|
||||
: "0"(level), "1"(0), "2"(0)); \
|
||||
} while (0)
|
||||
#define __cpuid(level, a, b, c, d) do { if (__builtin_constant_p (level) && (level) != 1) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level)); else __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "1" (0), "2" (0)); } while (0)
|
||||
#else
|
||||
#define __cpuid(level, a, b, c, d) \
|
||||
__asm__("cpuid\n\t" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(level))
|
||||
#define __cpuid(level, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level))
|
||||
#endif
|
||||
|
||||
#define __cpuid_count(level, count, a, b, c, d) \
|
||||
__asm__("cpuid\n\t" \
|
||||
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \
|
||||
: "0"(level), "2"(count))
|
||||
|
||||
static __inline unsigned int __get_cpuid_max(unsigned int __ext,
|
||||
unsigned int *__sig) {
|
||||
#define __cpuid_count(level, count, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "2" (count))
|
||||
static __inline unsigned int
|
||||
__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
|
||||
{
|
||||
unsigned int __eax, __ebx, __ecx, __edx;
|
||||
#ifndef __x86_64__
|
||||
#if __GNUC__ >= 3
|
||||
__asm__("pushf{l|d}\n\t"
|
||||
__asm__ ("pushf{l|d}\n\t"
|
||||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"mov{l}\t{%0, %1|%1, %0}\n\t"
|
||||
|
@ -187,10 +162,10 @@ static __inline unsigned int __get_cpuid_max(unsigned int __ext,
|
|||
"pushf{l|d}\n\t"
|
||||
"pop{l}\t%0\n\t"
|
||||
"popf{l|d}\n\t"
|
||||
: "=&r"(__eax), "=&r"(__ebx)
|
||||
: "i"(0x00200000));
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#else
|
||||
__asm__("pushfl\n\t"
|
||||
__asm__ ("pushfl\n\t"
|
||||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"movl\t%0, %1\n\t"
|
||||
|
@ -200,38 +175,46 @@ static __inline unsigned int __get_cpuid_max(unsigned int __ext,
|
|||
"pushfl\n\t"
|
||||
"popl\t%0\n\t"
|
||||
"popfl\n\t"
|
||||
: "=&r"(__eax), "=&r"(__ebx)
|
||||
: "i"(0x00200000));
|
||||
: "=&r" (__eax), "=&r" (__ebx)
|
||||
: "i" (0x00200000));
|
||||
#endif
|
||||
if (!((__eax ^ __ebx) & 0x00200000)) return 0;
|
||||
if (!((__eax ^ __ebx) & 0x00200000))
|
||||
return 0;
|
||||
#endif
|
||||
__cpuid(__ext, __eax, __ebx, __ecx, __edx);
|
||||
if (__sig) *__sig = __ebx;
|
||||
__cpuid (__ext, __eax, __ebx, __ecx, __edx);
|
||||
if (__sig)
|
||||
*__sig = __ebx;
|
||||
return __eax;
|
||||
}
|
||||
|
||||
static __inline int __get_cpuid(unsigned int __leaf, unsigned int *__eax,
|
||||
unsigned int *__ebx, unsigned int *__ecx,
|
||||
unsigned int *__edx) {
|
||||
unsigned int __ext = __leaf & 0x80000000;
|
||||
unsigned int __maxlevel = __get_cpuid_max(__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf) return 0;
|
||||
__cpuid(__leaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static __inline int __get_cpuid_count(unsigned int __leaf,
|
||||
unsigned int __subleaf,
|
||||
static __inline int
|
||||
__get_cpuid (unsigned int __leaf,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx,
|
||||
unsigned int *__edx) {
|
||||
unsigned int *__ecx, unsigned int *__edx)
|
||||
{
|
||||
unsigned int __ext = __leaf & 0x80000000;
|
||||
unsigned int __maxlevel = __get_cpuid_max(__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf) return 0;
|
||||
__cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf)
|
||||
return 0;
|
||||
__cpuid (__leaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_ */
|
||||
static __inline int
|
||||
__get_cpuid_count (unsigned int __leaf, unsigned int __subleaf,
|
||||
unsigned int *__eax, unsigned int *__ebx,
|
||||
unsigned int *__ecx, unsigned int *__edx)
|
||||
{
|
||||
unsigned int __ext = __leaf & 0x80000000;
|
||||
unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
|
||||
if (__maxlevel == 0 || __maxlevel < __leaf)
|
||||
return 0;
|
||||
__cpuid_count (__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
|
||||
return 1;
|
||||
}
|
||||
static __inline void
|
||||
__cpuidex (int __cpuid_info[4], int __leaf, int __subleaf)
|
||||
{
|
||||
__cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
|
||||
__cpuid_info[2], __cpuid_info[3]);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
1731
third_party/intel/emmintrin.internal.h
vendored
1731
third_party/intel/emmintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
30
third_party/intel/enqcmdintrin.internal.h
vendored
Normal file
30
third_party/intel/enqcmdintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <enqcmdintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _ENQCMDINTRIN_H_INCLUDED
|
||||
#define _ENQCMDINTRIN_H_INCLUDED
|
||||
#ifndef __ENQCMD__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("enqcmd")
|
||||
#define __DISABLE_ENQCMD__
|
||||
#endif
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_enqcmd (void * __P, const void * __Q)
|
||||
{
|
||||
return __builtin_ia32_enqcmd (__P, __Q);
|
||||
}
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_enqcmds (void * __P, const void * __Q)
|
||||
{
|
||||
return __builtin_ia32_enqcmds (__P, __Q);
|
||||
}
|
||||
#ifdef __DISABLE_ENQCMD__
|
||||
#undef __DISABLE_ENQCMD__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
95
third_party/intel/f16cintrin.internal.h
vendored
95
third_party/intel/f16cintrin.internal.h
vendored
|
@ -1,75 +1,58 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error \
|
||||
"Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
|
||||
# error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _F16CINTRIN_H_INCLUDED
|
||||
#define _F16CINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __F16C__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("f16c")
|
||||
#define __DISABLE_F16C__
|
||||
#endif /* __F16C__ */
|
||||
|
||||
__funline float _cvtsh_ss(unsigned short __S) {
|
||||
__v8hi __H = __extension__(__v8hi){(short)__S, 0, 0, 0, 0, 0, 0, 0};
|
||||
__v4sf __A = __builtin_ia32_vcvtph2ps(__H);
|
||||
return __builtin_ia32_vec_ext_v4sf(__A, 0);
|
||||
#endif
|
||||
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cvtsh_ss (unsigned short __S)
|
||||
{
|
||||
__v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
|
||||
__v4sf __A = __builtin_ia32_vcvtph2ps (__H);
|
||||
return __builtin_ia32_vec_ext_v4sf (__A, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts four half-precision (16-bit) floating point values to
|
||||
* single-precision floating point values.
|
||||
*/
|
||||
__funline __m128 _mm_cvtph_ps(__m128i __A) {
|
||||
return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__A);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtph_ps (__m128i __A)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts eight half-precision (16-bit) floating point values to
|
||||
* single-precision floating point values.
|
||||
*/
|
||||
__funline __m256 _mm256_cvtph_ps(__m128i __A) {
|
||||
return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__A);
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtph_ps (__m128i __A)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline unsigned short _cvtss_sh(float __F, const int __I) {
|
||||
__v4sf __A = __extension__(__v4sf){__F, 0, 0, 0};
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph(__A, __I);
|
||||
return (unsigned short)__builtin_ia32_vec_ext_v8hi(__H, 0);
|
||||
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_cvtss_sh (float __F, const int __I)
|
||||
{
|
||||
__v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
|
||||
return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_cvtps_ph(__m128 __A, const int __I) {
|
||||
return (__m128i)__builtin_ia32_vcvtps2ph((__v4sf)__A, __I);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_cvtps_ph (__m128 __A, const int __I)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts eight single-precision floating point values to
|
||||
* half-precision (16-bit) floating point values.
|
||||
*/
|
||||
__funline __m128i _mm256_cvtps_ph(__m256 __A, const int __I) {
|
||||
return (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)__A, __I);
|
||||
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_cvtps_ph (__m256 __A, const int __I)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
|
||||
}
|
||||
#else
|
||||
#define _cvtss_sh(__F, __I) \
|
||||
(__extension__({ \
|
||||
__v4sf __A = __extension__(__v4sf){__F, 0, 0, 0}; \
|
||||
__v8hi __H = __builtin_ia32_vcvtps2ph(__A, __I); \
|
||||
(unsigned short)__builtin_ia32_vec_ext_v8hi(__H, 0); \
|
||||
}))
|
||||
|
||||
#define _mm_cvtps_ph(A, I) \
|
||||
((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)A, (int)(I)))
|
||||
|
||||
#define _mm256_cvtps_ph(A, I) \
|
||||
((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)A, (int)(I)))
|
||||
#endif /* __OPTIMIZE */
|
||||
|
||||
#define _cvtss_sh(__F, __I) (__extension__ ({ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); }))
|
||||
#define _mm_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) (A), (int) (I)))
|
||||
#define _mm256_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) (A), (int) (I)))
|
||||
#endif
|
||||
#ifdef __DISABLE_F16C__
|
||||
#undef __DISABLE_F16C__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_F16C__ */
|
||||
|
||||
#endif /* _F16CINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
311
third_party/intel/fma4intrin.internal.h
vendored
311
third_party/intel/fma4intrin.internal.h
vendored
|
@ -1,184 +1,179 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
|
||||
# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FMA4INTRIN_H_INCLUDED
|
||||
#define _FMA4INTRIN_H_INCLUDED
|
||||
|
||||
#include "third_party/intel/ammintrin.internal.h"
|
||||
|
||||
#ifndef __FMA4__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fma4")
|
||||
#define __DISABLE_FMA4__
|
||||
#endif /* __FMA4__ */
|
||||
|
||||
__funline __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
#endif
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
/* 256b Floating point multiply/add type instructions. */
|
||||
__funline __m256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
|
||||
}
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
|
||||
}
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_FMA4__
|
||||
#undef __DISABLE_FMA4__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FMA4__ */
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
295
third_party/intel/fmaintrin.internal.h
vendored
295
third_party/intel/fmaintrin.internal.h
vendored
|
@ -1,177 +1,246 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
|
||||
# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FMAINTRIN_H_INCLUDED
|
||||
#define _FMAINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __FMA__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fma")
|
||||
#define __DISABLE_FMA__
|
||||
#endif /* __FMA__ */
|
||||
|
||||
__funline __m128d _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
|
||||
#endif
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmsubsd3((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmsubss3((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B,
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd3((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmaddss3((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B,
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd3((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfnmsubss3((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
|
||||
(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
|
||||
(__v4df)__B,
|
||||
(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
|
||||
(__v8sf)__B,
|
||||
(__v8sf)__C);
|
||||
}
|
||||
|
||||
__funline __m128d _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
|
||||
extern __inline __m128d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
|
||||
{
|
||||
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
|
||||
-(__v2df)__C);
|
||||
}
|
||||
|
||||
__funline __m256d _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) {
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B,
|
||||
extern __inline __m256d
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
|
||||
{
|
||||
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
|
||||
(__v4df)__B,
|
||||
-(__v4df)__C);
|
||||
}
|
||||
|
||||
__funline __m128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
|
||||
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
|
||||
extern __inline __m128
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
|
||||
{
|
||||
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
|
||||
-(__v4sf)__C);
|
||||
}
|
||||
|
||||
__funline __m256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) {
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B,
|
||||
extern __inline __m256
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
|
||||
{
|
||||
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
|
||||
(__v8sf)__B,
|
||||
-(__v8sf)__C);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_FMA__
|
||||
#undef __DISABLE_FMA__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FMA__ */
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
49
third_party/intel/fxsrintrin.internal.h
vendored
49
third_party/intel/fxsrintrin.internal.h
vendored
|
@ -1,37 +1,44 @@
|
|||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <fxsrintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _FXSRINTRIN_H_INCLUDED
|
||||
#define _FXSRINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __FXSR__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fxsr")
|
||||
#define __DISABLE_FXSR__
|
||||
#endif /* __FXSR__ */
|
||||
|
||||
__funline void _fxsave(void *__P) {
|
||||
__builtin_ia32_fxsave(__P);
|
||||
#endif
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_fxsave (void *__P)
|
||||
{
|
||||
__builtin_ia32_fxsave (__P);
|
||||
}
|
||||
|
||||
__funline void _fxrstor(void *__P) {
|
||||
__builtin_ia32_fxrstor(__P);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_fxrstor (void *__P)
|
||||
{
|
||||
__builtin_ia32_fxrstor (__P);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _fxsave64(void *__P) {
|
||||
__builtin_ia32_fxsave64(__P);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_fxsave64 (void *__P)
|
||||
{
|
||||
__builtin_ia32_fxsave64 (__P);
|
||||
}
|
||||
|
||||
__funline void _fxrstor64(void *__P) {
|
||||
__builtin_ia32_fxrstor64(__P);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_fxrstor64 (void *__P)
|
||||
{
|
||||
__builtin_ia32_fxrstor64 (__P);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_FXSR__
|
||||
#undef __DISABLE_FXSR__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FXSR__ */
|
||||
|
||||
#endif /* _FXSRINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
457
third_party/intel/gfniintrin.internal.h
vendored
457
third_party/intel/gfniintrin.internal.h
vendored
|
@ -1,311 +1,310 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _GFNIINTRIN_H_INCLUDED
|
||||
#define _GFNIINTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__GFNI__) || !defined(__SSE2__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,sse2")
|
||||
#define __DISABLE_GFNI__
|
||||
#endif /* __GFNI__ */
|
||||
|
||||
__funline __m128i _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi((__v16qi)__A, (__v16qi)__B);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
|
||||
(__v16qi) __B);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B,
|
||||
const int __C) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)__A,
|
||||
(__v16qi)__B, __C);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
|
||||
(__v16qi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_gf2p8affine_epi64_epi8(__m128i __A, __m128i __B,
|
||||
const int __C) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)__A,
|
||||
(__v16qi)__B, __C);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
|
||||
(__v16qi) __B, __C);
|
||||
}
|
||||
#else
|
||||
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi( \
|
||||
(__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
|
||||
#define _mm_gf2p8affine_epi64_epi8(A, B, C) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi( \
|
||||
(__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
|
||||
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C) ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
|
||||
#define _mm_gf2p8affine_epi64_epi8(A, B, C) ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNI__
|
||||
#undef __DISABLE_GFNI__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_GFNI__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx")
|
||||
#define __DISABLE_GFNIAVX__
|
||||
#endif /* __GFNIAVX__ */
|
||||
|
||||
__funline __m256i _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi((__v32qi)__A, (__v32qi)__B);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
|
||||
(__v32qi) __B);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m256i _mm256_gf2p8affineinv_epi64_epi8(__m256i __A, __m256i __B,
|
||||
const int __C) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)__A,
|
||||
(__v32qi)__B, __C);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
|
||||
(__v32qi) __B,
|
||||
__C);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_gf2p8affine_epi64_epi8(__m256i __A, __m256i __B,
|
||||
const int __C) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)__A,
|
||||
(__v32qi)__B, __C);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
|
||||
(__v32qi) __B, __C);
|
||||
}
|
||||
#else
|
||||
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi( \
|
||||
(__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
|
||||
#define _mm256_gf2p8affine_epi64_epi8(A, B, C) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi( \
|
||||
(__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
|
||||
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
|
||||
#define _mm256_gf2p8affine_epi64_epi8(A, B, C) ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A), ( __v32qi)(__m256i)(B), (int)(C)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX__
|
||||
#undef __DISABLE_GFNIAVX__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX512VL__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512vl")
|
||||
#define __DISABLE_GFNIAVX512VL__
|
||||
#endif /* __GFNIAVX512VL__ */
|
||||
|
||||
__funline __m128i _mm_mask_gf2p8mul_epi8(__m128i __A, __mmask16 __B, __m128i __C,
|
||||
__m128i __D) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
|
||||
(__v16qi)__C, (__v16qi)__D, (__v16qi)__A, __B);
|
||||
#endif
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
|
||||
(__v16qi) __D,
|
||||
(__v16qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_gf2p8mul_epi8(__mmask16 __A, __m128i __B,
|
||||
__m128i __C) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask(
|
||||
(__v16qi)__B, (__v16qi)__C, (__v16qi)_mm_setzero_si128(), __A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
|
||||
(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m128i _mm_mask_gf2p8affineinv_epi64_epi8(__m128i __A, __mmask16 __B,
|
||||
__m128i __C, __m128i __D,
|
||||
const int __E) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
|
||||
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
|
||||
__m128i __D, const int __E)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
|
||||
(__v16qi) __D,
|
||||
__E,
|
||||
(__v16qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 __A, __m128i __B,
|
||||
__m128i __C,
|
||||
const int __D) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask(
|
||||
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
|
||||
const int __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
|
||||
(__v16qi) __C, __D,
|
||||
(__v16qi) _mm_setzero_si128 (),
|
||||
__A);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_mask_gf2p8affine_epi64_epi8(__m128i __A, __mmask16 __B,
|
||||
__m128i __C, __m128i __D,
|
||||
const int __E) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
|
||||
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
|
||||
__m128i __D, const int __E)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
|
||||
(__v16qi) __D, __E, (__v16qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m128i _mm_maskz_gf2p8affine_epi64_epi8(__mmask16 __A, __m128i __B,
|
||||
__m128i __C, const int __D) {
|
||||
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask(
|
||||
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A);
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
|
||||
const int __D)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
|
||||
(__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
|
||||
}
|
||||
#else
|
||||
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), \
|
||||
(__v16qi)(__m128i)(A), (__mmask16)(B)))
|
||||
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), \
|
||||
(__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)(A)))
|
||||
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), \
|
||||
(__v16qi)(__m128i)(A), (__mmask16)(B)))
|
||||
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
|
||||
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( \
|
||||
(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), \
|
||||
(__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)(A)))
|
||||
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( (__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
|
||||
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( (__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
|
||||
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
|
||||
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512VL__
|
||||
#undef __DISABLE_GFNIAVX512VL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX512VL__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512vl,avx512bw")
|
||||
#define __DISABLE_GFNIAVX512VLBW__
|
||||
#endif /* __GFNIAVX512VLBW__ */
|
||||
|
||||
__funline __m256i _mm256_mask_gf2p8mul_epi8(__m256i __A, __mmask32 __B,
|
||||
__m256i __C, __m256i __D) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
|
||||
(__v32qi)__C, (__v32qi)__D, (__v32qi)__A, __B);
|
||||
#endif
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
|
||||
__m256i __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
|
||||
(__v32qi) __D,
|
||||
(__v32qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_gf2p8mul_epi8(__mmask32 __A, __m256i __B,
|
||||
__m256i __C) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask(
|
||||
(__v32qi)__B, (__v32qi)__C, (__v32qi)_mm256_setzero_si256(), __A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
|
||||
(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m256i _mm256_mask_gf2p8affineinv_epi64_epi8(__m256i __A,
|
||||
__mmask32 __B,
|
||||
__m256i __C, __m256i __D,
|
||||
const int __E) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
|
||||
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
|
||||
__m256i __C, __m256i __D, const int __E)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
|
||||
(__v32qi) __D,
|
||||
__E,
|
||||
(__v32qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 __A,
|
||||
__m256i __B, __m256i __C,
|
||||
const int __D) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask(
|
||||
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
|
||||
__m256i __C, const int __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
|
||||
(__v32qi) __C, __D,
|
||||
(__v32qi) _mm256_setzero_si256 (), __A);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_mask_gf2p8affine_epi64_epi8(__m256i __A, __mmask32 __B,
|
||||
__m256i __C, __m256i __D,
|
||||
const int __E) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
|
||||
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
|
||||
__m256i __D, const int __E)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
|
||||
(__v32qi) __D,
|
||||
__E,
|
||||
(__v32qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m256i _mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 __A, __m256i __B,
|
||||
__m256i __C,
|
||||
const int __D) {
|
||||
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask(
|
||||
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A);
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
|
||||
__m256i __C, const int __D)
|
||||
{
|
||||
return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
|
||||
(__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
|
||||
}
|
||||
#else
|
||||
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \
|
||||
(__v32qi)(__m256i)(A), (__mmask32)(B)))
|
||||
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
|
||||
(__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)(A)))
|
||||
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \
|
||||
(__v32qi)(__m256i)(A), (__mmask32)(B)))
|
||||
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
|
||||
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( \
|
||||
(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
|
||||
(__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)(A)))
|
||||
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( (__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
|
||||
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( (__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
|
||||
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
|
||||
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512VLBW__
|
||||
#undef __DISABLE_GFNIAVX512VLBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX512VLBW__ */
|
||||
|
||||
#endif
|
||||
#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("gfni,avx512f,avx512bw")
|
||||
#define __DISABLE_GFNIAVX512FBW__
|
||||
#endif /* __GFNIAVX512FBW__ */
|
||||
|
||||
__funline __m512i _mm512_mask_gf2p8mul_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C, __m512i __D) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
|
||||
(__v64qi)__C, (__v64qi)__D, (__v64qi)__A, __B);
|
||||
#endif
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
|
||||
__m512i __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
|
||||
(__v64qi) __D, (__v64qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_gf2p8mul_epi8(__mmask64 __A, __m512i __B,
|
||||
__m512i __C) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask(
|
||||
(__v64qi)__B, (__v64qi)__C, (__v64qi)_mm512_setzero_si512(), __A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
|
||||
(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
|
||||
}
|
||||
__funline __m512i _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi((__v64qi)__A, (__v64qi)__B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
|
||||
(__v64qi) __B);
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline __m512i _mm512_mask_gf2p8affineinv_epi64_epi8(__m512i __A,
|
||||
__mmask64 __B,
|
||||
__m512i __C, __m512i __D,
|
||||
const int __E) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
|
||||
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
|
||||
__m512i __D, const int __E)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
|
||||
(__v64qi) __D,
|
||||
__E,
|
||||
(__v64qi)__A,
|
||||
__B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 __A,
|
||||
__m512i __B, __m512i __C,
|
||||
const int __D) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask(
|
||||
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
|
||||
__m512i __C, const int __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
|
||||
(__v64qi) __C, __D,
|
||||
(__v64qi) _mm512_setzero_si512 (), __A);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_gf2p8affineinv_epi64_epi8(__m512i __A, __m512i __B,
|
||||
const int __C) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)__A,
|
||||
(__v64qi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
|
||||
(__v64qi) __B, __C);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_mask_gf2p8affine_epi64_epi8(__m512i __A, __mmask64 __B,
|
||||
__m512i __C, __m512i __D,
|
||||
const int __E) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
|
||||
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
|
||||
__m512i __D, const int __E)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
|
||||
(__v64qi) __D, __E, (__v64qi)__A, __B);
|
||||
}
|
||||
|
||||
__funline __m512i _mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 __A, __m512i __B,
|
||||
__m512i __C,
|
||||
const int __D) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask(
|
||||
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
|
||||
const int __D)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
|
||||
(__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
|
||||
}
|
||||
__funline __m512i _mm512_gf2p8affine_epi64_epi8(__m512i __A, __m512i __B,
|
||||
const int __C) {
|
||||
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)__A,
|
||||
(__v64qi)__B, __C);
|
||||
extern __inline __m512i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
|
||||
{
|
||||
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
|
||||
(__v64qi) __B, __C);
|
||||
}
|
||||
#else
|
||||
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
|
||||
(__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
|
||||
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi( \
|
||||
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
|
||||
(__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
|
||||
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
|
||||
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
|
||||
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi( \
|
||||
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
|
||||
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
|
||||
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_GFNIAVX512FBW__
|
||||
#undef __DISABLE_GFNIAVX512FBW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __GFNIAVX512FBW__ */
|
||||
|
||||
#endif /* _GFNIINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
24
third_party/intel/hresetintrin.internal.h
vendored
Normal file
24
third_party/intel/hresetintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <hresetintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
#ifndef _HRESETINTRIN_H_INCLUDED
|
||||
#define _HRESETINTRIN_H_INCLUDED
|
||||
#ifndef __HRESET__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target ("hreset")
|
||||
#define __DISABLE_HRESET__
|
||||
#endif
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_hreset (unsigned int __EAX)
|
||||
{
|
||||
__builtin_ia32_hreset (__EAX);
|
||||
}
|
||||
#ifdef __DISABLE_HRESET__
|
||||
#undef __DISABLE_HRESET__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
265
third_party/intel/ia32intrin.internal.h
vendored
265
third_party/intel/ia32intrin.internal.h
vendored
|
@ -1,174 +1,206 @@
|
|||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <ia32intrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
__funline int __bsfd(int __X) {
|
||||
return __builtin_ctz(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsfd (int __X)
|
||||
{
|
||||
return __builtin_ctz (__X);
|
||||
}
|
||||
|
||||
__funline int __bsrd(int __X) {
|
||||
return __builtin_ia32_bsrsi(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsrd (int __X)
|
||||
{
|
||||
return __builtin_ia32_bsrsi (__X);
|
||||
}
|
||||
|
||||
__funline int __bswapd(int __X) {
|
||||
return __builtin_bswap32(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bswapd (int __X)
|
||||
{
|
||||
return __builtin_bswap32 (__X);
|
||||
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4.2")
|
||||
#define __DISABLE_SSE4_2__
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
__funline unsigned int __crc32b(unsigned int __C, unsigned char __V) {
|
||||
return __builtin_ia32_crc32qi(__C, __V);
|
||||
#endif
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32b (unsigned int __C, unsigned char __V)
|
||||
{
|
||||
return __builtin_ia32_crc32qi (__C, __V);
|
||||
}
|
||||
|
||||
__funline unsigned int __crc32w(unsigned int __C, unsigned short __V) {
|
||||
return __builtin_ia32_crc32hi(__C, __V);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32w (unsigned int __C, unsigned short __V)
|
||||
{
|
||||
return __builtin_ia32_crc32hi (__C, __V);
|
||||
}
|
||||
|
||||
__funline unsigned int __crc32d(unsigned int __C, unsigned int __V) {
|
||||
return __builtin_ia32_crc32si(__C, __V);
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32d (unsigned int __C, unsigned int __V)
|
||||
{
|
||||
return __builtin_ia32_crc32si (__C, __V);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SSE4_2__
|
||||
#undef __DISABLE_SSE4_2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE4_2__ */
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
__funline int __popcntd(unsigned int __X) {
|
||||
return __builtin_popcount(__X);
|
||||
#endif
|
||||
#endif
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__popcntd (unsigned int __X)
|
||||
{
|
||||
return __builtin_popcount (__X);
|
||||
}
|
||||
|
||||
#ifndef __iamcu__
|
||||
|
||||
__funline unsigned long long __rdpmc(int __S) {
|
||||
return __builtin_ia32_rdpmc(__S);
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rdpmc (int __S)
|
||||
{
|
||||
return __builtin_ia32_rdpmc (__S);
|
||||
}
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
__funline unsigned long long __rdtsc(void) {
|
||||
return __builtin_ia32_rdtsc();
|
||||
}
|
||||
|
||||
#endif
|
||||
#define __rdtsc() __builtin_ia32_rdtsc ()
|
||||
#ifndef __iamcu__
|
||||
|
||||
__funline unsigned long long __rdtscp(unsigned int *__A) {
|
||||
return __builtin_ia32_rdtscp(__A);
|
||||
#define __rdtscp(a) __builtin_ia32_rdtscp (a)
|
||||
#endif
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rolb (unsigned char __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rolqi (__X, __C);
|
||||
}
|
||||
|
||||
#endif /* __iamcu__ */
|
||||
|
||||
__funline unsigned char __rolb(unsigned char __X, int __C) {
|
||||
return __builtin_ia32_rolqi(__X, __C);
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rolw (unsigned short __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rolhi (__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned short __rolw(unsigned short __X, int __C) {
|
||||
return __builtin_ia32_rolhi(__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned int __rold(unsigned int __X, int __C) {
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rold (unsigned int __X, int __C)
|
||||
{
|
||||
__C &= 31;
|
||||
return (__X << __C) | (__X >> (-__C & 31));
|
||||
}
|
||||
|
||||
__funline unsigned char __rorb(unsigned char __X, int __C) {
|
||||
return __builtin_ia32_rorqi(__X, __C);
|
||||
extern __inline unsigned char
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rorb (unsigned char __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rorqi (__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned short __rorw(unsigned short __X, int __C) {
|
||||
return __builtin_ia32_rorhi(__X, __C);
|
||||
extern __inline unsigned short
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rorw (unsigned short __X, int __C)
|
||||
{
|
||||
return __builtin_ia32_rorhi (__X, __C);
|
||||
}
|
||||
|
||||
__funline unsigned int __rord(unsigned int __X, int __C) {
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rord (unsigned int __X, int __C)
|
||||
{
|
||||
__C &= 31;
|
||||
return (__X >> __C) | (__X << (-__C & 31));
|
||||
}
|
||||
|
||||
__funline void __pause(void) {
|
||||
__builtin_ia32_pause();
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__pause (void)
|
||||
{
|
||||
__builtin_ia32_pause ();
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
__funline int __bsfq(long long __X) {
|
||||
return __builtin_ctzll(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsfq (long long __X)
|
||||
{
|
||||
return __builtin_ctzll (__X);
|
||||
}
|
||||
|
||||
__funline int __bsrq(long long __X) {
|
||||
return __builtin_ia32_bsrdi(__X);
|
||||
extern __inline int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bsrq (long long __X)
|
||||
{
|
||||
return __builtin_ia32_bsrdi (__X);
|
||||
}
|
||||
|
||||
__funline long long __bswapq(long long __X) {
|
||||
return __builtin_bswap64(__X);
|
||||
extern __inline long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__bswapq (long long __X)
|
||||
{
|
||||
return __builtin_bswap64 (__X);
|
||||
}
|
||||
|
||||
#ifndef __SSE4_2__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse4.2")
|
||||
#define __DISABLE_SSE4_2__
|
||||
#endif /* __SSE4_2__ */
|
||||
|
||||
__funline unsigned long long __crc32q(unsigned long long __C,
|
||||
unsigned long long __V) {
|
||||
return __builtin_ia32_crc32di(__C, __V);
|
||||
#endif
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__crc32q (unsigned long long __C, unsigned long long __V)
|
||||
{
|
||||
return __builtin_ia32_crc32di (__C, __V);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_SSE4_2__
|
||||
#undef __DISABLE_SSE4_2__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_SSE4_2__ */
|
||||
|
||||
__funline long long __popcntq(unsigned long long __X) {
|
||||
return __builtin_popcountll(__X);
|
||||
#endif
|
||||
extern __inline long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__popcntq (unsigned long long __X)
|
||||
{
|
||||
return __builtin_popcountll (__X);
|
||||
}
|
||||
|
||||
__funline unsigned long long __rolq(unsigned long long __X, int __C) {
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rolq (unsigned long long __X, int __C)
|
||||
{
|
||||
__C &= 63;
|
||||
return (__X << __C) | (__X >> (-__C & 63));
|
||||
}
|
||||
|
||||
__funline unsigned long long __rorq(unsigned long long __X, int __C) {
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__rorq (unsigned long long __X, int __C)
|
||||
{
|
||||
__C &= 63;
|
||||
return (__X >> __C) | (__X << (-__C & 63));
|
||||
}
|
||||
|
||||
__funline unsigned long long __readeflags(void) {
|
||||
return __builtin_ia32_readeflags_u64();
|
||||
extern __inline unsigned long long
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__readeflags (void)
|
||||
{
|
||||
return __builtin_ia32_readeflags_u64 ();
|
||||
}
|
||||
|
||||
__funline void __writeeflags(unsigned long long __X) {
|
||||
__builtin_ia32_writeeflags_u64(__X);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__writeeflags (unsigned long long __X)
|
||||
{
|
||||
__builtin_ia32_writeeflags_u64 (__X);
|
||||
}
|
||||
|
||||
#define _bswap64(a) __bswapq(a)
|
||||
#define _popcnt64(a) __popcntq(a)
|
||||
#else
|
||||
|
||||
__funline unsigned int __readeflags(void) {
|
||||
return __builtin_ia32_readeflags_u32();
|
||||
extern __inline unsigned int
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__readeflags (void)
|
||||
{
|
||||
return __builtin_ia32_readeflags_u32 ();
|
||||
}
|
||||
|
||||
__funline void __writeeflags(unsigned int __X) {
|
||||
__builtin_ia32_writeeflags_u32(__X);
|
||||
extern __inline void
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__writeeflags (unsigned int __X)
|
||||
{
|
||||
__builtin_ia32_writeeflags_u32 (__X);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __LP64__
|
||||
#define _lrotl(a, b) __rolq((a), (b))
|
||||
#define _lrotr(a, b) __rorq((a), (b))
|
||||
#define _lrotl(a,b) __rolq((a), (b))
|
||||
#define _lrotr(a,b) __rorq((a), (b))
|
||||
#else
|
||||
#define _lrotl(a, b) __rold((a), (b))
|
||||
#define _lrotr(a, b) __rord((a), (b))
|
||||
#define _lrotl(a,b) __rold((a), (b))
|
||||
#define _lrotr(a,b) __rord((a), (b))
|
||||
#endif
|
||||
|
||||
#define _bit_scan_forward(a) __bsfd(a)
|
||||
#define _bit_scan_reverse(a) __bsrd(a)
|
||||
#define _bswap(a) __bswapd(a)
|
||||
|
@ -176,9 +208,10 @@ __funline void __writeeflags(unsigned int __X) {
|
|||
#ifndef __iamcu__
|
||||
#define _rdpmc(a) __rdpmc(a)
|
||||
#define _rdtscp(a) __rdtscp(a)
|
||||
#endif /* __iamcu__ */
|
||||
#endif
|
||||
#define _rdtsc() __rdtsc()
|
||||
#define _rotwl(a, b) __rolw((a), (b))
|
||||
#define _rotwr(a, b) __rorw((a), (b))
|
||||
#define _rotl(a, b) __rold((a), (b))
|
||||
#define _rotr(a, b) __rord((a), (b))
|
||||
#define _rotwl(a,b) __rolw((a), (b))
|
||||
#define _rotwr(a,b) __rorw((a), (b))
|
||||
#define _rotl(a,b) __rold((a), (b))
|
||||
#define _rotr(a,b) __rord((a), (b))
|
||||
#endif
|
||||
|
|
152
third_party/intel/immintrin.internal.h
vendored
152
third_party/intel/immintrin.internal.h
vendored
|
@ -1,8 +1,8 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _IMMINTRIN_H_INCLUDED
|
||||
#define _IMMINTRIN_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
|
||||
/* clang-format off */
|
||||
#include "third_party/intel/x86gprintrin.internal.h"
|
||||
#include "third_party/intel/mmintrin.internal.h"
|
||||
#include "third_party/intel/xmmintrin.internal.h"
|
||||
#include "third_party/intel/emmintrin.internal.h"
|
||||
|
@ -10,12 +10,8 @@
|
|||
#include "third_party/intel/tmmintrin.internal.h"
|
||||
#include "third_party/intel/smmintrin.internal.h"
|
||||
#include "third_party/intel/wmmintrin.internal.h"
|
||||
#include "third_party/intel/fxsrintrin.internal.h"
|
||||
#include "third_party/intel/xsaveintrin.internal.h"
|
||||
#include "third_party/intel/xsaveoptintrin.internal.h"
|
||||
#include "third_party/intel/xsavesintrin.internal.h"
|
||||
#include "third_party/intel/xsavecintrin.internal.h"
|
||||
#include "third_party/intel/avxintrin.internal.h"
|
||||
#include "third_party/intel/avxvnniintrin.internal.h"
|
||||
#include "third_party/intel/avx2intrin.internal.h"
|
||||
#include "third_party/intel/avx512fintrin.internal.h"
|
||||
#include "third_party/intel/avx512erintrin.internal.h"
|
||||
|
@ -39,143 +35,21 @@
|
|||
#include "third_party/intel/avx512vnnivlintrin.internal.h"
|
||||
#include "third_party/intel/avx512vpopcntdqvlintrin.internal.h"
|
||||
#include "third_party/intel/avx512bitalgintrin.internal.h"
|
||||
#include "third_party/intel/avx512vp2intersectintrin.internal.h"
|
||||
#include "third_party/intel/avx512vp2intersectvlintrin.internal.h"
|
||||
#include "third_party/intel/shaintrin.internal.h"
|
||||
#include "third_party/intel/lzcntintrin.internal.h"
|
||||
#include "third_party/intel/bmiintrin.internal.h"
|
||||
#include "third_party/intel/bmi2intrin.internal.h"
|
||||
#include "third_party/intel/fmaintrin.internal.h"
|
||||
#include "third_party/intel/f16cintrin.internal.h"
|
||||
#include "third_party/intel/rtmintrin.internal.h"
|
||||
#include "third_party/intel/xtestintrin.internal.h"
|
||||
#include "third_party/intel/cetintrin.internal.h"
|
||||
#include "third_party/intel/gfniintrin.internal.h"
|
||||
#include "third_party/intel/vaesintrin.internal.h"
|
||||
#include "third_party/intel/vpclmulqdqintrin.internal.h"
|
||||
#include "third_party/intel/movdirintrin.internal.h"
|
||||
#include "third_party/intel/sgxintrin.internal.h"
|
||||
#include "third_party/intel/pconfigintrin.internal.h"
|
||||
#include "third_party/intel/waitpkgintrin.internal.h"
|
||||
#include "third_party/intel/cldemoteintrin.internal.h"
|
||||
#include "third_party/intel/rdseedintrin.internal.h"
|
||||
#include "third_party/intel/avx512bf16vlintrin.internal.h"
|
||||
#include "third_party/intel/avx512bf16intrin.internal.h"
|
||||
#include "third_party/intel/amxtileintrin.internal.h"
|
||||
#include "third_party/intel/amxint8intrin.internal.h"
|
||||
#include "third_party/intel/amxbf16intrin.internal.h"
|
||||
#include "third_party/intel/prfchwintrin.internal.h"
|
||||
#include "third_party/intel/adxintrin.internal.h"
|
||||
#include "third_party/intel/clwbintrin.internal.h"
|
||||
#include "third_party/intel/clflushoptintrin.internal.h"
|
||||
#include "third_party/intel/wbnoinvdintrin.internal.h"
|
||||
#include "third_party/intel/pkuintrin.internal.h"
|
||||
/* clang-format on */
|
||||
|
||||
__funline void _wbinvd(void) {
|
||||
__builtin_ia32_wbinvd();
|
||||
}
|
||||
|
||||
#ifndef __RDRND__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rdrnd")
|
||||
#define __DISABLE_RDRND__
|
||||
#endif /* __RDRND__ */
|
||||
__funline int _rdrand16_step(unsigned short *__P) {
|
||||
return __builtin_ia32_rdrand16_step(__P);
|
||||
}
|
||||
|
||||
__funline int _rdrand32_step(unsigned int *__P) {
|
||||
return __builtin_ia32_rdrand32_step(__P);
|
||||
}
|
||||
#ifdef __DISABLE_RDRND__
|
||||
#undef __DISABLE_RDRND__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RDRND__ */
|
||||
|
||||
#ifndef __RDPID__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rdpid")
|
||||
#define __DISABLE_RDPID__
|
||||
#endif /* __RDPID__ */
|
||||
__funline unsigned int _rdpid_u32(void) {
|
||||
return __builtin_ia32_rdpid();
|
||||
}
|
||||
#ifdef __DISABLE_RDPID__
|
||||
#undef __DISABLE_RDPID__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RDPID__ */
|
||||
|
||||
#ifdef __x86_64__
|
||||
|
||||
#ifndef __FSGSBASE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fsgsbase")
|
||||
#define __DISABLE_FSGSBASE__
|
||||
#endif /* __FSGSBASE__ */
|
||||
__funline unsigned int _readfsbase_u32(void) {
|
||||
return __builtin_ia32_rdfsbase32();
|
||||
}
|
||||
|
||||
__funline unsigned long long _readfsbase_u64(void) {
|
||||
return __builtin_ia32_rdfsbase64();
|
||||
}
|
||||
|
||||
__funline unsigned int _readgsbase_u32(void) {
|
||||
return __builtin_ia32_rdgsbase32();
|
||||
}
|
||||
|
||||
__funline unsigned long long _readgsbase_u64(void) {
|
||||
return __builtin_ia32_rdgsbase64();
|
||||
}
|
||||
|
||||
__funline void _writefsbase_u32(unsigned int __B) {
|
||||
__builtin_ia32_wrfsbase32(__B);
|
||||
}
|
||||
|
||||
__funline void _writefsbase_u64(unsigned long long __B) {
|
||||
__builtin_ia32_wrfsbase64(__B);
|
||||
}
|
||||
|
||||
__funline void _writegsbase_u32(unsigned int __B) {
|
||||
__builtin_ia32_wrgsbase32(__B);
|
||||
}
|
||||
|
||||
__funline void _writegsbase_u64(unsigned long long __B) {
|
||||
__builtin_ia32_wrgsbase64(__B);
|
||||
}
|
||||
#ifdef __DISABLE_FSGSBASE__
|
||||
#undef __DISABLE_FSGSBASE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_FSGSBASE__ */
|
||||
|
||||
#ifndef __RDRND__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("rdrnd")
|
||||
#define __DISABLE_RDRND__
|
||||
#endif /* __RDRND__ */
|
||||
__funline int _rdrand64_step(unsigned long long *__P) {
|
||||
return __builtin_ia32_rdrand64_step(__P);
|
||||
}
|
||||
#ifdef __DISABLE_RDRND__
|
||||
#undef __DISABLE_RDRND__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_RDRND__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifndef __PTWRITE__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("ptwrite")
|
||||
#define __DISABLE_PTWRITE__
|
||||
#include "third_party/intel/keylockerintrin.internal.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void _ptwrite64(unsigned long long __B) {
|
||||
__builtin_ia32_ptwrite64(__B);
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
__funline void _ptwrite32(unsigned __B) {
|
||||
__builtin_ia32_ptwrite32(__B);
|
||||
}
|
||||
#ifdef __DISABLE_PTWRITE__
|
||||
#undef __DISABLE_PTWRITE__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_PTWRITE__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _IMMINTRIN_H_INCLUDED */
|
||||
|
|
93
third_party/intel/keylockerintrin.internal.h
vendored
Normal file
93
third_party/intel/keylockerintrin.internal.h
vendored
Normal file
|
@ -0,0 +1,93 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <keylockerintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
#ifndef _KEYLOCKERINTRIN_H_INCLUDED
|
||||
#define _KEYLOCKERINTRIN_H_INCLUDED
|
||||
#ifndef __KL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("kl")
|
||||
#define __DISABLE_KL__
|
||||
#endif
|
||||
extern __inline
|
||||
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_loadiwkey (unsigned int __I, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
__builtin_ia32_loadiwkey ((__v2di) __B, (__v2di) __C, (__v2di) __A, __I);
|
||||
}
|
||||
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey128_u32 (unsigned int __I, __m128i __A, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey128_u32 (__I, (__v2di)__A, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey256_u32 (unsigned int __I, __m128i __A, __m128i __B, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey256_u32 (__I, (__v2di)__A, (__v2di)__B, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
#ifdef __DISABLE_KL__
|
||||
#undef __DISABLE_KL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#ifndef __WIDEKL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("widekl")
|
||||
#define __DISABLE_WIDEKL__
|
||||
#endif
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
#ifdef __DISABLE_WIDEKL__
|
||||
#undef __DISABLE_WIDEKL__
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
81
third_party/intel/lwpintrin.internal.h
vendored
81
third_party/intel/lwpintrin.internal.h
vendored
|
@ -1,73 +1,68 @@
|
|||
#ifndef _X86INTRIN_H_INCLUDED
|
||||
#error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <lwpintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _LWPINTRIN_H_INCLUDED
|
||||
#define _LWPINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __LWP__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("lwp")
|
||||
#define __DISABLE_LWP__
|
||||
#endif /* __LWP__ */
|
||||
|
||||
__funline void __llwpcb(void *__pcbAddress) {
|
||||
__builtin_ia32_llwpcb(__pcbAddress);
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__llwpcb (void *__pcbAddress)
|
||||
{
|
||||
__builtin_ia32_llwpcb (__pcbAddress);
|
||||
}
|
||||
|
||||
__funline void *__slwpcb(void) {
|
||||
return __builtin_ia32_slwpcb();
|
||||
extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__slwpcb (void)
|
||||
{
|
||||
return __builtin_ia32_slwpcb ();
|
||||
}
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline void __lwpval32(unsigned int __data2, unsigned int __data1,
|
||||
unsigned int __flags) {
|
||||
__builtin_ia32_lwpval32(__data2, __data1, __flags);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
|
||||
{
|
||||
__builtin_ia32_lwpval32 (__data2, __data1, __flags);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline void __lwpval64(unsigned long long __data2, unsigned int __data1,
|
||||
unsigned int __flags) {
|
||||
__builtin_ia32_lwpval64(__data2, __data1, __flags);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpval64 (unsigned long long __data2, unsigned int __data1,
|
||||
unsigned int __flags)
|
||||
{
|
||||
__builtin_ia32_lwpval64 (__data2, __data1, __flags);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#define __lwpval32(D2, D1, F) \
|
||||
(__builtin_ia32_lwpval32((unsigned int)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpval32(D2, D1, F) (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#ifdef __x86_64__
|
||||
#define __lwpval64(D2, D1, F) \
|
||||
(__builtin_ia32_lwpval64((unsigned long long)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpval64(D2, D1, F) (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
__funline unsigned char __lwpins32(unsigned int __data2, unsigned int __data1,
|
||||
unsigned int __flags) {
|
||||
return __builtin_ia32_lwpins32(__data2, __data1, __flags);
|
||||
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
|
||||
{
|
||||
return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned char __lwpins64(unsigned long long __data2,
|
||||
unsigned int __data1, unsigned int __flags) {
|
||||
return __builtin_ia32_lwpins64(__data2, __data1, __flags);
|
||||
extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lwpins64 (unsigned long long __data2, unsigned int __data1,
|
||||
unsigned int __flags)
|
||||
{
|
||||
return __builtin_ia32_lwpins64 (__data2, __data1, __flags);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#define __lwpins32(D2, D1, F) \
|
||||
(__builtin_ia32_lwpins32((unsigned int)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpins32(D2, D1, F) (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#ifdef __x86_64__
|
||||
#define __lwpins64(D2, D1, F) \
|
||||
(__builtin_ia32_lwpins64((unsigned long long)(D2), (unsigned int)(D1), \
|
||||
(unsigned int)(F)))
|
||||
#define __lwpins64(D2, D1, F) (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_LWP__
|
||||
#undef __DISABLE_LWP__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_LWP__ */
|
||||
|
||||
#endif /* _LWPINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
52
third_party/intel/lzcntintrin.internal.h
vendored
52
third_party/intel/lzcntintrin.internal.h
vendored
|
@ -1,41 +1,45 @@
|
|||
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
|
||||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _X86GPRINTRIN_H_INCLUDED
|
||||
# error "Never use <lzcntintrin.h> directly; include <x86gprintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _LZCNTINTRIN_H_INCLUDED
|
||||
#define _LZCNTINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __LZCNT__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("lzcnt")
|
||||
#define __DISABLE_LZCNT__
|
||||
#endif /* __LZCNT__ */
|
||||
|
||||
__funline unsigned short __lzcnt16(unsigned short __X) {
|
||||
return __builtin_ia32_lzcnt_u16(__X);
|
||||
#endif
|
||||
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lzcnt16 (unsigned short __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u16 (__X);
|
||||
}
|
||||
|
||||
__funline unsigned int __lzcnt32(unsigned int __X) {
|
||||
return __builtin_ia32_lzcnt_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lzcnt32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
__funline unsigned int _lzcnt_u32(unsigned int __X) {
|
||||
return __builtin_ia32_lzcnt_u32(__X);
|
||||
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_lzcnt_u32 (unsigned int __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u32 (__X);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
__funline unsigned long long __lzcnt64(unsigned long long __X) {
|
||||
return __builtin_ia32_lzcnt_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
__lzcnt64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u64 (__X);
|
||||
}
|
||||
|
||||
__funline unsigned long long _lzcnt_u64(unsigned long long __X) {
|
||||
return __builtin_ia32_lzcnt_u64(__X);
|
||||
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_lzcnt_u64 (unsigned long long __X)
|
||||
{
|
||||
return __builtin_ia32_lzcnt_u64 (__X);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __DISABLE_LZCNT__
|
||||
#undef __DISABLE_LZCNT__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_LZCNT__ */
|
||||
|
||||
#endif /* _LZCNTINTRIN_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
217
third_party/intel/mm3dnow.internal.h
vendored
217
third_party/intel/mm3dnow.internal.h
vendored
|
@ -1,9 +1,9 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _MM3DNOW_H_INCLUDED
|
||||
#define _MM3DNOW_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "third_party/intel/mmintrin.internal.h"
|
||||
#include "third_party/intel/prfchwintrin.internal.h"
|
||||
|
||||
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW__
|
||||
#pragma GCC push_options
|
||||
#ifdef __x86_64__
|
||||
|
@ -12,110 +12,128 @@
|
|||
#pragma GCC target("3dnow")
|
||||
#endif
|
||||
#define __DISABLE_3dNOW__
|
||||
#endif /* __3dNOW__ */
|
||||
|
||||
__funline void _m_femms(void) {
|
||||
#endif
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_femms (void)
|
||||
{
|
||||
__builtin_ia32_femms();
|
||||
}
|
||||
|
||||
__funline __m64 _m_pavgusb(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pavgusb((__v8qi)__A, (__v8qi)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pavgusb (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pf2id(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pf2id((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pf2id (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfacc(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfacc((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfadd(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfadd((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfadd (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfcmpeq(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpeq (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfcmpge(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfcmpge((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpge (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfcmpgt(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfcmpgt (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfmax(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfmax((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmax (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfmin(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfmin((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmin (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfmul(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfmul((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfmul (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrcp(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pfrcp((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcp (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrcpit1(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcpit1 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrcpit2(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrcpit2 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrsqrt(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrsqrt (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfrsqit1(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfrsqit1 (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfsub(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfsub((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfsub (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfsubr(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfsubr((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfsubr (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pi2fd(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pi2fd((__v2si)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pi2fd (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pmulhrw(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__A, (__v4hi)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pmulhrw (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
|
||||
}
|
||||
|
||||
__funline void _m_prefetch(void *__P) {
|
||||
__builtin_prefetch(__P, 0, 3 /* _MM_HINT_T0 */);
|
||||
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_prefetch (void *__P)
|
||||
{
|
||||
__builtin_prefetch (__P, 0, 3 );
|
||||
}
|
||||
|
||||
__funline __m64 _m_from_float(float __A) {
|
||||
return __extension__(__m64)(__v2sf){__A, 0.0f};
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_from_float (float __A)
|
||||
{
|
||||
return __extension__ (__m64)(__v2sf){ __A, 0.0f };
|
||||
}
|
||||
|
||||
__funline float _m_to_float(__m64 __A) {
|
||||
union {
|
||||
__v2sf v;
|
||||
float a[2];
|
||||
} __tmp;
|
||||
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_to_float (__m64 __A)
|
||||
{
|
||||
union { __v2sf v; float a[2]; } __tmp;
|
||||
__tmp.v = (__v2sf)__A;
|
||||
return __tmp.a[0];
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_3dNOW__
|
||||
#undef __DISABLE_3dNOW__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_3dNOW__ */
|
||||
|
||||
#endif
|
||||
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW_A__
|
||||
#pragma GCC push_options
|
||||
#ifdef __x86_64__
|
||||
|
@ -124,32 +142,35 @@ __funline float _m_to_float(__m64 __A) {
|
|||
#pragma GCC target("3dnowa")
|
||||
#endif
|
||||
#define __DISABLE_3dNOW_A__
|
||||
#endif /* __3dNOW_A__ */
|
||||
|
||||
__funline __m64 _m_pf2iw(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pf2iw((__v2sf)__A);
|
||||
#endif
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pf2iw (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfnacc(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfnacc((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfnacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pfpnacc(__m64 __A, __m64 __B) {
|
||||
return (__m64)__builtin_ia32_pfpnacc((__v2sf)__A, (__v2sf)__B);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pfpnacc (__m64 __A, __m64 __B)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pi2fw(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pi2fw((__v2si)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pi2fw (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
|
||||
}
|
||||
|
||||
__funline __m64 _m_pswapd(__m64 __A) {
|
||||
return (__m64)__builtin_ia32_pswapdsf((__v2sf)__A);
|
||||
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_m_pswapd (__m64 __A)
|
||||
{
|
||||
return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_3dNOW_A__
|
||||
#undef __DISABLE_3dNOW_A__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_3dNOW_A__ */
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _MM3DNOW_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
11
third_party/intel/mm_malloc.internal.h
vendored
11
third_party/intel/mm_malloc.internal.h
vendored
|
@ -1,15 +1,14 @@
|
|||
/* clang-format off */
|
||||
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
|
||||
#ifndef _MM_MALLOC_H_INCLUDED
|
||||
#define _MM_MALLOC_H_INCLUDED
|
||||
#ifdef __x86_64__
|
||||
#include "libc/mem/mem.h"
|
||||
|
||||
#ifndef __cplusplus
|
||||
extern int _mm_posix_memalign(void **, size_t, size_t)
|
||||
#else
|
||||
extern "C" int _mm_posix_memalign(void **, size_t, size_t) throw()
|
||||
#endif
|
||||
__asm__("posix_memalign");
|
||||
|
||||
static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
|
||||
void *__ptr;
|
||||
if (__alignment == 1) return malloc(__size);
|
||||
|
@ -20,10 +19,8 @@ static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
|
|||
else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static __inline void _mm_free(void *__ptr) {
|
||||
free(__ptr);
|
||||
}
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
#endif /* _MM_MALLOC_H_INCLUDED */
|
||||
#endif
|
||||
#endif
|
||||
|
|
1092
third_party/intel/mmintrin.internal.h
vendored
1092
third_party/intel/mmintrin.internal.h
vendored
File diff suppressed because it is too large
Load diff
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue