Upgrade to Cosmopolitan GCC 11.2.0 for x86_64

This commit is contained in:
Justine Tunney 2023-06-05 00:37:25 -07:00
parent 682b74ed88
commit 39f20dbb13
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
137 changed files with 48523 additions and 34001 deletions

View file

@ -94,6 +94,7 @@ o/$(MODE): \
rwc:/dev/shm \ rwc:/dev/shm \
rx:build/bootstrap \ rx:build/bootstrap \
rx:o/third_party/gcc \ rx:o/third_party/gcc \
r:build/portcosmo.h \
/proc/stat \ /proc/stat \
rw:/dev/null \ rw:/dev/null \
w:o/stack.log \ w:o/stack.log \

View file

@ -88,11 +88,15 @@ ARCH = x86_64
HOSTS ?= freebsd openbsd netbsd rhel7 rhel5 xnu win10 HOSTS ?= freebsd openbsd netbsd rhel7 rhel5 xnu win10
endif endif
PORTCOSMO_CCFLAGS = -fportcosmo -include build/portcosmo.h
ifneq ("$(wildcard o/third_party/gcc/bin/x86_64-pc-linux-gnu-*)","") ifneq ("$(wildcard o/third_party/gcc/bin/x86_64-pc-linux-gnu-*)","")
PREFIX = o/third_party/gcc/bin/x86_64-pc-linux-gnu- PREFIX = o/third_party/gcc/bin/x86_64-pc-linux-gnu-
DEFAULT_CPPFLAGS += $(PORTCOSMO_CCFLAGS)
else else
IGNORE := $(shell build/bootstrap/unbundle.com) IGNORE := $(shell build/bootstrap/unbundle.com)
PREFIX = o/third_party/gcc/bin/x86_64-linux-musl- PREFIX = o/third_party/gcc/bin/x86_64-linux-musl-
DEFAULT_CPPFLAGS += $(PORTCOSMO_CCFLAGS)
endif endif
ifeq ($(ARCH), aarch64) ifeq ($(ARCH), aarch64)
PREFIX = o/third_party/gcc/bin/aarch64-linux-musl- PREFIX = o/third_party/gcc/bin/aarch64-linux-musl-
@ -163,7 +167,7 @@ TRADITIONAL = \
-Wno-return-type \ -Wno-return-type \
-Wno-pointer-sign -Wno-pointer-sign
DEFAULT_CCFLAGS = \ DEFAULT_CCFLAGS += \
-Wall \ -Wall \
-Werror \ -Werror \
-fdebug-prefix-map='$(PWD)'= \ -fdebug-prefix-map='$(PWD)'= \
@ -206,7 +210,7 @@ MATHEMATICAL = \
-O3 \ -O3 \
-fwrapv -fwrapv
DEFAULT_CPPFLAGS = \ DEFAULT_CPPFLAGS += \
-DCOSMO \ -DCOSMO \
-DMODE='"$(MODE)"' \ -DMODE='"$(MODE)"' \
-DIMAGE_BASE_VIRTUAL=$(IMAGE_BASE_VIRTUAL) \ -DIMAGE_BASE_VIRTUAL=$(IMAGE_BASE_VIRTUAL) \

361
build/portcosmo.h Normal file
View file

@ -0,0 +1,361 @@
/*
 * build/portcosmo.h — sentinel constants for the portcosmo GCC patch.
 *
 * This header is force-included into every compilation unit via
 * `-fportcosmo -include build/portcosmo.h` (see DEFAULT_CPPFLAGS in the
 * build config).  Cosmopolitan's "magic numbers" (errno values, address
 * families, socket options, etc.) are runtime variables rather than
 * compile-time constants, which normally breaks C idioms such as
 * `switch (errno) { case EINVAL: ... }`.  The patched compiler matches
 * each unique sentinel value defined below and rewrites such uses so the
 * runtime value is consulted instead.
 *
 * NOTE(review): the specific negative/positive magnitudes appear to be
 * machine-generated and only need to be mutually distinct and unlikely to
 * collide with real program constants — do not edit values by hand;
 * regenerate the file instead (TODO: confirm against the generator in
 * ahgamut's musl-cross-make fork).
 */
#ifndef ACTUALLY_MODS
#define ACTUALLY_MODS
/* Nothing below is meaningful to the assembler or linker passes. */
#if !(__ASSEMBLER__ + __LINKER__ + 0)
/* ---- Address families (AF_*) ---- */
static const int __tmpcosmo_AF_ALG = -15823936;
static const int __tmpcosmo_AF_APPLETALK = -15823820;
static const int __tmpcosmo_AF_ASH = -15823924;
static const int __tmpcosmo_AF_ATMPVC = -15824070;
static const int __tmpcosmo_AF_ATMSVC = -15824056;
static const int __tmpcosmo_AF_AX25 = -15824014;
static const int __tmpcosmo_AF_BLUETOOTH = -15823992;
static const int __tmpcosmo_AF_BRIDGE = -15823812;
static const int __tmpcosmo_AF_CAIF = -15823850;
static const int __tmpcosmo_AF_CAN = -15823868;
static const int __tmpcosmo_AF_ECONET = -15823852;
static const int __tmpcosmo_AF_FILE = -15824118;
static const int __tmpcosmo_AF_IB = -15823966;
static const int __tmpcosmo_AF_IEEE802154 = -15823906;
static const int __tmpcosmo_AF_IPX = -15824002;
static const int __tmpcosmo_AF_IRDA = -15823860;
static const int __tmpcosmo_AF_ISDN = -15823978;
static const int __tmpcosmo_AF_IUCV = -15824106;
static const int __tmpcosmo_AF_KCM = -15824024;
static const int __tmpcosmo_AF_KEY = -15823948;
static const int __tmpcosmo_AF_LINK = -15823878;
static const int __tmpcosmo_AF_LLC = -15823824;
static const int __tmpcosmo_AF_LOCAL = -15823928;
static const int __tmpcosmo_AF_MAX = -15824082;
static const int __tmpcosmo_AF_MPLS = -15824026;
static const int __tmpcosmo_AF_NETBEUI = -15824124;
static const int __tmpcosmo_AF_NETLINK = -15824004;
static const int __tmpcosmo_AF_NETROM = -15823886;
static const int __tmpcosmo_AF_NFC = -15824142;
static const int __tmpcosmo_AF_PACKET = -15824028;
static const int __tmpcosmo_AF_PHONET = -15823830;
static const int __tmpcosmo_AF_PPPOX = -15823876;
static const int __tmpcosmo_AF_ROSE = -15824016;
static const int __tmpcosmo_AF_ROUTE = -15824100;
static const int __tmpcosmo_AF_RXRPC = -15823926;
static const int __tmpcosmo_AF_SECURITY = -15824136;
static const int __tmpcosmo_AF_SNA = -15823950;
static const int __tmpcosmo_AF_TIPC = -15824034;
static const int __tmpcosmo_AF_VSOCK = -15824146;
static const int __tmpcosmo_AF_WANPIPE = -15823960;
static const int __tmpcosmo_AF_X25 = -15823864;
/* ---- errno codes (E*) — enables `switch (errno) { case EINVAL: ... }` ---- */
static const int __tmpcosmo_E2BIG = -15823698;
static const int __tmpcosmo_EACCES = -15823580;
static const int __tmpcosmo_EADDRINUSE = -15823756;
static const int __tmpcosmo_EADDRNOTAVAIL = -15823592;
static const int __tmpcosmo_EADV = -15823574;
static const int __tmpcosmo_EAFNOSUPPORT = -15823748;
static const int __tmpcosmo_EAGAIN = -15823506;
static const int __tmpcosmo_EALREADY = -15823530;
static const int __tmpcosmo_EAUTH = -15823702;
static const int __tmpcosmo_EBADARCH = -15823738;
static const int __tmpcosmo_EBADE = -15823740;
static const int __tmpcosmo_EBADEXEC = -15823684;
static const int __tmpcosmo_EBADF = -15823744;
static const int __tmpcosmo_EBADFD = -15823554;
static const int __tmpcosmo_EBADMACHO = -15823618;
static const int __tmpcosmo_EBADMSG = -15823650;
static const int __tmpcosmo_EBADR = -15823570;
static const int __tmpcosmo_EBADRPC = -15823626;
static const int __tmpcosmo_EBADRQC = -15823688;
static const int __tmpcosmo_EBADSLT = -15823788;
static const int __tmpcosmo_EBUSY = -15823550;
static const int __tmpcosmo_ECANCELED = -15823676;
static const int __tmpcosmo_ECHILD = -15823662;
static const int __tmpcosmo_ECHRNG = -15823722;
static const int __tmpcosmo_ECOMM = -15823634;
static const int __tmpcosmo_ECONNABORTED = -15823616;
static const int __tmpcosmo_ECONNREFUSED = -15823556;
static const int __tmpcosmo_ECONNRESET = -15823548;
static const int __tmpcosmo_EDEADLK = -15823718;
static const int __tmpcosmo_EDESTADDRREQ = -15823658;
static const int __tmpcosmo_EDEVERR = -15823518;
static const int __tmpcosmo_EDOM = -15823798;
static const int __tmpcosmo_EDOTDOT = -15823726;
static const int __tmpcosmo_EDQUOT = -15823620;
static const int __tmpcosmo_EEXIST = -15823594;
static const int __tmpcosmo_EFAULT = -15823686;
static const int __tmpcosmo_EFBIG = -15823768;
static const int __tmpcosmo_EFTYPE = -15823568;
static const int __tmpcosmo_EHOSTDOWN = -15823596;
static const int __tmpcosmo_EHOSTUNREACH = -15823742;
static const int __tmpcosmo_EHWPOISON = -15823680;
static const int __tmpcosmo_EIDRM = -15823644;
static const int __tmpcosmo_EILSEQ = -15823540;
static const int __tmpcosmo_EINPROGRESS = -15823720;
static const int __tmpcosmo_EINTR = -15823710;
static const int __tmpcosmo_EINVAL = -15823624;
static const int __tmpcosmo_EIO = -15823544;
static const int __tmpcosmo_EISCONN = -15823704;
static const int __tmpcosmo_EISDIR = -15823758;
static const int __tmpcosmo_EISNAM = -15823682;
static const int __tmpcosmo_EKEYEXPIRED = -15823520;
static const int __tmpcosmo_EKEYREJECTED = -15823712;
static const int __tmpcosmo_EKEYREVOKED = -15823780;
static const int __tmpcosmo_EL2HLT = -15823510;
static const int __tmpcosmo_EL2NSYNC = -15823670;
static const int __tmpcosmo_EL3HLT = -15823792;
static const int __tmpcosmo_EL3RST = -15823654;
static const int __tmpcosmo_ELIBACC = -15823708;
static const int __tmpcosmo_ELIBBAD = -15823564;
static const int __tmpcosmo_ELIBEXEC = -15823696;
static const int __tmpcosmo_ELIBMAX = -15823724;
static const int __tmpcosmo_ELIBSCN = -15823786;
static const int __tmpcosmo_ELNRNG = -15823732;
static const int __tmpcosmo_ELOOP = -15823672;
static const int __tmpcosmo_EMEDIUMTYPE = -15823508;
static const int __tmpcosmo_EMFILE = -15823762;
static const int __tmpcosmo_EMLINK = -15823694;
static const int __tmpcosmo_EMSGSIZE = -15823536;
static const int __tmpcosmo_EMULTIHOP = -15823750;
static const int __tmpcosmo_ENAMETOOLONG = -15823600;
static const int __tmpcosmo_ENAVAIL = -15823656;
static const int __tmpcosmo_ENEEDAUTH = -15823766;
static const int __tmpcosmo_ENETDOWN = -15823730;
static const int __tmpcosmo_ENETRESET = -15823604;
static const int __tmpcosmo_ENETUNREACH = -15823524;
static const int __tmpcosmo_ENFILE = -15823700;
static const int __tmpcosmo_ENOANO = -15823734;
static const int __tmpcosmo_ENOATTR = -15823606;
static const int __tmpcosmo_ENOBUFS = -15823628;
static const int __tmpcosmo_ENOCSI = -15823760;
static const int __tmpcosmo_ENODATA = -15823516;
static const int __tmpcosmo_ENODEV = -15823774;
static const int __tmpcosmo_ENOENT = -15823590;
static const int __tmpcosmo_ENOEXEC = -15823512;
static const int __tmpcosmo_ENOKEY = -15823764;
static const int __tmpcosmo_ENOLCK = -15823782;
static const int __tmpcosmo_ENOLINK = -15823538;
static const int __tmpcosmo_ENOMEDIUM = -15823598;
static const int __tmpcosmo_ENOMEM = -15823514;
static const int __tmpcosmo_ENOMSG = -15823796;
static const int __tmpcosmo_ENONET = -15823642;
static const int __tmpcosmo_ENOPKG = -15823664;
static const int __tmpcosmo_ENOPOLICY = -15823716;
static const int __tmpcosmo_ENOPROTOOPT = -15823608;
static const int __tmpcosmo_ENOSPC = -15823646;
static const int __tmpcosmo_ENOSR = -15823558;
static const int __tmpcosmo_ENOSTR = -15823706;
static const int __tmpcosmo_ENOSYS = -15823636;
static const int __tmpcosmo_ENOTBLK = -15823640;
static const int __tmpcosmo_ENOTCONN = -15823778;
static const int __tmpcosmo_ENOTDIR = -15823648;
static const int __tmpcosmo_ENOTEMPTY = -15823552;
static const int __tmpcosmo_ENOTNAM = -15823532;
static const int __tmpcosmo_ENOTRECOVERABLE = -15823746;
static const int __tmpcosmo_ENOTSOCK = -15823582;
static const int __tmpcosmo_ENOTSUP = -15823602;
static const int __tmpcosmo_ENOTTY = -15823528;
static const int __tmpcosmo_ENOTUNIQ = -15823790;
static const int __tmpcosmo_ENXIO = -15823622;
static const int __tmpcosmo_EOPNOTSUPP = -15823588;
static const int __tmpcosmo_EOVERFLOW = -15823736;
static const int __tmpcosmo_EOWNERDEAD = -15823562;
static const int __tmpcosmo_EPERM = -15823754;
static const int __tmpcosmo_EPFNOSUPPORT = -15823690;
static const int __tmpcosmo_EPIPE = -15823534;
static const int __tmpcosmo_EPROCLIM = -15823610;
static const int __tmpcosmo_EPROCUNAVAIL = -15823546;
static const int __tmpcosmo_EPROGMISMATCH = -15823572;
static const int __tmpcosmo_EPROGUNAVAIL = -15823526;
static const int __tmpcosmo_EPROTO = -15823678;
static const int __tmpcosmo_EPROTONOSUPPORT = -15823576;
static const int __tmpcosmo_EPROTOTYPE = -15823614;
static const int __tmpcosmo_EPWROFF = -15823692;
static const int __tmpcosmo_ERANGE = -15823772;
static const int __tmpcosmo_EREMCHG = -15823666;
static const int __tmpcosmo_EREMOTE = -15823560;
static const int __tmpcosmo_EREMOTEIO = -15823794;
static const int __tmpcosmo_ERESTART = -15823728;
static const int __tmpcosmo_ERFKILL = -15823612;
static const int __tmpcosmo_EROFS = -15823566;
static const int __tmpcosmo_ERPCMISMATCH = -15823542;
static const int __tmpcosmo_ESHLIBVERS = -15823584;
static const int __tmpcosmo_ESHUTDOWN = -15823660;
static const int __tmpcosmo_ESOCKTNOSUPPORT = -15823776;
static const int __tmpcosmo_ESPIPE = -15823652;
static const int __tmpcosmo_ESRCH = -15823674;
static const int __tmpcosmo_ESRMNT = -15823714;
static const int __tmpcosmo_ESTALE = -15823632;
static const int __tmpcosmo_ESTRPIPE = -15823770;
static const int __tmpcosmo_ETIME = -15823630;
static const int __tmpcosmo_ETIMEDOUT = -15823522;
static const int __tmpcosmo_ETOOMANYREFS = -15823586;
static const int __tmpcosmo_ETXTBSY = -15823638;
static const int __tmpcosmo_EUCLEAN = -15823578;
static const int __tmpcosmo_EUNATCH = -15823504;
static const int __tmpcosmo_EUSERS = -15823668;
static const int __tmpcosmo_EXDEV = -15823752;
static const int __tmpcosmo_EXFULL = -15823784;
/* ---- fcntl(2) commands and lock types (F_*) ---- */
static const int __tmpcosmo_F_DUPFD_CLOEXEC = -15823938;
static const int __tmpcosmo_F_GETLEASE = -15823862;
static const int __tmpcosmo_F_GETLK = -15823916;
static const int __tmpcosmo_F_GETLK64 = -15823846;
static const int __tmpcosmo_F_GETOWN = -15824116;
static const int __tmpcosmo_F_GETPATH = -15824128;
static const int __tmpcosmo_F_GETPIPE_SZ = -15824006;
static const int __tmpcosmo_F_GETSIG = -15824112;
static const int __tmpcosmo_F_MAXFD = -15823896;
static const int __tmpcosmo_F_NOCACHE = -15824048;
static const int __tmpcosmo_F_NOTIFY = -15823898;
static const int __tmpcosmo_F_RDLCK = -15823826;
static const int __tmpcosmo_F_SETLEASE = -15823884;
static const int __tmpcosmo_F_SETLK = -15824088;
static const int __tmpcosmo_F_SETLK64 = -15824154;
static const int __tmpcosmo_F_SETLKW = -15824096;
static const int __tmpcosmo_F_SETLKW64 = -15824104;
static const int __tmpcosmo_F_SETOWN = -15823874;
static const int __tmpcosmo_F_SETPIPE_SZ = -15823958;
static const int __tmpcosmo_F_SETSIG = -15823832;
static const int __tmpcosmo_F_UNLCK = -15824148;
static const int __tmpcosmo_F_WRLCK = -15824058;
/* ---- network interface flags (IFF_*) ---- */
static const int __tmpcosmo_IFF_ALLMULTI = -15824140;
static const int __tmpcosmo_IFF_AUTOMEDIA = -15823962;
static const int __tmpcosmo_IFF_DYNAMIC = -15823848;
static const int __tmpcosmo_IFF_MASTER = -15823900;
static const int __tmpcosmo_IFF_MULTICAST = -15824000;
static const int __tmpcosmo_IFF_NOARP = -15823802;
static const int __tmpcosmo_IFF_NOTRAILERS = -15824130;
static const int __tmpcosmo_IFF_POINTOPOINT = -15824138;
static const int __tmpcosmo_IFF_PORTSEL = -15824150;
static const int __tmpcosmo_IFF_PROMISC = -15824010;
static const int __tmpcosmo_IFF_RUNNING = -15824080;
static const int __tmpcosmo_IFF_SLAVE = -15824022;
static const int __tmpcosmo_LOCAL_PEERCRED = -15823986;
/* ---- signal numbers and sigprocmask(2) operations (SIG*) ---- */
static const int __tmpcosmo_SIGBUS = -15824132;
static const int __tmpcosmo_SIGCHLD = -15824036;
static const int __tmpcosmo_SIGCONT = -15823836;
static const int __tmpcosmo_SIGEMT = -15823972;
static const int __tmpcosmo_SIGINFO = -15824086;
static const int __tmpcosmo_SIGIO = -15823912;
static const int __tmpcosmo_SIGPOLL = -15823854;
static const int __tmpcosmo_SIGPWR = -15824114;
static const int __tmpcosmo_SIGRTMAX = -15824040;
static const int __tmpcosmo_SIGRTMIN = -15824134;
static const int __tmpcosmo_SIGSTKFLT = -15823934;
static const int __tmpcosmo_SIGSTOP = -15824158;
static const int __tmpcosmo_SIGSYS = -15823922;
static const int __tmpcosmo_SIGTHR = -15823902;
static const int __tmpcosmo_SIGTSTP = -15823988;
static const int __tmpcosmo_SIGUNUSED = -15823970;
static const int __tmpcosmo_SIGURG = -15823952;
static const int __tmpcosmo_SIGUSR1 = -15824018;
static const int __tmpcosmo_SIGUSR2 = -15823998;
static const int __tmpcosmo_SIG_BLOCK = -15823800;
static const int __tmpcosmo_SIG_SETMASK = -15824090;
static const int __tmpcosmo_SIG_UNBLOCK = -15824078;
/* ---- setsockopt(2) protocol levels (SOL_*) ---- */
static const int __tmpcosmo_SOL_AAL = -15823976;
static const int __tmpcosmo_SOL_ALG = -15823956;
static const int __tmpcosmo_SOL_ATM = -15823914;
static const int __tmpcosmo_SOL_BLUETOOTH = -15824062;
static const int __tmpcosmo_SOL_CAIF = -15823904;
static const int __tmpcosmo_SOL_DCCP = -15823814;
static const int __tmpcosmo_SOL_DECNET = -15823842;
static const int __tmpcosmo_SOL_ICMPV6 = -15823908;
static const int __tmpcosmo_SOL_IPV6 = -15823808;
static const int __tmpcosmo_SOL_IRDA = -15823880;
static const int __tmpcosmo_SOL_IUCV = -15824156;
static const int __tmpcosmo_SOL_KCM = -15824092;
static const int __tmpcosmo_SOL_LLC = -15823930;
static const int __tmpcosmo_SOL_NETBEUI = -15823894;
static const int __tmpcosmo_SOL_NETLINK = -15824012;
static const int __tmpcosmo_SOL_NFC = -15823942;
static const int __tmpcosmo_SOL_PACKET = -15823806;
static const int __tmpcosmo_SOL_PNPIPE = -15823968;
static const int __tmpcosmo_SOL_PPPOL2TP = -15823816;
static const int __tmpcosmo_SOL_RAW = -15824044;
static const int __tmpcosmo_SOL_RDS = -15824020;
static const int __tmpcosmo_SOL_RXRPC = -15823984;
static const int __tmpcosmo_SOL_SOCKET = -15824050;
static const int __tmpcosmo_SOL_TIPC = -15823940;
static const int __tmpcosmo_SOL_X25 = -15823856;
/* ---- socket options (SO_*) ---- */
static const int __tmpcosmo_SO_ACCEPTCONN = -15823872;
static const int __tmpcosmo_SO_ATTACH_BPF = -15824072;
static const int __tmpcosmo_SO_ATTACH_FILTER = -15824094;
static const int __tmpcosmo_SO_ATTACH_REUSEPORT_CBPF = -15823964;
static const int __tmpcosmo_SO_ATTACH_REUSEPORT_EBPF = -15824060;
static const int __tmpcosmo_SO_BINDTODEVICE = -15823990;
static const int __tmpcosmo_SO_BPF_EXTENSIONS = -15824030;
static const int __tmpcosmo_SO_BROADCAST = -15823882;
static const int __tmpcosmo_SO_BSDCOMPAT = -15824038;
static const int __tmpcosmo_SO_BUSY_POLL = -15823944;
static const int __tmpcosmo_SO_CNX_ADVICE = -15823828;
static const int __tmpcosmo_SO_DETACH_BPF = -15824068;
static const int __tmpcosmo_SO_DETACH_FILTER = -15824032;
static const int __tmpcosmo_SO_DOMAIN = -15823980;
static const int __tmpcosmo_SO_DONTROUTE = -15823918;
static const int __tmpcosmo_SO_ERROR = -15823892;
static const int __tmpcosmo_SO_EXCLUSIVEADDRUSE = -15823858;
static const int __tmpcosmo_SO_GET_FILTER = -15823834;
static const int __tmpcosmo_SO_INCOMING_CPU = -15824074;
static const int __tmpcosmo_SO_KEEPALIVE = -15823890;
static const int __tmpcosmo_SO_LINGER = -15824084;
static const int __tmpcosmo_SO_LOCK_FILTER = -15823804;
static const int __tmpcosmo_SO_MARK = -15824008;
static const int __tmpcosmo_SO_MAX_PACING_RATE = -15824120;
static const int __tmpcosmo_SO_NOFCS = -15823818;
static const int __tmpcosmo_SO_NO_CHECK = -15824152;
static const int __tmpcosmo_SO_OOBINLINE = -15823838;
static const int __tmpcosmo_SO_PASSCRED = -15823888;
static const int __tmpcosmo_SO_PASSSEC = -15823866;
static const int __tmpcosmo_SO_PEEK_OFF = -15823870;
static const int __tmpcosmo_SO_PEERCRED = -15823954;
static const int __tmpcosmo_SO_PEERNAME = -15824042;
static const int __tmpcosmo_SO_PEERSEC = -15823844;
static const int __tmpcosmo_SO_PRIORITY = -15824122;
static const int __tmpcosmo_SO_PROTOCOL = -15823982;
static const int __tmpcosmo_SO_RCVBUF = -15823974;
static const int __tmpcosmo_SO_RCVBUFFORCE = -15823994;
static const int __tmpcosmo_SO_RCVLOWAT = -15824076;
static const int __tmpcosmo_SO_RCVTIMEO = -15824046;
static const int __tmpcosmo_SO_REUSEADDR = -15823810;
static const int __tmpcosmo_SO_REUSEPORT = -15823822;
static const int __tmpcosmo_SO_RXQ_OVFL = -15824066;
static const int __tmpcosmo_SO_SECURITY_AUTHENTICATION = -15824098;
static const int __tmpcosmo_SO_SECURITY_ENCRYPTION_NETWORK = -15824126;
static const int __tmpcosmo_SO_SELECT_ERR_QUEUE = -15824052;
static const int __tmpcosmo_SO_SETFIB = -15823920;
static const int __tmpcosmo_SO_SNDBUF = -15824102;
static const int __tmpcosmo_SO_SNDBUFFORCE = -15823840;
static const int __tmpcosmo_SO_SNDLOWAT = -15823946;
static const int __tmpcosmo_SO_SNDTIMEO = -15824064;
static const int __tmpcosmo_SO_TIMESTAMP = -15823932;
static const int __tmpcosmo_SO_TIMESTAMPING = -15824054;
static const int __tmpcosmo_SO_TIMESTAMPNS = -15823910;
static const int __tmpcosmo_SO_TYPE = -15824144;
static const int __tmpcosmo_SO_USELOOPBACK = -15824110;
static const int __tmpcosmo_SO_WIFI_STATUS = -15824108;
/* ---- termios baud rates (B*) — unsigned, hence positive sentinels ---- */
static const unsigned int __tmpcosmo_B1000000 = 15823512;
static const unsigned int __tmpcosmo_B110 = 15823518;
static const unsigned int __tmpcosmo_B115200 = 15823540;
static const unsigned int __tmpcosmo_B1152000 = 15823538;
static const unsigned int __tmpcosmo_B1200 = 15823548;
static const unsigned int __tmpcosmo_B134 = 15823510;
static const unsigned int __tmpcosmo_B150 = 15823542;
static const unsigned int __tmpcosmo_B1500000 = 15823508;
static const unsigned int __tmpcosmo_B1800 = 15823522;
static const unsigned int __tmpcosmo_B19200 = 15823546;
static const unsigned int __tmpcosmo_B200 = 15823528;
static const unsigned int __tmpcosmo_B2000000 = 15823524;
static const unsigned int __tmpcosmo_B230400 = 15823516;
static const unsigned int __tmpcosmo_B2400 = 15823526;
static const unsigned int __tmpcosmo_B2500000 = 15823558;
static const unsigned int __tmpcosmo_B300 = 15823534;
static const unsigned int __tmpcosmo_B3000000 = 15823530;
static const unsigned int __tmpcosmo_B3500000 = 15823544;
static const unsigned int __tmpcosmo_B38400 = 15823514;
static const unsigned int __tmpcosmo_B4000000 = 15823520;
static const unsigned int __tmpcosmo_B4800 = 15823556;
static const unsigned int __tmpcosmo_B50 = 15823532;
static const unsigned int __tmpcosmo_B500000 = 15823550;
static const unsigned int __tmpcosmo_B57600 = 15823552;
static const unsigned int __tmpcosmo_B576000 = 15823506;
static const unsigned int __tmpcosmo_B600 = 15823554;
static const unsigned int __tmpcosmo_B75 = 15823536;
static const unsigned int __tmpcosmo_B9600 = 15823504;
/* AF_INET6 is the one 16-bit case (sa_family_t-sized), so its sentinel
   must fit in an unsigned short. */
static const unsigned short __tmpcosmo_AF_INET6 = 58236;
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* ACTUALLY_MODS */

View file

@ -1,11 +1,27 @@
#ifndef COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_ #ifndef COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_
#define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_ #define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMESPEC_H_
#ifdef COSMO
#define timespec_get __timespec_get
#define timespec_getres __timespec_getres
#define timespec_cmp __timespec_cmp
#define timespec_tomicros __timespec_tomicros
#define timespec_tomillis __timespec_tomillis
#define timespec_tonanos __timespec_tonanos
#define timespec_add __timespec_add
#define timespec_fromnanos __timespec_fromnanos
#define timespec_frommicros __timespec_frommicros
#define timespec_frommillis __timespec_frommillis
#define timespec_real __timespec_real
#define timespec_mono __timespec_mono
#define timespec_sleep __timespec_sleep
#define timespec_sleep_until __timespec_sleep_until
#define timespec_sub __timespec_sub
#endif /* COSMO */
#if !(__ASSEMBLER__ + __LINKER__ + 0) #if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_ COSMOPOLITAN_C_START_
#define timespec_zero ((struct timespec){0})
#define timespec_max ((struct timespec){0x7fffffffffffffff, 999999999})
struct timespec { struct timespec {
int64_t tv_sec; int64_t tv_sec;
int64_t tv_nsec; /* nanoseconds */ int64_t tv_nsec; /* nanoseconds */
@ -18,9 +34,14 @@ int futimens(int, const struct timespec[2]);
int nanosleep(const struct timespec *, struct timespec *); int nanosleep(const struct timespec *, struct timespec *);
int sys_futex(int *, int, int, const struct timespec *, int *); int sys_futex(int *, int, int, const struct timespec *, int *);
int utimensat(int, const char *, const struct timespec[2], int); int utimensat(int, const char *, const struct timespec[2], int);
#ifdef COSMO
/* cosmopolitan libc's non-posix timespec library
removed by default due to emacs codebase clash */
#define timespec_zero ((struct timespec){0})
#define timespec_max ((struct timespec){0x7fffffffffffffff, 999999999})
int timespec_get(struct timespec *, int); int timespec_get(struct timespec *, int);
int timespec_getres(struct timespec *, int); int timespec_getres(struct timespec *, int);
int timespec_cmp(struct timespec, struct timespec) pureconst; int timespec_cmp(struct timespec, struct timespec) pureconst;
int64_t timespec_tomicros(struct timespec) pureconst; int64_t timespec_tomicros(struct timespec) pureconst;
int64_t timespec_tomillis(struct timespec) pureconst; int64_t timespec_tomillis(struct timespec) pureconst;
@ -34,6 +55,7 @@ struct timespec timespec_mono(void);
struct timespec timespec_sleep(struct timespec); struct timespec timespec_sleep(struct timespec);
int timespec_sleep_until(struct timespec); int timespec_sleep_until(struct timespec);
struct timespec timespec_sub(struct timespec, struct timespec) pureconst; struct timespec timespec_sub(struct timespec, struct timespec) pureconst;
#endif /* COSMO */
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -2,6 +2,16 @@
#define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMEVAL_H_ #define COSMOPOLITAN_LIBC_CALLS_STRUCT_TIMEVAL_H_
#include "libc/calls/struct/timespec.h" #include "libc/calls/struct/timespec.h"
#include "libc/time/struct/timezone.h" #include "libc/time/struct/timezone.h"
#ifdef COSMO
#define timeval_cmp __timeval_cmp
#define timeval_frommicros __timeval_frommicros
#define timeval_frommillis __timeval_frommillis
#define timeval_add __timeval_add
#define timeval_sub __timeval_sub
#define timeval_totimespec __timeval_totimespec
#endif /* COSMO */
#if !(__ASSEMBLER__ + __LINKER__ + 0) #if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_ COSMOPOLITAN_C_START_
@ -16,6 +26,9 @@ int gettimeofday(struct timeval *, struct timezone *);
int lutimes(const char *, const struct timeval[2]); int lutimes(const char *, const struct timeval[2]);
int utimes(const char *, const struct timeval[2]); int utimes(const char *, const struct timeval[2]);
#ifdef COSMO
/* cosmopolitan libc's non-posix timevals library
removed by default due to emacs codebase clash */
int timeval_cmp(struct timeval, struct timeval) pureconst; int timeval_cmp(struct timeval, struct timeval) pureconst;
struct timeval timeval_frommicros(int64_t) pureconst; struct timeval timeval_frommicros(int64_t) pureconst;
struct timeval timeval_frommillis(int64_t) pureconst; struct timeval timeval_frommillis(int64_t) pureconst;
@ -23,6 +36,7 @@ struct timeval timeval_add(struct timeval, struct timeval) pureconst;
struct timeval timeval_sub(struct timeval, struct timeval) pureconst; struct timeval timeval_sub(struct timeval, struct timeval) pureconst;
struct timeval timespec_totimeval(struct timespec) pureconst; struct timeval timespec_totimeval(struct timespec) pureconst;
struct timespec timeval_totimespec(struct timeval) pureconst; struct timespec timeval_totimespec(struct timeval) pureconst;
#endif /* COSMO */
COSMOPOLITAN_C_END_ COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ #endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

View file

@ -725,6 +725,9 @@ void abort(void) wontreturn;
#endif /* GCC8+ */ #endif /* GCC8+ */
#if __GNUC__ + 0 >= 9 #if __GNUC__ + 0 >= 9
#pragma GCC diagnostic ignored /* "always true" breaks dce */ "-Waddress" #pragma GCC diagnostic ignored /* "always true" breaks dce */ "-Waddress"
#if __GNUC__ >= 11
#pragma GCC diagnostic ignored /* orwellian */ "-Wold-style-definition"
#endif /* GCC11+ */
#endif /* GCC9+ */ #endif /* GCC9+ */
#endif /* !C++ */ #endif /* !C++ */
#endif /* GCC && !LLVM */ #endif /* GCC && !LLVM */

View file

@ -1,232 +1,28 @@
This is a modern statically-linked GNU C2X toolchain. DESCRIPTION
You have the freedom to obtain the original sources to these binaries, Cosmopolitan GCC
and build ones just like them, by visiting: Prebuilt x86_64-linux binaries
An APE-friendly C/C++ compiler
https://www.gnu.org/ LICENSE
https://github.com/richfelker/musl-cross-make
The musl-cross-make tool also produces libraries and header files. We've GPLv3 and other licenses (see LICENSE.txt)
only vendored the statically-linked executable files, since Cosmopolitan
won't depend on GPL-licensed headers / runtime libraries.
We haven't made any modifications to the original software. The versions ORIGIN
we chose are documented in $PKG/LICENSE.txt. Here's our Musl
build config for maximum transparency:
commit 38e52db8358c043ae82b346a2e6e66bc86a53bc1 @ahgamut's musl-cross-make fork
Author: Rich Felker <dalias@aerifal.cx> https://github.com/ahgamut/musl-cross-make/
Date: Wed Dec 18 14:29:07 2019 -0500 d0f33e2162cf5e5b30cdf3b3accc0d0f7756830c
switch linux kernel headers to 4.19.88 by default MODIFICATIONS
using slim headers-only version. this change is needed to support all ahgamut's musl-cross-make fork includes a 2kLOC patch that modifies
future versions of musl on 32-bit archs, since prior to 4.16 the GCC so it'll compile C code like `switch(errno){case EINVAL: etc.}`
kernel headers had incompatibility with userspace time_t not matching
the kernel's old (32-bit) time_t. support for older headers will be
dropped entirely soon.
TARGET = x86_64-linux-musl SEE ALSO
OUTPUT = /opt/cross9
GCC_VER = 9.2.0
export LANG=en_US.UTF-8
export LC_CTYPE=en_US.UTF-8
COMMON_CONFIG += CC="/opt/cross9/bin/x86_64-linux-musl-cc -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
COMMON_CONFIG += CXX="/opt/cross9/bin/x86_64-linux-musl-c++ -static --static -g -Os -ftree-vectorize -fvect-cost-model=unlimited -mstringop-strategy=vector_loop -save-temps -fno-ident"
COMMON_CONFIG += LD="/opt/cross9/bin/x86_64-linux-musl-ld --build-id=none"
COMMON_CONFIG += NM="/opt/cross9/bin/x86_64-linux-musl-nm"
COMMON_CONFIG += LDFLAGS="-Wl,--build-id=none"
COMMON_CONFIG += OBJCOPY="/opt/cross9/bin/x86_64-linux-musl-objcopy"
COMMON_CONFIG += --disable-nls --disable-lto
GCC_CONFIG += --enable-languages=c,c++
GCC_CONFIG += --disable-multilib
GCC_CONFIG += --with-gnu-as
GCC_CONFIG += --with-gnu-ld
GCC_CONFIG += --disable-multilib
GCC_CONFIG += --enable-sjlj-exceptions
GCC_CONFIG += --disable-threads
GCC_CONFIG += --disable-tls
COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=
#!/bin/sh third_party/gcc/portcosmo.patch
set -e
export LC_ALL=C
export GUNZ="/bin/gzip --rsyncable -9 -c"
BASE=/opt/cross9
PKG=third_party/gcc
VERS=9.2.0
if [ ! -d $BASE ]; then NOTES
echo error: run make install >&2
exit 1
fi
if [ -d $BASE/$PKG ]; then My name is Justine Tunney and I approve of these binaries.
rm -rf $BASE/$PKG
fi
mkdir -p $BASE/$PKG/bin
mkdir -p $BASE/$PKG/libexec/gcc/x86_64-linux-musl/$VERS
mkdir -p $BASE/$PKG/x86_64-linux-musl/bin
cp $BASE/bin/x86_64-linux-musl-gcov-dump $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
cp $BASE/bin/x86_64-linux-musl-cc $BASE/$PKG/bin/x86_64-linux-musl-gcc
cp $BASE/bin/x86_64-linux-musl-addr2line $BASE/$PKG/bin/x86_64-linux-musl-addr2line
cp $BASE/bin/x86_64-linux-musl-ar $BASE/$PKG/bin/x86_64-linux-musl-ar
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
cp $BASE/bin/x86_64-linux-musl-c++ $BASE/$PKG/bin/x86_64-linux-musl-g++
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
cp $BASE/bin/x86_64-linux-musl-gcc-nm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
cp $BASE/bin/x86_64-linux-musl-c++filt $BASE/$PKG/bin/x86_64-linux-musl-c++filt
cp $BASE/bin/x86_64-linux-musl-elfedit $BASE/$PKG/bin/x86_64-linux-musl-elfedit
cp $BASE/bin/x86_64-linux-musl-ld $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
cp $BASE/bin/x86_64-linux-musl-size $BASE/$PKG/bin/x86_64-linux-musl-size
cp $BASE/bin/x86_64-linux-musl-strings $BASE/$PKG/bin/x86_64-linux-musl-strings
cp $BASE/bin/x86_64-linux-musl-objcopy $BASE/$PKG/bin/x86_64-linux-musl-objcopy
cp $BASE/bin/x86_64-linux-musl-nm $BASE/$PKG/bin/x86_64-linux-musl-nm
cp $BASE/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
cp $BASE/bin/x86_64-linux-musl-readelf $BASE/$PKG/bin/x86_64-linux-musl-readelf
cp $BASE/bin/x86_64-linux-musl-objdump $BASE/$PKG/bin/x86_64-linux-musl-objdump
cp $BASE/bin/x86_64-linux-musl-gcc-ar $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
cp $BASE/bin/x86_64-linux-musl-gcov $BASE/$PKG/bin/x86_64-linux-musl-gcov
cp $BASE/bin/x86_64-linux-musl-ranlib $BASE/$PKG/bin/x86_64-linux-musl-ranlib
cp $BASE/bin/x86_64-linux-musl-as $BASE/$PKG/bin/x86_64-linux-musl-as
cp $BASE/bin/x86_64-linux-musl-gcc-ranlib $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
cp $BASE/bin/x86_64-linux-musl-cpp $BASE/$PKG/bin/x86_64-linux-musl-cpp
cp $BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
cp $BASE/bin/x86_64-linux-musl-gprof $BASE/$PKG/bin/x86_64-linux-musl-gprof
cp $BASE/bin/x86_64-linux-musl-gcov-tool $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-addr2line
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ar
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-g++
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-c++filt
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-elfedit
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-size
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strings
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objcopy
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-nm
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-readelf
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-objdump
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-ranlib
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-as
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-cpp
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-strip
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gprof
$BASE/bin/x86_64-linux-musl-strip $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump >$BASE/$PKG/bin/x86_64-linux-musl-gcov-dump.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc >$BASE/$PKG/bin/x86_64-linux-musl-gcc.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-addr2line >$BASE/$PKG/bin/x86_64-linux-musl-addr2line.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ar >$BASE/$PKG/bin/x86_64-linux-musl-ar.gz
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-g++ >$BASE/$PKG/bin/x86_64-linux-musl-g++.gz
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm >$BASE/$PKG/bin/x86_64-linux-musl-gcc-nm.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-c++filt >$BASE/$PKG/bin/x86_64-linux-musl-c++filt.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-elfedit >$BASE/$PKG/bin/x86_64-linux-musl-elfedit.gz
$GUNZ $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd >$BASE/$PKG/x86_64-linux-musl/bin/ld.bfd.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-size >$BASE/$PKG/bin/x86_64-linux-musl-size.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strings >$BASE/$PKG/bin/x86_64-linux-musl-strings.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objcopy >$BASE/$PKG/bin/x86_64-linux-musl-objcopy.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-nm >$BASE/$PKG/bin/x86_64-linux-musl-nm.gz
$GUNZ $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1 >$BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-readelf >$BASE/$PKG/bin/x86_64-linux-musl-readelf.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-objdump >$BASE/$PKG/bin/x86_64-linux-musl-objdump.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ar.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov >$BASE/$PKG/bin/x86_64-linux-musl-gcov.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-ranlib.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-as >$BASE/$PKG/bin/x86_64-linux-musl-as.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib >$BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-cpp >$BASE/$PKG/bin/x86_64-linux-musl-cpp.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-strip >$BASE/$PKG/bin/x86_64-linux-musl-strip.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gprof >$BASE/$PKG/bin/x86_64-linux-musl-gprof.gz
$GUNZ $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool >$BASE/$PKG/bin/x86_64-linux-musl-gcov-tool.gz
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-dump
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc
rm $BASE/$PKG/bin/x86_64-linux-musl-addr2line
rm $BASE/$PKG/bin/x86_64-linux-musl-ar
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1plus
rm $BASE/$PKG/bin/x86_64-linux-musl-g++
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/collect2
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-nm
rm $BASE/$PKG/bin/x86_64-linux-musl-c++filt
rm $BASE/$PKG/bin/x86_64-linux-musl-elfedit
rm $BASE/$PKG/x86_64-linux-musl/bin/ld.bfd
rm $BASE/$PKG/bin/x86_64-linux-musl-size
rm $BASE/$PKG/bin/x86_64-linux-musl-strings
rm $BASE/$PKG/bin/x86_64-linux-musl-objcopy
rm $BASE/$PKG/bin/x86_64-linux-musl-nm
rm $BASE/$PKG/libexec/gcc/x86_64-linux-musl/9.2.0/cc1
rm $BASE/$PKG/bin/x86_64-linux-musl-readelf
rm $BASE/$PKG/bin/x86_64-linux-musl-objdump
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ar
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov
rm $BASE/$PKG/bin/x86_64-linux-musl-ranlib
rm $BASE/$PKG/bin/x86_64-linux-musl-as
rm $BASE/$PKG/bin/x86_64-linux-musl-gcc-ranlib
rm $BASE/$PKG/bin/x86_64-linux-musl-cpp
rm $BASE/$PKG/bin/x86_64-linux-musl-strip
rm $BASE/$PKG/bin/x86_64-linux-musl-gprof
rm $BASE/$PKG/bin/x86_64-linux-musl-gcov-tool
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-cc
ln -s x86_64-linux-musl-gcc $BASE/$PKG/bin/x86_64-linux-musl-gcc-9.2.0
ln -s ../../bin/x86_64-linux-musl-ar $BASE/$PKG/x86_64-linux-musl/bin/ar
ln -s x86_64-linux-musl-g++ $BASE/$PKG/bin/x86_64-linux-musl-c++
ln -s ld.bfd $BASE/$PKG/x86_64-linux-musl/bin/ld
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld.bfd
ln -s ../x86_64-linux-musl/bin/ld.bfd $BASE/$PKG/bin/x86_64-linux-musl-ld
ln -s ../../bin/x86_64-linux-musl-objcopy $BASE/$PKG/x86_64-linux-musl/bin/objcopy
ln -s ../../bin/x86_64-linux-musl-nm $BASE/$PKG/x86_64-linux-musl/bin/nm
ln -s ../../bin/x86_64-linux-musl-readelf $BASE/$PKG/x86_64-linux-musl/bin/readelf
ln -s ../../bin/x86_64-linux-musl-objdump $BASE/$PKG/x86_64-linux-musl/bin/objdump
ln -s ../../bin/x86_64-linux-musl-ranlib $BASE/$PKG/x86_64-linux-musl/bin/ranlib
ln -s ../../bin/x86_64-linux-musl-as $BASE/$PKG/x86_64-linux-musl/bin/as
ln -s ../../bin/x86_64-linux-musl-strip $BASE/$PKG/x86_64-linux-musl/bin/strip
# Emit a provenance README into the vendored toolchain directory: the
# static notice below, followed by the exact git commit, the build
# config, and this bundling script itself, for full reproducibility.
{
cat <<'EOF'
This is a modern statically-linked GNU C2X toolchain.
You have the freedom to obtain the original sources to these binaries,
and build ones just like them, by visiting:
https://www.gnu.org/
https://github.com/richfelker/musl-cross-make
The musl-cross-make tool also produces libraries and header files. We've
only vendored the statically-linked executable files, since Cosmopolitan
won't depend on GPL-licensed headers / runtime libraries.
We haven't made any modifications to the original software. The versions
we chose are documented in $PKG/LICENSE.txt. Here's our Musl
build config for maximum transparency:
EOF
git show --quiet
echo
cat config.mak
echo
cat bundle.sh
} >$BASE/$PKG/README.cosmo
# Concatenate every COPYING/LICENSE file found in the build tree into a
# single LICENSE.txt, each section prefixed with its path and a rule.
{
for f in $(find . -iname \*copying\* -or -iname \*license\* | sort); do
printf '\n'
printf '%s\n' "$f"
printf '========================================================================\n'
cat "$f"
done
} >$BASE/$PKG/LICENSE.txt

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

97
third_party/gcc/config.mak vendored Normal file
View file

@ -0,0 +1,97 @@
#
# config.mak.dist - sample musl-cross-make configuration
#
# Copy to config.mak and edit as desired.
#
# There is no default TARGET; you must select one here or on the make
# command line. Some examples:
# TARGET = i486-linux-musl
TARGET = aarch64-linux-musl
# TARGET = arm-linux-musleabi
# TARGET = arm-linux-musleabihf
# TARGET = sh2eb-linux-muslfdpic
# TARGET = powerpc64le-linux-musl
# TARGET = aarch64-linux-musl
# By default, cross compilers are installed to ./output under the top-level
# musl-cross-make directory and can later be moved wherever you want them.
# To install directly to a specific location, set it here. Multiple targets
# can safely be installed in the same location. Some examples:
OUTPUT = /opt/cross11portcosmo
# OUTPUT = /usr/local
# By default, latest supported release versions of musl and the toolchain
# components are used. You can override those here, but the version selected
# must be supported (under hashes/ and patches/) to work. For musl, you
# can use "git-refname" (e.g. git-master) instead of a release. Setting a
# blank version for gmp, mpc, mpfr and isl will suppress download and
# in-tree build of these libraries and instead depend on pre-installed
# libraries when available (isl is optional and not set by default).
# Setting a blank version for linux will suppress installation of kernel
# headers, which are not needed unless compiling programs that use them.
# BINUTILS_VER = 2.25.1
GCC_VER = 11.2.0
# MUSL_VER = git-master
# GMP_VER =
# MPC_VER =
# MPFR_VER =
# ISL_VER =
# LINUX_VER =
# By default source archives are downloaded with wget. curl is also an option.
# DL_CMD = wget -c -O
# DL_CMD = curl -C - -L -o
# Check sha-1 hashes of downloaded source archives. On gnu systems this is
# usually done with sha1sum.
# SHA1_CMD = sha1sum -c
# SHA1_CMD = sha1 -c
# SHA1_CMD = shasum -a 1 -c
# Something like the following can be used to produce a static-linked
# toolchain that's deployable to any system with matching arch, using
# an existing musl-targeted cross compiler. This only works if the
# system you build on can natively (or via binfmt_misc and qemu) run
# binaries produced by the existing toolchain (in this example, i486).
# MUSL_CONFIG += --enable-debug
# MUSL_CONFIG += CFLAGS="-Os -fno-omit-frame-pointer -fno-optimize-sibling-calls -mno-omit-leaf-frame-pointer"
MUSL_CONFIG += CFLAGS="-Os"
COMMON_CONFIG += CC="/opt/cross/bin/x86_64-linux-musl-gcc -static --static"
COMMON_CONFIG += CXX="/opt/cross/bin/x86_64-linux-musl-g++ -static --static"
# COMMON_CONFIG += CC="gcc -static --static"
# COMMON_CONFIG += CXX="g++ -static --static"
# Recommended options for smaller build for deploying binaries:
COMMON_CONFIG += CFLAGS="-Os -g0"
COMMON_CONFIG += CXXFLAGS="-Os -g0"
COMMON_CONFIG += LDFLAGS="-s"
# Options you can add for faster/simpler build at the expense of features:
COMMON_CONFIG += --disable-nls
GCC_CONFIG += --disable-libquadmath --disable-decimal-float
GCC_CONFIG += --disable-libitm
GCC_CONFIG += --disable-fixed-point
GCC_CONFIG += --disable-lto
# By default C and C++ are the only languages enabled, and these are
# the only ones tested and known to be supported. You can uncomment the
# following and add other languages if you want to try getting them to
# work too.
GCC_CONFIG += --enable-languages=c,c++ #--enable-plugin
# You can keep the local build path out of your toolchain binaries and
# target libraries with the following, but then gdb needs to be told
# where to look for source files.
# COMMON_CONFIG += --with-debug-prefix-map=$(CURDIR)=

View file

View file

@ -1,141 +0,0 @@
*asm:
%{m16|m32:--32} %{m16|m32:;:--64} %{msse2avx:%{!mavx:-msse2avx}}
*asm_debug:
%{%:debug-level-gt(0):%{gstabs*:--gstabs}%{!gstabs*:%{g*:--gdwarf2}}} %{fdebug-prefix-map=*:--debug-prefix-map %*}
*asm_final:
%{gsplit-dwarf:
objcopy --extract-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} %{c:%{o*:%:replace-extension(%{o*:%*} .dwo)}%{!o*:%b.dwo}}%{!c:%b.dwo}
objcopy --strip-dwo %{c:%{o*:%*}%{!o*:%b%O}}%{!c:%U%O} }
*asm_options:
%{-target-help:%:print-asm-header()} %{v} %{w:-W} %{I*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %a %Y %{c:%W{o*}%{!o*:-o %w%b%O}}%{!c:-o %d%w%u%O}
*invoke_as:
%{!fwpa*: %{fcompare-debug=*|fdump-final-insns=*:%:compare-debug-dump-opt()} %{!S:-o %|.s |
as %(asm_options) %m.s %A } }
*cpp:
%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}
*cpp_options:
%(cpp_unique_options) %1 %{m*} %{std*&ansi&trigraphs} %{W*&pedantic*} %{w} %{f*} %{g*:%{%:debug-level-gt(0):%{g*} %{!fno-working-directory:-fworking-directory}}} %{O*} %{undef} %{save-temps*:-fpch-preprocess}
*cpp_debug_options:
%{d*}
*cpp_unique_options:
%{!Q:-quiet} %{nostdinc*} %{C} %{CC} %{v} %@{I*&F*} %{P} %I %{MD:-MD %{!o:%b.d}%{o*:%.d%*}} %{MMD:-MMD %{!o:%b.d}%{o*:%.d%*}} %{M} %{MM} %{MF*} %{MG} %{MP} %{MQ*} %{MT*} %{!E:%{!M:%{!MM:%{!MT:%{!MQ:%{MD|MMD:%{o*:-MQ %*}}}}}}} %{remap} %{g3|ggdb3|gstabs3|gxcoff3|gvms3:-dD} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %{H} %C %{D*&U*&A*} %{i*} %Z %i %{E|M|MM:%W{o*}}
*trad_capable_cpp:
cc1 -E %{traditional|traditional-cpp:-traditional-cpp}
*cc1:
%{!mandroid|tno-android-cc:%(cc1_cpu) %{profile:-p};:%(cc1_cpu) %{profile:-p} %{!fno-pic:%{!fno-PIC:%{!fpic:%{!fPIC: -fPIC}}}}}
*cc1_options:
%{pg:%{fomit-frame-pointer:%e-pg and -fomit-frame-pointer are incompatible}} %{!iplugindir*:%{fplugin*:%:find-plugindir()}} %1 %{!Q:-quiet} %{!dumpbase:-dumpbase %B} %{d*} %{m*} %{aux-info*} %{fcompare-debug-second:%:compare-debug-auxbase-opt(%b)} %{!fcompare-debug-second:%{c|S:%{o*:-auxbase-strip %*}%{!o*:-auxbase %b}}}%{!c:%{!S:-auxbase %b}} %{g*} %{O*} %{W*&pedantic*} %{w} %{std*&ansi&trigraphs} %{v:-version} %{pg:-p} %{p} %{f*} %{undef} %{Qn:-fno-ident} %{Qy:} %{-help:--help} %{-target-help:--target-help} %{-version:--version} %{-help=*:--help=%*} %{!fsyntax-only:%{S:%W{o*}%{!o*:-o %b.s}}} %{fsyntax-only:-o %j} %{-param*} %{coverage:-fprofile-arcs -ftest-coverage} %{fprofile-arcs|fprofile-generate*|coverage: %{!fprofile-update=single: %{pthread:-fprofile-update=prefer-atomic}}}
*cc1plus:
*link_gcc_c_sequence:
%{static|static-pie:--start-group} %G %{!nolibc:%L} %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}
*link_ssp:
%{fstack-protector|fstack-protector-all|fstack-protector-strong|fstack-protector-explicit:-lssp_nonshared}
*endfile:
--push-state --pop-state
*link:
%{!mandroid|tno-android-ld:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}};:%{m16|m32:;:-m elf_x86_64} %{m16|m32:-m elf_i386} %{mx32:-m elf32_x86_64} %{shared:-shared} %{!shared: %{!static: %{!static-pie: %{rdynamic:-export-dynamic} %{m16|m32:-dynamic-linker } %{m16|m32:;:-dynamic-linker} }} %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}} %{shared: -Bsymbolic}}
*lib:
--push-state --pop-state
*link_gomp:
*libgcc:
--push-state --pop-state
*startfile:
--push-state --pop-state
*cross_compile:
1
*version:
9.2.0
*multilib:
. ;
*multilib_defaults:
m64
*multilib_extra:
*multilib_matches:
*multilib_exclusions:
*multilib_options:
*multilib_reuse:
*linker:
collect2
*linker_plugin_file:
*lto_wrapper:
*lto_gcc:
*post_link:
*link_libgcc:
%D
*md_exec_prefix:
*md_startfile_prefix:
*md_startfile_prefix_1:
*startfile_prefix_spec:
*sysroot_spec:
--sysroot=%R
*sysroot_suffix_spec:
*sysroot_hdrs_suffix_spec:
*self_spec:
*cc1_cpu:
%{march=native:%>march=native %:local_cpu_detect(arch) %{!mtune=*:%>mtune=native %:local_cpu_detect(tune)}} %{mtune=native:%>mtune=native %:local_cpu_detect(tune)}
*link_command:
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S: %(linker) %{fuse-linker-plugin: %e-fuse-linker-plugin is not supported in this configuration}%{flto|flto=*:%<fcompare-debug*} %{flto} %{fno-lto} %{flto=*} %l %{shared|r:;pie|static-pie:-pie %{static|static-pie:--no-dynamic-linker -z text -Bsymbolic}} %{fuse-ld=*:-fuse-ld=%*} %{gz|gz=zlib:--compress-debug-sections=zlib} %{gz=none:--compress-debug-sections=none} %{gz=zlib-gnu:--compress-debug-sections=zlib-gnu} %X %{o*} %{e*} %{N} %{n} %{r} %{s} %{t} %{u*} %{z} %{Z} %{!nostdlib:%{!r:%{!nostartfiles:%S}}} %{static|no-pie|static-pie:} %@{L*} %(mfwrap) %(link_libgcc) %{fvtable-verify=none:} %{fvtable-verify=std: %e-fvtable-verify=std is not supported in this configuration} %{fvtable-verify=preinit: %e-fvtable-verify=preinit is not supported in this configuration} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address):%{!shared:libasan_preinit%O%s} %{static-libasan:%{!shared:-Bstatic --whole-archive -lasan --no-whole-archive -Bdynamic}}%{!static-libasan:-lasan}} %{%:sanitize(thread):%{!shared:libtsan_preinit%O%s} %{static-libtsan:%{!shared:-Bstatic --whole-archive -ltsan --no-whole-archive -Bdynamic}}%{!static-libtsan:-ltsan}} %{%:sanitize(leak):%{!shared:liblsan_preinit%O%s} %{static-liblsan:%{!shared:-Bstatic --whole-archive -llsan --no-whole-archive -Bdynamic}}%{!static-liblsan:-llsan}}}}} %o %{fopenacc|fopenmp|%:gt(%{ftree-parallelize-loops=*:%*} 1): %:include(libgomp.spec)%(link_gomp)} %{fgnu-tm:%:include(libitm.spec)%(link_itm)} %(mflib) %{fsplit-stack: --wrap=pthread_create} %{fprofile-arcs|fprofile-generate*|coverage:-lgcov} %{!nostdlib:%{!r:%{!nodefaultlibs:%{%:sanitize(address): %{static-libasan|static:%:include(libsanitizer.spec)%(link_libasan)} %{static:%ecannot specify -static with -fsanitize=address}} %{%:sanitize(thread): %{static-libtsan|static:%:include(libsanitizer.spec)%(link_libtsan)} %{static:%ecannot specify -static with -fsanitize=thread}} %{%:sanitize(undefined):%{static-libubsan:-Bstatic} -lubsan 
%{static-libubsan:-Bdynamic} %{static-libubsan|static:%:include(libsanitizer.spec)%(link_libubsan)}} %{%:sanitize(leak): %{static-liblsan|static:%:include(libsanitizer.spec)%(link_liblsan)}}}}} %{!nostdlib:%{!r:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}} %{!nostdlib:%{!r:%{!nostartfiles:%E}}} %{T*}
%(post_link) }}}}}}

Binary file not shown.

Binary file not shown.

1869
third_party/gcc/portcosmo.patch vendored Normal file

File diff suppressed because it is too large Load diff

46
third_party/gcc/upgrade-cosmo-gcc.sh vendored Executable file
View file

@ -0,0 +1,46 @@
#!/bin/sh
# Imports a freshly built portcosmo GCC toolchain into the repository's
# vendored third_party/gcc tree: renames the version-numbered directories,
# rewrites version strings in the *.sym stubs, then recompresses each
# executable from the import directory as a .gz artifact.
#
#   usage: upgrade-cosmo-gcc.sh [ARCH] [IMPORT_DIR]
#
#   ARCH        target architecture triple prefix (default: x86_64)
#   IMPORT_DIR  toolchain build output to import (default: /opt/cross11portcosmo)
ARCH=${1:-x86_64}
IMPORT=${2:-/opt/cross11portcosmo}
PREFIX=third_party/gcc/
OLDVERSION=9.2.0
NEWVERSION=11.2.0

# Drop the stale unpacked toolchain so the build re-extracts the new one.
rm -rf o/third_party/gcc

# Rename the version-numbered directories in the vendored tree; abort on
# failure so we don't run sed/gzip against a half-renamed tree.
mv "$PREFIX/libexec/gcc/$ARCH-linux-musl/$OLDVERSION" \
   "$PREFIX/libexec/gcc/$ARCH-linux-musl/$NEWVERSION" || exit
mv "$PREFIX/lib/gcc/$ARCH-linux-musl/$OLDVERSION" \
   "$PREFIX/lib/gcc/$ARCH-linux-musl/$NEWVERSION" || exit

# Rewrite the embedded version strings in this arch's .sym files.
# (The dots in $OLDVERSION are regex metacharacters, which is harmless
# for version strings like 9.2.0.)
sed -i -e "s/$OLDVERSION/$NEWVERSION/g" $(find "$PREFIX" -name \*.sym | grep "$ARCH") || exit

# Every executable we vendor, relative to both $IMPORT and $PREFIX.
FILES="
$ARCH-linux-musl/bin/ld.bfd
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/collect2
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1
libexec/gcc/$ARCH-linux-musl/$NEWVERSION/cc1plus
bin/$ARCH-linux-musl-elfedit
bin/$ARCH-linux-musl-nm
bin/$ARCH-linux-musl-objcopy
bin/$ARCH-linux-musl-gcc
bin/$ARCH-linux-musl-c++filt
bin/$ARCH-linux-musl-gcc-ranlib
bin/$ARCH-linux-musl-addr2line
bin/$ARCH-linux-musl-objdump
bin/$ARCH-linux-musl-gcov
bin/$ARCH-linux-musl-ranlib
bin/$ARCH-linux-musl-gcc-nm
bin/$ARCH-linux-musl-strip
bin/$ARCH-linux-musl-gcov-tool
bin/$ARCH-linux-musl-gprof
bin/$ARCH-linux-musl-strings
bin/$ARCH-linux-musl-gcov-dump
bin/$ARCH-linux-musl-cpp
bin/$ARCH-linux-musl-ar
bin/$ARCH-linux-musl-readelf
bin/$ARCH-linux-musl-size
bin/$ARCH-linux-musl-as
bin/$ARCH-linux-musl-g++
bin/$ARCH-linux-musl-gcc-ar
"

# Compress each imported executable into the vendored tree at maximum
# compression; stop on the first failure.
for f in $FILES; do
gzip -9 <"$IMPORT/$f" >"$PREFIX/$f.gz" || exit
done

Binary file not shown.

View file

@ -1,43 +1,53 @@
#if !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <adxintrin.h> directly; include <immintrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <adxintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _ADXINTRIN_H_INCLUDED #ifndef _ADXINTRIN_H_INCLUDED
#define _ADXINTRIN_H_INCLUDED #define _ADXINTRIN_H_INCLUDED
extern __inline unsigned char
__funline unsigned char _subborrow_u32(unsigned char __CF, unsigned int __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned int __Y, unsigned int *__P) { _subborrow_u32 (unsigned char __CF, unsigned int __X,
unsigned int __Y, unsigned int *__P)
{
return __builtin_ia32_sbb_u32 (__CF, __X, __Y, __P); return __builtin_ia32_sbb_u32 (__CF, __X, __Y, __P);
} }
extern __inline unsigned char
__funline unsigned char _addcarry_u32(unsigned char __CF, unsigned int __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned int __Y, unsigned int *__P) { _addcarry_u32 (unsigned char __CF, unsigned int __X,
unsigned int __Y, unsigned int *__P)
{
return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P); return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
} }
extern __inline unsigned char
__funline unsigned char _addcarryx_u32(unsigned char __CF, unsigned int __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned int __Y, unsigned int *__P) { _addcarryx_u32 (unsigned char __CF, unsigned int __X,
unsigned int __Y, unsigned int *__P)
{
return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P); return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
} }
#ifdef __x86_64__ #ifdef __x86_64__
__funline unsigned char _subborrow_u64(unsigned char __CF, unsigned long long __X, extern __inline unsigned char
unsigned long long __Y, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long *__P) { _subborrow_u64 (unsigned char __CF, unsigned long long __X,
unsigned long long __Y, unsigned long long *__P)
{
return __builtin_ia32_sbb_u64 (__CF, __X, __Y, __P); return __builtin_ia32_sbb_u64 (__CF, __X, __Y, __P);
} }
extern __inline unsigned char
__funline unsigned char _addcarry_u64(unsigned char __CF, unsigned long long __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long __Y, _addcarry_u64 (unsigned char __CF, unsigned long long __X,
unsigned long long *__P) { unsigned long long __Y, unsigned long long *__P)
{
return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P); return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
} }
extern __inline unsigned char
__funline unsigned char _addcarryx_u64(unsigned char __CF, unsigned long long __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long __Y, _addcarryx_u64 (unsigned char __CF, unsigned long long __X,
unsigned long long *__P) { unsigned long long __Y, unsigned long long *__P)
{
return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P); return __builtin_ia32_addcarryx_u64 (__CF, __X, __Y, __P);
} }
#endif #endif
#endif
#endif /* _ADXINTRIN_H_INCLUDED */ #endif

View file

@ -1,58 +1,54 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _AMMINTRIN_H_INCLUDED #ifndef _AMMINTRIN_H_INCLUDED
#define _AMMINTRIN_H_INCLUDED #define _AMMINTRIN_H_INCLUDED
#ifdef __x86_64__
#include "third_party/intel/pmmintrin.internal.h" #include "third_party/intel/pmmintrin.internal.h"
#ifndef __SSE4A__ #ifndef __SSE4A__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("sse4a") #pragma GCC target("sse4a")
#define __DISABLE_SSE4A__ #define __DISABLE_SSE4A__
#endif /* __SSE4A__ */ #endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void _mm_stream_sd(double* __P, __m128d __Y) { _mm_stream_sd (double * __P, __m128d __Y)
{
__builtin_ia32_movntsd (__P, (__v2df) __Y); __builtin_ia32_movntsd (__P, (__v2df) __Y);
} }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void _mm_stream_ss(float* __P, __m128 __Y) { _mm_stream_ss (float * __P, __m128 __Y)
{
__builtin_ia32_movntss (__P, (__v4sf) __Y); __builtin_ia32_movntss (__P, (__v4sf) __Y);
} }
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128i _mm_extract_si64(__m128i __X, __m128i __Y) { _mm_extract_si64 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y); return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m128i _mm_extracti_si64(__m128i __X, unsigned const int __I, extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned const int __L) { _mm_extracti_si64 (__m128i __X, unsigned const int __I, unsigned const int __L)
{
return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L); return (__m128i) __builtin_ia32_extrqi ((__v2di) __X, __I, __L);
} }
#else #else
#define _mm_extracti_si64(X, I, L) \ #define _mm_extracti_si64(X, I, L) ((__m128i) __builtin_ia32_extrqi ((__v2di)(__m128i)(X), (unsigned int)(I), (unsigned int)(L)))
((__m128i)__builtin_ia32_extrqi((__v2di)(__m128i)(X), (unsigned int)(I), \
(unsigned int)(L)))
#endif #endif
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128i _mm_insert_si64(__m128i __X, __m128i __Y) { _mm_insert_si64 (__m128i __X,__m128i __Y)
{
return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y); return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m128i _mm_inserti_si64(__m128i __X, __m128i __Y, extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned const int __I, _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned const int __L)
unsigned const int __L) { {
return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L); return (__m128i) __builtin_ia32_insertqi ((__v2di)__X, (__v2di)__Y, __I, __L);
} }
#else #else
#define _mm_inserti_si64(X, Y, I, L) \ #define _mm_inserti_si64(X, Y, I, L) ((__m128i) __builtin_ia32_insertqi ((__v2di)(__m128i)(X), (__v2di)(__m128i)(Y), (unsigned int)(I), (unsigned int)(L)))
((__m128i)__builtin_ia32_insertqi((__v2di)(__m128i)(X), \
(__v2di)(__m128i)(Y), (unsigned int)(I), \
(unsigned int)(L)))
#endif #endif
#ifdef __DISABLE_SSE4A__ #ifdef __DISABLE_SSE4A__
#undef __DISABLE_SSE4A__ #undef __DISABLE_SSE4A__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_SSE4A__ */ #endif
#endif
#endif /* __x86_64__ */ #endif
#endif /* _AMMINTRIN_H_INCLUDED */

View file

@ -0,0 +1,22 @@
/* clang-format off */
/*
 * AMX-BF16 intrinsics (vendored from GCC 11.2.0, adapted for Cosmopolitan).
 * Provides the tile dot-product intrinsic for bfloat16 operands. Only
 * compiled on x86_64 when not assembling or linking.
 */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AMXBF16INTRIN_H_INCLUDED
#define _AMXBF16INTRIN_H_INCLUDED
/* Temporarily enable the amx-bf16 target feature if the translation
   unit wasn't already compiled with it; popped again below. */
#if !defined(__AMX_BF16__)
#pragma GCC push_options
#pragma GCC target("amx-bf16")
#define __DISABLE_AMX_BF16__
#endif
#if defined(__x86_64__) && defined(__AMX_BF16__)
/* tdpbf16ps: dot-product of BF16 tiles src1*src2 accumulated into dst.
   Tile numbers must be literal constants since they're pasted into the
   mnemonic; the {...|...} syntax covers AT&T and Intel operand order. */
#define _tile_dpbf16ps_internal(dst,src1,src2) __asm__ volatile ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
#define _tile_dpbf16ps(dst,src1,src2) _tile_dpbf16ps_internal (dst, src1, src2)
#endif
#ifdef __DISABLE_AMX_BF16__
#undef __DISABLE_AMX_BF16__
#pragma GCC pop_options
#endif
#endif
#endif
View file

@ -0,0 +1,25 @@
/* clang-format off */
/*
 * AMX-INT8 intrinsics (vendored from GCC 11.2.0, adapted for Cosmopolitan).
 * Provides the four signed/unsigned 8-bit tile dot-product intrinsics.
 * Only compiled on x86_64 when not assembling or linking.
 */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AMXINT8INTRIN_H_INCLUDED
#define _AMXINT8INTRIN_H_INCLUDED
/* Temporarily enable the amx-int8 target feature if the translation
   unit wasn't already compiled with it; popped again below. */
#if !defined(__AMX_INT8__)
#pragma GCC push_options
#pragma GCC target("amx-int8")
#define __DISABLE_AMX_INT8__
#endif
#if defined(__x86_64__) && defined(__AMX_INT8__)
/* Shared expansion for the tdpb??d family: dot-product of int8 tiles
   src1*src2 accumulated into dst. Tile numbers must be literal constants
   since they're pasted into the mnemonic; the {...|...} syntax covers
   AT&T and Intel operand order. Suffixes: ss/su/us/uu = signedness of
   the two byte operands. */
#define _tile_int8_dp_internal(name,dst,src1,src2) __asm__ volatile ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
#define _tile_dpbssd(dst,src1,src2) _tile_int8_dp_internal (tdpbssd, dst, src1, src2)
#define _tile_dpbsud(dst,src1,src2) _tile_int8_dp_internal (tdpbsud, dst, src1, src2)
#define _tile_dpbusd(dst,src1,src2) _tile_int8_dp_internal (tdpbusd, dst, src1, src2)
#define _tile_dpbuud(dst,src1,src2) _tile_int8_dp_internal (tdpbuud, dst, src1, src2)
#endif
#ifdef __DISABLE_AMX_INT8__
#undef __DISABLE_AMX_INT8__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -0,0 +1,46 @@
/* clang-format off */
/*
 * AMX-TILE intrinsics (vendored from GCC 11.2.0, adapted for Cosmopolitan).
 * Tile configuration, load/store, and zeroing primitives. Only compiled
 * on x86_64 when not assembling or linking.
 */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AMXTILEINTRIN_H_INCLUDED
#define _AMXTILEINTRIN_H_INCLUDED
/* Temporarily enable the amx-tile target feature if the translation
   unit wasn't already compiled with it; popped again below. */
#if !defined(__AMX_TILE__)
#pragma GCC push_options
#pragma GCC target("amx-tile")
#define __DISABLE_AMX_TILE__
#endif
#if defined(__x86_64__) && defined(__AMX_TILE__)
/* Load the 64-byte tile configuration (palette, rows, colsb) pointed to
   by __config into the tile registers via ldtilecfg. */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_loadconfig (const void *__config)
{
  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
}
/* Store the current tile configuration to the 64-byte buffer __config. */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_storeconfig (void *__config)
{
  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
}
/* Release all tile state, returning the tiles to INIT. */
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tile_release (void)
{
  __asm__ volatile ("tilerelease" ::);
}
/* Tile load/store macros: the tile number is pasted into the mnemonic,
   so dst/src must be literal constants; base/stride are runtime values.
   The {...|...} alternatives cover AT&T and Intel operand order. */
#define _tile_loadd(dst,base,stride) _tile_loadd_internal (dst, base, stride)
#define _tile_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
/* Non-temporal (streaming) variant of _tile_loadd. */
#define _tile_stream_loadd(dst,base,stride) _tile_stream_loadd_internal (dst, base, stride)
#define _tile_stream_loadd_internal(dst,base,stride) __asm__ volatile ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" :: "r" ((const void*) base), "r" ((long) stride))
#define _tile_stored(dst,base,stride) _tile_stored_internal (dst, base, stride)
#define _tile_stored_internal(src,base,stride) __asm__ volatile ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" :: "r" ((void*) base), "r" ((long) stride) : "memory")
/* Zero every element of tile dst. */
#define _tile_zero(dst) _tile_zero_internal (dst)
#define _tile_zero_internal(dst) __asm__ volatile ("tilezero\t%%tmm"#dst ::)
#endif
#ifdef __DISABLE_AMX_TILE__
#undef __DISABLE_AMX_TILE__
#pragma GCC pop_options
#endif
#endif
#endif

File diff suppressed because it is too large Load diff

View file

@ -1,112 +1,180 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED #if !defined _IMMINTRIN_H_INCLUDED
#error \ # error "Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
"Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
#endif #endif
#ifndef _AVX5124FMAPSINTRIN_H_INCLUDED #ifndef _AVX5124FMAPSINTRIN_H_INCLUDED
#define _AVX5124FMAPSINTRIN_H_INCLUDED #define _AVX5124FMAPSINTRIN_H_INCLUDED
#ifndef __AVX5124FMAPS__ #ifndef __AVX5124FMAPS__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx5124fmaps") #pragma GCC target("avx5124fmaps")
#define __DISABLE_AVX5124FMAPS__ #define __DISABLE_AVX5124FMAPS__
#endif /* __AVX5124FMAPS__ */ #endif
extern __inline __m512
__funline __m512 _mm512_4fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512 __E, __m128 *__F) { _mm512_4fmadd_ps (__m512 __A, __m512 __B, __m512 __C,
return (__m512)__builtin_ia32_4fmaddps((__v16sf)__B, (__v16sf)__C, __m512 __D, __m512 __E, __m128 *__F)
(__v16sf)__D, (__v16sf)__E, {
(__v16sf)__A, (const __v4sf *)__F); return (__m512) __builtin_ia32_4fmaddps ((__v16sf) __B,
} (__v16sf) __C,
(__v16sf) __D,
__funline __m512 _mm512_mask_4fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, (__v16sf) __E,
__m512 __C, __m512 __D, __m512 __E, (__v16sf) __A,
__m128 *__F) {
return (__m512)__builtin_ia32_4fmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
}
__funline __m512 _mm512_maskz_4fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, __m512 __D, __m512 __E,
__m128 *__F) {
return (__m512)__builtin_ia32_4fmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
}
__funline __m128 _mm_4fmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
__m128 __E, __m128 *__F) {
return (__m128)__builtin_ia32_4fmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
(__v4sf)__E, (__v4sf)__A,
(const __v4sf *) __F); (const __v4sf *) __F);
} }
extern __inline __m512
__funline __m128 _mm_mask_4fmadd_ss(__m128 __A, __mmask8 __U, __m128 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128 __C, __m128 __D, __m128 __E, _mm512_mask_4fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
__m128 *__F) { __m512 __C, __m512 __D, __m512 __E, __m128 *__F)
return (__m128)__builtin_ia32_4fmaddss_mask( {
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A, return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U); (__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) __A,
(__mmask16) __U);
} }
extern __inline __m512
__funline __m128 _mm_maskz_4fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128 __C, __m128 __D, __m128 __E, _mm512_maskz_4fmadd_ps (__mmask16 __U,
__m128 *__F) { __m512 __A, __m512 __B, __m512 __C,
return (__m128)__builtin_ia32_4fmaddss_mask( __m512 __D, __m512 __E, __m128 *__F)
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A, {
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U);
} }
extern __inline __m128
__funline __m512 _mm512_4fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __m512 __D, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512 __E, __m128 *__F) { _mm_4fmadd_ss (__m128 __A, __m128 __B, __m128 __C,
return (__m512)__builtin_ia32_4fnmaddps((__v16sf)__B, (__v16sf)__C, __m128 __D, __m128 __E, __m128 *__F)
(__v16sf)__D, (__v16sf)__E, {
(__v16sf)__A, (const __v4sf *)__F); return (__m128) __builtin_ia32_4fmaddss ((__v4sf) __B,
} (__v4sf) __C,
(__v4sf) __D,
__funline __m512 _mm512_mask_4fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, (__v4sf) __E,
__m512 __C, __m512 __D, __m512 __E, (__v4sf) __A,
__m128 *__F) {
return (__m512)__builtin_ia32_4fnmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)__A, (__mmask16)__U);
}
__funline __m512 _mm512_maskz_4fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, __m512 __D, __m512 __E,
__m128 *__F) {
return (__m512)__builtin_ia32_4fnmaddps_mask(
(__v16sf)__B, (__v16sf)__C, (__v16sf)__D, (__v16sf)__E, (__v16sf)__A,
(const __v4sf *)__F, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U);
}
__funline __m128 _mm_4fnmadd_ss(__m128 __A, __m128 __B, __m128 __C, __m128 __D,
__m128 __E, __m128 *__F) {
return (__m128)__builtin_ia32_4fnmaddss((__v4sf)__B, (__v4sf)__C, (__v4sf)__D,
(__v4sf)__E, (__v4sf)__A,
(const __v4sf *) __F); (const __v4sf *) __F);
} }
extern __inline __m128
__funline __m128 _mm_mask_4fnmadd_ss(__m128 __A, __mmask8 __U, __m128 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128 __C, __m128 __D, __m128 __E, _mm_mask_4fmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
__m128 *__F) { __m128 __D, __m128 __E, __m128 *__F)
return (__m128)__builtin_ia32_4fnmaddss_mask( {
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A, return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
(const __v4sf *)__F, (__v4sf)__A, (__mmask8)__U); (__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) __A,
(__mmask8) __U);
} }
extern __inline __m128
__funline __m128 _mm_maskz_4fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128 __C, __m128 __D, __m128 __E, _mm_maskz_4fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
__m128 *__F) { __m128 __D, __m128 __E, __m128 *__F)
return (__m128)__builtin_ia32_4fnmaddss_mask( {
(__v4sf)__B, (__v4sf)__C, (__v4sf)__D, (__v4sf)__E, (__v4sf)__A, return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
(const __v4sf *)__F, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); (__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4fnmadd_ps (__m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fnmaddps ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) __A,
(__mmask16) __U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4fnmadd_ps (__mmask16 __U,
__m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_4fnmadd_ss (__m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fnmaddss ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_4fnmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) __A,
(__mmask8) __U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_4fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U);
} }
#ifdef __DISABLE_AVX5124FMAPS__ #ifdef __DISABLE_AVX5124FMAPS__
#undef __DISABLE_AVX5124FMAPS__ #undef __DISABLE_AVX5124FMAPS__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX5124FMAPS__ */ #endif
#endif
#endif /* _AVX5124FMAPSINTRIN_H_INCLUDED */ #endif

View file

@ -1,69 +1,102 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED #if !defined _IMMINTRIN_H_INCLUDED
#error \ # error "Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
"Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
#endif #endif
#ifndef _AVX5124VNNIWINTRIN_H_INCLUDED #ifndef _AVX5124VNNIWINTRIN_H_INCLUDED
#define _AVX5124VNNIWINTRIN_H_INCLUDED #define _AVX5124VNNIWINTRIN_H_INCLUDED
#ifndef __AVX5124VNNIW__ #ifndef __AVX5124VNNIW__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx5124vnniw") #pragma GCC target("avx5124vnniw")
#define __DISABLE_AVX5124VNNIW__ #define __DISABLE_AVX5124VNNIW__
#endif /* __AVX5124VNNIW__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_4dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D, __m512i __E, __m128i *__F) { _mm512_4dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C,
return (__m512i)__builtin_ia32_vp4dpwssd((__v16si)__B, (__v16si)__C, __m512i __D, __m512i __E, __m128i *__F)
(__v16si)__D, (__v16si)__E, {
(__v16si)__A, (const __v4si *)__F); return (__m512i) __builtin_ia32_vp4dpwssd ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_4dpwssd_epi32(__m512i __A, __mmask16 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B, __m512i __C, __m512i __D, _mm512_mask_4dpwssd_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
__m512i __E, __m128i *__F) { __m512i __C, __m512i __D, __m512i __E,
return (__m512i)__builtin_ia32_vp4dpwssd_mask( __m128i *__F)
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A, {
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U); return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) __A,
(__mmask16) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_4dpwssd_epi32(__mmask16 __U, __m512i __A, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B, __m512i __C, _mm512_maskz_4dpwssd_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
__m512i __D, __m512i __E, __m512i __C, __m512i __D, __m512i __E,
__m128i *__F) { __m128i *__F)
return (__m512i)__builtin_ia32_vp4dpwssd_mask( {
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A, return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
(const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U); (__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) _mm512_setzero_ps (),
(__mmask16) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_4dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D, __m512i __E, __m128i *__F) { _mm512_4dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C,
return (__m512i)__builtin_ia32_vp4dpwssds((__v16si)__B, (__v16si)__C, __m512i __D, __m512i __E, __m128i *__F)
(__v16si)__D, (__v16si)__E, {
(__v16si)__A, (const __v4si *)__F); return (__m512i) __builtin_ia32_vp4dpwssds ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_4dpwssds_epi32(__m512i __A, __mmask16 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B, __m512i __C, _mm512_mask_4dpwssds_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
__m512i __D, __m512i __E, __m512i __C, __m512i __D, __m512i __E,
__m128i *__F) { __m128i *__F)
return (__m512i)__builtin_ia32_vp4dpwssds_mask( {
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A, return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
(const __v4si *)__F, (__v16si)__A, (__mmask16)__U); (__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) __A,
(__mmask16) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_4dpwssds_epi32(__mmask16 __U, __m512i __A, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B, __m512i __C, _mm512_maskz_4dpwssds_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
__m512i __D, __m512i __E, __m512i __C, __m512i __D, __m512i __E,
__m128i *__F) { __m128i *__F)
return (__m512i)__builtin_ia32_vp4dpwssds_mask( {
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__v16si)__E, (__v16si)__A, return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
(const __v4si *)__F, (__v16si)_mm512_setzero_ps(), (__mmask16)__U); (__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) _mm512_setzero_ps (),
(__mmask16) __U);
} }
#ifdef __DISABLE_AVX5124VNNIW__ #ifdef __DISABLE_AVX5124VNNIW__
#undef __DISABLE_AVX5124VNNIW__ #undef __DISABLE_AVX5124VNNIW__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX5124VNNIW__ */ #endif
#endif
#endif /* _AVX5124VNNIWINTRIN_H_INCLUDED */ #endif

View file

@ -0,0 +1,74 @@
/* clang-format off */
/* AVX512-BF16 intrinsics (512-bit forms): bfloat16 conversion and
   dot-product wrappers around the __builtin_ia32_* compiler builtins.
   Vendored GCC header; x86_64-only, skipped for assembler/linker passes.

   Naming convention used throughout: plain = unmasked, _mask_ = merge
   masking (masked-off lanes taken from the first operand), _maskz_ =
   zero masking (masked-off lanes zeroed).  */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512BF16INTRIN_H_INCLUDED
#define _AVX512BF16INTRIN_H_INCLUDED
/* Temporarily enable avx512bf16 if the TU was not built with it.  */
#ifndef __AVX512BF16__
#pragma GCC push_options
#pragma GCC target("avx512bf16")
#define __DISABLE_AVX512BF16__
#endif
/* bfloat16 vectors are represented as vectors of 16-bit integers
   (short); __m512bh is the public may_alias type.  */
typedef short __v32bh __attribute__ ((__vector_size__ (64)));
typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
/* Convert the two float vectors __A and __B into one packed bf16
   vector (wraps __builtin_ia32_cvtne2ps2bf16_v32hi).  */
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
{
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
}
/* Merge-masked variant: __A supplies masked-off lanes, __B is the
   mask.  */
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
{
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
}
/* Zero-masked variant: __A is the mask.  */
extern __inline __m512bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
{
return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
}
/* Narrow one 512-bit float vector to a 256-bit bf16 vector (wraps
   __builtin_ia32_cvtneps2bf16_v16sf); plus merge- and zero-masked
   variants.  */
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtneps_pbh (__m512 __A)
{
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
{
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
{
return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
}
/* bf16 dot-product accumulating into float vector __A (wraps
   __builtin_ia32_dpbf16ps_v16sf); plus merge- and zero-masked
   variants.  */
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
{
return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
{
return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
{
return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
}
/* Restore the caller's target options if we pushed them above.  */
#ifdef __DISABLE_AVX512BF16__
#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -0,0 +1,130 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
#define _AVX512BF16VLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
#pragma GCC push_options
#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif
typedef short __v16bh __attribute__ ((__vector_size__ (32)));
typedef short __v8bh __attribute__ ((__vector_size__ (16)));
typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
{
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
{
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
}
extern __inline __m256bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
{
return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
{
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
{
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
{
return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtneps_pbh (__m256 __A)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtneps_pbh (__m128 __A)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
}
extern __inline __m128bh
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
{
return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
{
return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
{
return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
{
return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
{
return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
{
return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
{
return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
}
#ifdef __DISABLE_AVX512BF16VL__
#undef __DISABLE_AVX512BF16VL__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -1,172 +1,231 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED #if !defined _IMMINTRIN_H_INCLUDED
#error \ # error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
"Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
#endif #endif
#ifndef _AVX512BITALGINTRIN_H_INCLUDED #ifndef _AVX512BITALGINTRIN_H_INCLUDED
#define _AVX512BITALGINTRIN_H_INCLUDED #define _AVX512BITALGINTRIN_H_INCLUDED
#ifndef __AVX512BITALG__ #ifndef __AVX512BITALG__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512bitalg") #pragma GCC target("avx512bitalg")
#define __DISABLE_AVX512BITALG__ #define __DISABLE_AVX512BITALG__
#endif /* __AVX512BITALG__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_popcnt_epi8(__m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi8 (__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A); return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A);
} }
extern __inline __m512i
__funline __m512i _mm512_popcnt_epi16(__m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi16 (__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A); return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
} }
#ifdef __DISABLE_AVX512BITALG__ #ifdef __DISABLE_AVX512BITALG__
#undef __DISABLE_AVX512BITALG__ #undef __DISABLE_AVX512BITALG__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512BITALG__ */ #endif
#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__) #if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512bitalg,avx512bw") #pragma GCC target("avx512bitalg,avx512bw")
#define __DISABLE_AVX512BITALGBW__ #define __DISABLE_AVX512BITALGBW__
#endif /* __AVX512VLBW__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B) { _mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask( {
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U); return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
(__v64qi) __W,
(__mmask64) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpopcountb_v64qi_mask( _mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A)
(__v64qi)__A, (__v64qi)_mm512_setzero_si512(), (__mmask64)__U); {
return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
(__v64qi)
_mm512_setzero_si512 (),
(__mmask64) __U);
} }
__funline __m512i _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, extern __inline __m512i
__m512i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask( _mm512_mask_popcnt_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
(__v32hi)__A, (__v32hi)__B, (__mmask32)__U); {
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
(__v32hi) __W,
(__mmask32) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpopcountw_v32hi_mask( _mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A)
(__v32hi)__A, (__v32hi)_mm512_setzero_si512(), (__mmask32)__U); {
return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
(__v32hi)
_mm512_setzero_si512 (),
(__mmask32) __U);
} }
extern __inline __mmask64
__funline __mmask64 _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask( _mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1); {
return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
(__v64qi) __B,
(__mmask64) -1);
} }
extern __inline __mmask64
__funline __mmask64 _mm512_mask_bitshuffle_epi64_mask(__mmask64 __M, __m512i __A, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B) { _mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
return (__mmask64)__builtin_ia32_vpshufbitqmb512_mask( {
(__v64qi)__A, (__v64qi)__B, (__mmask64)__M); return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
(__v64qi) __B,
(__mmask64) __M);
} }
#ifdef __DISABLE_AVX512BITALGBW__ #ifdef __DISABLE_AVX512BITALGBW__
#undef __DISABLE_AVX512BITALGBW__ #undef __DISABLE_AVX512BITALGBW__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512BITALGBW__ */ #endif
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || \
!defined(__AVX512BW__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512bitalg,avx512vl,avx512bw") #pragma GCC target("avx512bitalg,avx512vl,avx512bw")
#define __DISABLE_AVX512BITALGVLBW__ #define __DISABLE_AVX512BITALGVLBW__
#endif /* __AVX512VLBW__ */ #endif
extern __inline __m256i
__funline __m256i _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __B) { _mm256_mask_popcnt_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask( {
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U); return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
(__v32qi) __W,
(__mmask32) __U);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpopcountb_v32qi_mask( _mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
(__v32qi)__A, (__v32qi)_mm256_setzero_si256(), (__mmask32)__U); {
return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
(__v32qi)
_mm256_setzero_si256 (),
(__mmask32) __U);
} }
extern __inline __mmask32
__funline __mmask32 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask( _mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1); {
return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
(__v32qi) __B,
(__mmask32) -1);
} }
extern __inline __mmask32
__funline __mmask32 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __M, __m256i __A, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __B) { _mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
return (__mmask32)__builtin_ia32_vpshufbitqmb256_mask( {
(__v32qi)__A, (__v32qi)__B, (__mmask32)__M); return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v32qi) __A,
(__v32qi) __B,
(__mmask32) __M);
} }
#ifdef __DISABLE_AVX512BITALGVLBW__ #ifdef __DISABLE_AVX512BITALGVLBW__
#undef __DISABLE_AVX512BITALGVLBW__ #undef __DISABLE_AVX512BITALGVLBW__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512BITALGVLBW__ */ #endif
#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) #if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512bitalg,avx512vl") #pragma GCC target("avx512bitalg,avx512vl")
#define __DISABLE_AVX512BITALGVL__ #define __DISABLE_AVX512BITALGVL__
#endif /* __AVX512VLBW__ */ #endif
extern __inline __mmask16
__funline __mmask16 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask( _mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1); {
return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
(__v16qi) __B,
(__mmask16) -1);
} }
extern __inline __mmask16
__funline __mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 __M, __m128i __A, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __B) { _mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
return (__mmask16)__builtin_ia32_vpshufbitqmb128_mask( {
(__v16qi)__A, (__v16qi)__B, (__mmask16)__M); return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v16qi) __A,
(__v16qi) __B,
(__mmask16) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_popcnt_epi8(__m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi8 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A); return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
} }
extern __inline __m256i
__funline __m256i _mm256_popcnt_epi16(__m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi16 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A); return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
} }
extern __inline __m128i
__funline __m128i _mm_popcnt_epi8(__m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi8 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A); return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
} }
extern __inline __m128i
__funline __m128i _mm_popcnt_epi16(__m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi16 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A); return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __B) { _mm256_mask_popcnt_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask( {
(__v16hi)__A, (__v16hi)__B, (__mmask16)__U); return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
(__v16hi) __W,
(__mmask16) __U);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpopcountw_v16hi_mask( _mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
(__v16hi)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); {
return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
(__v16hi)
_mm256_setzero_si256 (),
(__mmask16) __U);
} }
extern __inline __m128i
__funline __m128i _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask( _mm_mask_popcnt_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U); {
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
(__v16qi) __W,
(__mmask16) __U);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpopcountb_v16qi_mask( _mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
(__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__U); {
return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
(__v16qi)
_mm_setzero_si128 (),
(__mmask16) __U);
} }
__funline __m128i _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { extern __inline __m128i
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask((__v8hi)__A, (__v8hi)__B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_popcnt_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
(__v8hi) __W,
(__mmask8) __U); (__mmask8) __U);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpopcountw_v8hi_mask( _mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
(__v8hi)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); {
return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
(__v8hi)
_mm_setzero_si128 (),
(__mmask8) __U);
} }
#ifdef __DISABLE_AVX512BITALGVL__ #ifdef __DISABLE_AVX512BITALGVL__
#undef __DISABLE_AVX512BITALGVL__ #undef __DISABLE_AVX512BITALGVL__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512BITALGBW__ */ #endif
#endif
#endif /* _AVX512BITALGINTRIN_H_INCLUDED */ #endif

File diff suppressed because it is too large Load diff

View file

@ -1,100 +1,140 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead." #error "Never use <avx512cdintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512CDINTRIN_H_INCLUDED #ifndef _AVX512CDINTRIN_H_INCLUDED
#define _AVX512CDINTRIN_H_INCLUDED #define _AVX512CDINTRIN_H_INCLUDED
#ifndef __AVX512CD__ #ifndef __AVX512CD__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512cd") #pragma GCC target("avx512cd")
#define __DISABLE_AVX512CD__ #define __DISABLE_AVX512CD__
#endif /* __AVX512CD__ */ #endif
typedef long long __v8di __attribute__ ((__vector_size__ (64))); typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64))); typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef unsigned char __mmask8; typedef unsigned char __mmask8;
typedef unsigned short __mmask16; typedef unsigned short __mmask16;
extern __inline __m512i
__funline __m512i _mm512_conflict_epi32(__m512i __A) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpconflictsi_512_mask( _mm512_conflict_epi32 (__m512i __A)
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1); {
return (__m512i)
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) -1);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __A) { _mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
return (__m512i)__builtin_ia32_vpconflictsi_512_mask( {
(__v16si)__A, (__v16si)__W, (__mmask16)__U); return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
} (__v16si) __W,
__funline __m512i _mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpconflictsi_512_mask(
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U);
}
__funline __m512i _mm512_conflict_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
}
__funline __m512i _mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U,
__m512i __A) {
return (__m512i)__builtin_ia32_vpconflictdi_512_mask((__v8di)__A, (__v8di)__W,
(__mmask8)__U);
}
__funline __m512i _mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vpconflictdi_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
}
__funline __m512i _mm512_lzcnt_epi64(__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)-1);
}
__funline __m512i _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U,
__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntq_512_mask((__v8di)__A, (__v8di)__W,
(__mmask8)__U);
}
__funline __m512i _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_vplzcntq_512_mask(
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U);
}
__funline __m512i _mm512_lzcnt_epi32(__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntd_512_mask(
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)-1);
}
__funline __m512i _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U,
__m512i __A) {
return (__m512i)__builtin_ia32_vplzcntd_512_mask((__v16si)__A, (__v16si)__W,
(__mmask16) __U); (__mmask16) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vplzcntd_512_mask( _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U); {
return (__m512i)
__builtin_ia32_vpconflictsi_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_broadcastmb_epi64(__mmask8 __A) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_conflict_epi64 (__m512i __A)
{
return (__m512i)
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
{
return (__m512i)
__builtin_ia32_vpconflictdi_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_lzcnt_epi64 (__m512i __A)
{
return (__m512i)
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_lzcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_lzcnt_epi64 (__mmask8 __U, __m512i __A)
{
return (__m512i)
__builtin_ia32_vplzcntq_512_mask ((__v8di) __A,
(__v8di) _mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_lzcnt_epi32 (__m512i __A)
{
return (__m512i)
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_lzcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_lzcnt_epi32 (__mmask16 __U, __m512i __A)
{
return (__m512i)
__builtin_ia32_vplzcntd_512_mask ((__v16si) __A,
(__v16si) _mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastmb_epi64 (__mmask8 __A)
{
return (__m512i) __builtin_ia32_broadcastmb512 (__A); return (__m512i) __builtin_ia32_broadcastmb512 (__A);
} }
extern __inline __m512i
__funline __m512i _mm512_broadcastmw_epi32(__mmask16 __A) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcastmw_epi32 (__mmask16 __A)
{
return (__m512i) __builtin_ia32_broadcastmw512 (__A); return (__m512i) __builtin_ia32_broadcastmw512 (__A);
} }
#ifdef __DISABLE_AVX512CD__ #ifdef __DISABLE_AVX512CD__
#undef __DISABLE_AVX512CD__ #undef __DISABLE_AVX512CD__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512CD__ */ #endif
#endif
#endif /* _AVX512CDINTRIN_H_INCLUDED */ #endif

File diff suppressed because it is too large Load diff

View file

@ -1,281 +1,357 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512ERINTRIN_H_INCLUDED #ifndef _AVX512ERINTRIN_H_INCLUDED
#define _AVX512ERINTRIN_H_INCLUDED #define _AVX512ERINTRIN_H_INCLUDED
#ifndef __AVX512ER__ #ifndef __AVX512ER__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512er") #pragma GCC target("avx512er")
#define __DISABLE_AVX512ER__ #define __DISABLE_AVX512ER__
#endif /* __AVX512ER__ */ #endif
typedef double __v8df __attribute__ ((__vector_size__ (64))); typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64))); typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef unsigned char __mmask8; typedef unsigned char __mmask8;
typedef unsigned short __mmask16; typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m512d _mm512_exp2a23_round_pd(__m512d __A, int __R) { extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd (__m512d __A, int __R)
{
__m512d __W; __m512d __W;
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W, return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) -1, __R); (__mmask8) -1, __R);
} }
extern __inline __m512d
__funline __m512d _mm512_mask_exp2a23_round_pd(__m512d __W, __mmask8 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512d __A, int __R) { _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
return (__m512d)__builtin_ia32_exp2pd_mask((__v8df)__A, (__v8df)__W, {
return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U, __R); (__mmask8) __U, __R);
} }
extern __inline __m512d
__funline __m512d _mm512_maskz_exp2a23_round_pd(__mmask8 __U, __m512d __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __R) { _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
return (__m512d)__builtin_ia32_exp2pd_mask( {
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R); return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
(__v8df) _mm512_setzero_pd (),
(__mmask8) __U, __R);
} }
extern __inline __m512
__funline __m512 _mm512_exp2a23_round_ps(__m512 __A, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps (__m512 __A, int __R)
{
__m512 __W; __m512 __W;
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W, return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) -1, __R); (__mmask16) -1, __R);
} }
extern __inline __m512
__funline __m512 _mm512_mask_exp2a23_round_ps(__m512 __W, __mmask16 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512 __A, int __R) { _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
return (__m512)__builtin_ia32_exp2ps_mask((__v16sf)__A, (__v16sf)__W, {
return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U, __R); (__mmask16) __U, __R);
} }
extern __inline __m512
__funline __m512 _mm512_maskz_exp2a23_round_ps(__mmask16 __U, __m512 __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __R) { _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
return (__m512)__builtin_ia32_exp2ps_mask( {
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R); return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U, __R);
} }
extern __inline __m512d
__funline __m512d _mm512_rcp28_round_pd(__m512d __A, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
__m512d __W; __m512d __W;
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W, return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) -1, __R); (__mmask8) -1, __R);
} }
extern __inline __m512d
__funline __m512d _mm512_mask_rcp28_round_pd(__m512d __W, __mmask8 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512d __A, int __R) { _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
return (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)__A, (__v8df)__W, {
return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U, __R); (__mmask8) __U, __R);
} }
extern __inline __m512d
__funline __m512d _mm512_maskz_rcp28_round_pd(__mmask8 __U, __m512d __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __R) { _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
return (__m512d)__builtin_ia32_rcp28pd_mask( {
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R); return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
(__v8df) _mm512_setzero_pd (),
(__mmask8) __U, __R);
} }
extern __inline __m512
__funline __m512 _mm512_rcp28_round_ps(__m512 __A, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
__m512 __W; __m512 __W;
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W, return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) -1, __R); (__mmask16) -1, __R);
} }
extern __inline __m512
__funline __m512 _mm512_mask_rcp28_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __R) { _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
return (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)__A, (__v16sf)__W, {
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U, __R); (__mmask16) __U, __R);
} }
extern __inline __m512
__funline __m512 _mm512_maskz_rcp28_round_ps(__mmask16 __U, __m512 __A, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m512)__builtin_ia32_rcp28ps_mask( _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R); {
return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U, __R);
} }
extern __inline __m128d
__funline __m128d _mm_rcp28_round_sd(__m128d __A, __m128d __B, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128d)__builtin_ia32_rcp28sd_round((__v2df)__B, (__v2df)__A, __R); _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
(__v2df) __A,
__R);
} }
extern __inline __m128d
__funline __m128 _mm_rcp28_round_ss(__m128 __A, __m128 __B, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128)__builtin_ia32_rcp28ss_round((__v4sf)__B, (__v4sf)__A, __R); _mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
__m128d __B, int __R)
{
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
(__v2df) __A,
(__v2df) __W,
__U,
__R);
} }
extern __inline __m128d
__funline __m512d _mm512_rsqrt28_round_pd(__m512d __A, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
(__v2df) __A,
(__v2df)
_mm_setzero_pd (),
__U,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
(__v4sf) __A,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
__m128 __B, int __R)
{
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
(__v4sf) __A,
(__v4sf) __W,
__U,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
(__v4sf) __A,
(__v4sf)
_mm_setzero_ps (),
__U,
__R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
__m512d __W; __m512d __W;
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W, return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) -1, __R); (__mmask8) -1, __R);
} }
extern __inline __m512d
__funline __m512d _mm512_mask_rsqrt28_round_pd(__m512d __W, __mmask8 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512d __A, int __R) { _mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
return (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)__A, (__v8df)__W, {
return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U, __R); (__mmask8) __U, __R);
} }
extern __inline __m512d
__funline __m512d _mm512_maskz_rsqrt28_round_pd(__mmask8 __U, __m512d __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __R) { _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
return (__m512d)__builtin_ia32_rsqrt28pd_mask( {
(__v8df)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U, __R); return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
(__v8df) _mm512_setzero_pd (),
(__mmask8) __U, __R);
} }
extern __inline __m512
__funline __m512 _mm512_rsqrt28_round_ps(__m512 __A, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
__m512 __W; __m512 __W;
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W, return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) -1, __R); (__mmask16) -1, __R);
} }
extern __inline __m512
__funline __m512 _mm512_mask_rsqrt28_round_ps(__m512 __W, __mmask16 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512 __A, int __R) { _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
return (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)__A, (__v16sf)__W, {
return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U, __R); (__mmask16) __U, __R);
} }
extern __inline __m512
__funline __m512 _mm512_maskz_rsqrt28_round_ps(__mmask16 __U, __m512 __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __R) { _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
return (__m512)__builtin_ia32_rsqrt28ps_mask( {
(__v16sf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U, __R); return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U, __R);
} }
extern __inline __m128d
__funline __m128d _mm_rsqrt28_round_sd(__m128d __A, __m128d __B, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)__B, (__v2df)__A, __R); _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
(__v2df) __A,
__R);
} }
extern __inline __m128d
__funline __m128 _mm_rsqrt28_round_ss(__m128 __A, __m128 __B, int __R) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)__B, (__v4sf)__A, __R); _mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
__m128d __B, int __R)
{
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
(__v2df) __A,
(__v2df) __W,
__U,
__R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
(__v2df) __A,
(__v2df)
_mm_setzero_pd (),
__U,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
(__v4sf) __A,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
__m128 __B, int __R)
{
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
(__v4sf) __A,
(__v4sf) __W,
__U,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
(__v4sf) __A,
(__v4sf)
_mm_setzero_ps (),
__U,
__R);
} }
#else #else
#define _mm512_exp2a23_round_pd(A, C) \ #define _mm512_exp2a23_round_pd(A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) #define _mm512_mask_exp2a23_round_pd(W, U, A, C) __builtin_ia32_exp2pd_mask(A, W, U, C)
#define _mm512_maskz_exp2a23_round_pd(U, A, C) __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \ #define _mm512_exp2a23_round_ps(A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
__builtin_ia32_exp2pd_mask(A, W, U, C) #define _mm512_mask_exp2a23_round_ps(W, U, A, C) __builtin_ia32_exp2ps_mask(A, W, U, C)
#define _mm512_maskz_exp2a23_round_ps(U, A, C) __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_maskz_exp2a23_round_pd(U, A, C) \ #define _mm512_rcp28_round_pd(A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
__builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) #define _mm512_mask_rcp28_round_pd(W, U, A, C) __builtin_ia32_rcp28pd_mask(A, W, U, C)
#define _mm512_maskz_rcp28_round_pd(U, A, C) __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_exp2a23_round_ps(A, C) \ #define _mm512_rcp28_round_ps(A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) #define _mm512_mask_rcp28_round_ps(W, U, A, C) __builtin_ia32_rcp28ps_mask(A, W, U, C)
#define _mm512_maskz_rcp28_round_ps(U, A, C) __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \ #define _mm512_rsqrt28_round_pd(A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
__builtin_ia32_exp2ps_mask(A, W, U, C) #define _mm512_mask_rsqrt28_round_pd(W, U, A, C) __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_maskz_exp2a23_round_ps(U, A, C) \ #define _mm512_rsqrt28_round_ps(A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) #define _mm512_mask_rsqrt28_round_ps(W, U, A, C) __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rcp28_round_pd(A, C) \
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
__builtin_ia32_rcp28pd_mask(A, W, U, C)
#define _mm512_maskz_rcp28_round_pd(U, A, C) \
__builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rcp28_round_ps(A, C) \
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
__builtin_ia32_rcp28ps_mask(A, W, U, C)
#define _mm512_maskz_rcp28_round_ps(U, A, C) \
__builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rsqrt28_round_pd(A, C) \
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
__builtin_ia32_rsqrt28pd_mask(A, W, U, C)
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
__builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rsqrt28_round_ps(A, C) \
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
__builtin_ia32_rsqrt28ps_mask(A, W, U, C)
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
__builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm_rcp28_round_sd(A, B, R) __builtin_ia32_rcp28sd_round(A, B, R) #define _mm_rcp28_round_sd(A, B, R) __builtin_ia32_rcp28sd_round(A, B, R)
#define _mm_mask_rcp28_round_sd(W, U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rcp28_round_sd(U, A, B, R) __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
#define _mm_rcp28_round_ss(A, B, R) __builtin_ia32_rcp28ss_round(A, B, R) #define _mm_rcp28_round_ss(A, B, R) __builtin_ia32_rcp28ss_round(A, B, R)
#define _mm_mask_rcp28_round_ss(W, U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rcp28_round_ss(U, A, B, R) __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
#define _mm_rsqrt28_round_sd(A, B, R) __builtin_ia32_rsqrt28sd_round(A, B, R) #define _mm_rsqrt28_round_sd(A, B, R) __builtin_ia32_rsqrt28sd_round(A, B, R)
#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), (U), (R))
#define _mm_rsqrt28_round_ss(A, B, R) __builtin_ia32_rsqrt28ss_round(A, B, R) #define _mm_rsqrt28_round_ss(A, B, R) __builtin_ia32_rsqrt28ss_round(A, B, R)
#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))
#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), (U), (R))
#endif #endif
#define _mm_mask_rcp28_sd(W, U, A, B) _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_pd(A) \ #define _mm_maskz_rcp28_sd(U, A, B) _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
_mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION) #define _mm_mask_rcp28_ss(W, U, A, B) _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_rcp28_ss(U, A, B) _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_pd(W, U, A) \ #define _mm_mask_rsqrt28_sd(W, U, A, B) _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
_mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) #define _mm_maskz_rsqrt28_sd(U, A, B) _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_rsqrt28_ss(W, U, A, B) _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_pd(U, A) \ #define _mm_maskz_rsqrt28_ss(U, A, B) _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
_mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION) #define _mm512_exp2a23_pd(A) _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_pd(W, U, A) _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_exp2a23_ps(A) \ #define _mm512_maskz_exp2a23_pd(U, A) _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
_mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION) #define _mm512_exp2a23_ps(A) _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_ps(W, U, A) _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_exp2a23_ps(W, U, A) \ #define _mm512_maskz_exp2a23_ps(U, A) _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
_mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_exp2a23_ps(U, A) \
_mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rcp28_pd(A) _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION) #define _mm512_rcp28_pd(A) _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_pd(W, U, A) _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_pd(W, U, A) \ #define _mm512_maskz_rcp28_pd(U, A) _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
_mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rcp28_pd(U, A) \
_mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rcp28_ps(A) _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION) #define _mm512_rcp28_ps(A) _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_ps(W, U, A) _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rcp28_ps(W, U, A) \ #define _mm512_maskz_rcp28_ps(U, A) _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
_mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION) #define _mm512_rsqrt28_pd(A) _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_pd(W, U, A) _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rcp28_ps(U, A) \ #define _mm512_maskz_rsqrt28_pd(U, A) _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
_mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) #define _mm512_rsqrt28_ps(A) _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_ps(W, U, A) _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rsqrt28_pd(A) \ #define _mm512_maskz_rsqrt28_ps(U, A) _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
_mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION) #define _mm_rcp28_sd(A, B) __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_ss(A, B) __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_pd(W, U, A) \ #define _mm_rsqrt28_sd(A, B) __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
_mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) #define _mm_rsqrt28_ss(A, B) __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rsqrt28_pd(U, A) \
_mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_rsqrt28_ps(A) \
_mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_rsqrt28_ps(W, U, A) \
_mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm512_maskz_rsqrt28_ps(U, A) \
_mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_sd(A, B) \
__builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_ss(A, B) \
__builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_sd(A, B) \
__builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_ss(A, B) \
__builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#ifdef __DISABLE_AVX512ER__ #ifdef __DISABLE_AVX512ER__
#undef __DISABLE_AVX512ER__ #undef __DISABLE_AVX512ER__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512ER__ */ #endif
#endif
#endif /* _AVX512ERINTRIN_H_INCLUDED */ #endif

File diff suppressed because it is too large Load diff

View file

@ -1,53 +1,74 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead." #error "Never use <avx512ifmaintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512IFMAINTRIN_H_INCLUDED #ifndef _AVX512IFMAINTRIN_H_INCLUDED
#define _AVX512IFMAINTRIN_H_INCLUDED #define _AVX512IFMAINTRIN_H_INCLUDED
#ifndef __AVX512IFMA__ #ifndef __AVX512IFMA__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512ifma") #pragma GCC target("avx512ifma")
#define __DISABLE_AVX512IFMA__ #define __DISABLE_AVX512IFMA__
#endif /* __AVX512IFMA__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_madd52lo_epu64(__m512i __X, __m512i __Y, __m512i __Z) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpmadd52luq512_mask((__v8di)__X, (__v8di)__Y, _mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
(__v8di)__Z, (__mmask8)-1); {
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) -1);
} }
extern __inline __m512i
__funline __m512i _mm512_madd52hi_epu64(__m512i __X, __m512i __Y, __m512i __Z) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpmadd52huq512_mask((__v8di)__X, (__v8di)__Y, _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
(__v8di)__Z, (__mmask8)-1); {
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) -1);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_madd52lo_epu64(__m512i __W, __mmask8 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __X, __m512i __Y) { _mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
return (__m512i)__builtin_ia32_vpmadd52luq512_mask( __m512i __Y)
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M); {
return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W,
(__v8di) __X,
(__v8di) __Y,
(__mmask8) __M);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_madd52hi_epu64(__m512i __W, __mmask8 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __X, __m512i __Y) { _mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X,
return (__m512i)__builtin_ia32_vpmadd52huq512_mask( __m512i __Y)
(__v8di)__W, (__v8di)__X, (__v8di)__Y, (__mmask8)__M); {
return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W,
(__v8di) __X,
(__v8di) __Y,
(__mmask8) __M);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_madd52lo_epu64(__mmask8 __M, __m512i __X, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __Y, __m512i __Z) { _mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
return (__m512i)__builtin_ia32_vpmadd52luq512_maskz( {
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M); return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) __M);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_madd52hi_epu64(__mmask8 __M, __m512i __X, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __Y, __m512i __Z) { _mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z)
return (__m512i)__builtin_ia32_vpmadd52huq512_maskz( {
(__v8di)__X, (__v8di)__Y, (__v8di)__Z, (__mmask8)__M); return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X,
(__v8di) __Y,
(__v8di) __Z,
(__mmask8) __M);
} }
#ifdef __DISABLE_AVX512IFMA__ #ifdef __DISABLE_AVX512IFMA__
#undef __DISABLE_AVX512IFMA__ #undef __DISABLE_AVX512IFMA__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512IFMA__ */ #endif
#endif
#endif /* _AVX512IFMAINTRIN_H_INCLUDED */ #endif

View file

@ -1,88 +1,128 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error \ #error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
"Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED #ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
#define _AVX512IFMAVLINTRIN_H_INCLUDED #define _AVX512IFMAVLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) #if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512ifma,avx512vl") #pragma GCC target("avx512ifma,avx512vl")
#define __DISABLE_AVX512IFMAVL__ #define __DISABLE_AVX512IFMAVL__
#endif /* __AVX512IFMAVL__ */ #endif
extern __inline __m128i
__funline __m128i _mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpmadd52luq128_mask((__v2di)__X, (__v2di)__Y, _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
(__v2di)__Z, (__mmask8)-1); {
return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) -1);
} }
extern __inline __m128i
__funline __m128i _mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpmadd52huq128_mask((__v2di)__X, (__v2di)__Y, _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
(__v2di)__Z, (__mmask8)-1); {
return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) -1);
} }
extern __inline __m256i
__funline __m256i _mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpmadd52luq256_mask((__v4di)__X, (__v4di)__Y, _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
(__v4di)__Z, (__mmask8)-1); {
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) -1);
} }
extern __inline __m256i
__funline __m256i _mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpmadd52huq256_mask((__v4di)__X, (__v4di)__Y, _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
(__v4di)__Z, (__mmask8)-1); {
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) -1);
} }
extern __inline __m128i
__funline __m128i _mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __Y) { _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
return (__m128i)__builtin_ia32_vpmadd52luq128_mask( {
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M); return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W,
(__v2di) __X,
(__v2di) __Y,
(__mmask8) __M);
} }
extern __inline __m128i
__funline __m128i _mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __Y) { _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
return (__m128i)__builtin_ia32_vpmadd52huq128_mask( {
(__v2di)__W, (__v2di)__X, (__v2di)__Y, (__mmask8)__M); return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W,
(__v2di) __X,
(__v2di) __Y,
(__mmask8) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_madd52lo_epu64(__m256i __W, __mmask8 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __X, __m256i __Y) { _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
return (__m256i)__builtin_ia32_vpmadd52luq256_mask( __m256i __Y)
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M); {
return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W,
(__v4di) __X,
(__v4di) __Y,
(__mmask8) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_madd52hi_epu64(__m256i __W, __mmask8 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __X, __m256i __Y) { _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X,
return (__m256i)__builtin_ia32_vpmadd52huq256_mask( __m256i __Y)
(__v4di)__W, (__v4di)__X, (__v4di)__Y, (__mmask8)__M); {
return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W,
(__v4di) __X,
(__v4di) __Y,
(__mmask8) __M);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __Z) { _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
return (__m128i)__builtin_ia32_vpmadd52luq128_maskz( {
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M); return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) __M);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __Z) { _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
return (__m128i)__builtin_ia32_vpmadd52huq128_maskz( {
(__v2di)__X, (__v2di)__Y, (__v2di)__Z, (__mmask8)__M); return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X,
(__v2di) __Y,
(__v2di) __Z,
(__mmask8) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_madd52lo_epu64(__mmask8 __M, __m256i __X, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __Y, __m256i __Z) { _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
return (__m256i)__builtin_ia32_vpmadd52luq256_maskz( {
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M); return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_madd52hi_epu64(__mmask8 __M, __m256i __X, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __Y, __m256i __Z) { _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
return (__m256i)__builtin_ia32_vpmadd52huq256_maskz( {
(__v4di)__X, (__v4di)__Y, (__v4di)__Z, (__mmask8)__M); return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X,
(__v4di) __Y,
(__v4di) __Z,
(__mmask8) __M);
} }
#ifdef __DISABLE_AVX512IFMAVL__ #ifdef __DISABLE_AVX512IFMAVL__
#undef __DISABLE_AVX512IFMAVL__ #undef __DISABLE_AVX512IFMAVL__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512IFMAVL__ */ #endif
#endif
#endif /* _AVX512IFMAVLINTRIN_H_INCLUDED */ #endif

View file

@ -1,190 +1,170 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead." #error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512PFINTRIN_H_INCLUDED #ifndef _AVX512PFINTRIN_H_INCLUDED
#define _AVX512PFINTRIN_H_INCLUDED #define _AVX512PFINTRIN_H_INCLUDED
#ifndef __AVX512PF__ #ifndef __AVX512PF__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512pf") #pragma GCC target("avx512pf")
#define __DISABLE_AVX512PF__ #define __DISABLE_AVX512PF__
#endif /* __AVX512PF__ */ #endif
typedef long long __v8di __attribute__ ((__vector_size__ (64))); typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64))); typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef unsigned char __mmask8; typedef unsigned char __mmask8;
typedef unsigned short __mmask16; typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline void _mm512_prefetch_i32gather_pd(__m256i __index, void const *__addr, extern __inline void
int __scale, int __hint) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale, _mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
__hint); int __scale, int __hint)
{
__builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
__scale, __hint);
} }
extern __inline void
__funline void _mm512_prefetch_i32gather_ps(__m512i __index, void const *__addr, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __scale, int __hint) { _mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
int __scale, int __hint)
{
__builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr, __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
__scale, __hint); __scale, __hint);
} }
extern __inline void
__funline void _mm512_mask_prefetch_i32gather_pd(__m256i __index, __mmask8 __mask, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
void const *__addr, int __scale, _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
int __hint) { void const *__addr, int __scale, int __hint)
__builtin_ia32_gatherpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint); {
} __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index, __addr, __scale,
__funline void _mm512_mask_prefetch_i32gather_ps(__m512i __index,
__mmask16 __mask,
void const *__addr, int __scale,
int __hint) {
__builtin_ia32_gatherpfdps(__mask, (__v16si)__index, __addr, __scale, __hint);
}
__funline void _mm512_prefetch_i64gather_pd(__m512i __index, void const *__addr,
int __scale, int __hint) {
__builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale,
__hint); __hint);
} }
extern __inline void
__funline void _mm512_prefetch_i64gather_ps(__m512i __index, void const *__addr, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __scale, int __hint) { _mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
__builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale, void const *__addr, int __scale, int __hint)
{
__builtin_ia32_gatherpfdps (__mask, (__v16si) __index, __addr, __scale,
__hint); __hint);
} }
extern __inline void
__funline void _mm512_mask_prefetch_i64gather_pd(__m512i __index, __mmask8 __mask, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
void const *__addr, int __scale, _mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
int __hint) { int __scale, int __hint)
__builtin_ia32_gatherpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint); {
__builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
__scale, __hint);
} }
extern __inline void
__funline void _mm512_mask_prefetch_i64gather_ps(__m512i __index, __mmask8 __mask, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
void const *__addr, int __scale, _mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
int __hint) { int __scale, int __hint)
__builtin_ia32_gatherpfqps(__mask, (__v8di)__index, __addr, __scale, __hint); {
__builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
__scale, __hint);
} }
extern __inline void
__funline void _mm512_prefetch_i32scatter_pd(void *__addr, __m256i __index, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __scale, int __hint) { _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
__builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)__index, __addr, __scale, void const *__addr, int __scale, int __hint)
{
__builtin_ia32_gatherpfqpd (__mask, (__v8di) __index, __addr, __scale,
__hint); __hint);
} }
extern __inline void
__funline void _mm512_prefetch_i32scatter_ps(void *__addr, __m512i __index, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __scale, int __hint) { _mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
void const *__addr, int __scale, int __hint)
{
__builtin_ia32_gatherpfqps (__mask, (__v8di) __index, __addr, __scale,
__hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
int __hint)
{
__builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
__scale, __hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
int __hint)
{
__builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr, __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
__scale, __hint); __scale, __hint);
} }
extern __inline void
__funline void _mm512_mask_prefetch_i32scatter_pd(void *__addr, __mmask8 __mask, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __index, int __scale, _mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
int __hint) { __m256i __index, int __scale, int __hint)
__builtin_ia32_scatterpfdpd(__mask, (__v8si)__index, __addr, __scale, __hint); {
__builtin_ia32_scatterpfdpd (__mask, (__v8si) __index, __addr, __scale,
__hint);
} }
extern __inline void
__funline void _mm512_mask_prefetch_i32scatter_ps(void *__addr, __mmask16 __mask, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __index, int __scale, _mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
int __hint) { __m512i __index, int __scale, int __hint)
{
__builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale, __builtin_ia32_scatterpfdps (__mask, (__v16si) __index, __addr, __scale,
__hint); __hint);
} }
extern __inline void
__funline void _mm512_prefetch_i64scatter_pd(void *__addr, __m512i __index, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __scale, int __hint) { _mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
__builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)__index, __addr, __scale, int __hint)
{
__builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,__addr,
__scale, __hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
int __hint)
{
__builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
__scale, __hint);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask8 __mask,
__m512i __index, int __scale, int __hint)
{
__builtin_ia32_scatterpfqpd (__mask, (__v8di) __index, __addr, __scale,
__hint); __hint);
} }
extern __inline void
__funline void _mm512_prefetch_i64scatter_ps(void *__addr, __m512i __index, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
int __scale, int __hint) { _mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask8 __mask,
__builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)__index, __addr, __scale, __m512i __index, int __scale, int __hint)
{
__builtin_ia32_scatterpfqps (__mask, (__v8di) __index, __addr, __scale,
__hint); __hint);
} }
__funline void _mm512_mask_prefetch_i64scatter_pd(void *__addr, __mmask8 __mask,
__m512i __index, int __scale,
int __hint) {
__builtin_ia32_scatterpfqpd(__mask, (__v8di)__index, __addr, __scale, __hint);
}
__funline void _mm512_mask_prefetch_i64scatter_ps(void *__addr, __mmask8 __mask,
__m512i __index, int __scale,
int __hint) {
__builtin_ia32_scatterpfqps(__mask, (__v8di)__index, __addr, __scale, __hint);
}
#else #else
#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \ #define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
__builtin_ia32_gatherpfdpd((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \ #define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
(void const *)ADDR, (int)SCALE, (int)HINT) #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX), (void const *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \ #define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
__builtin_ia32_gatherpfdps((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \ #define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
(void const *)ADDR, (int)SCALE, (int)HINT) #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ #define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
__builtin_ia32_gatherpfdpd((__mmask8)MASK, (__v8si)(__m256i)INDEX, \ #define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
(void const *)ADDR, (int)SCALE, (int)HINT) #define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \ #define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
__builtin_ia32_gatherpfdps((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ #define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
(void const *)ADDR, (int)SCALE, (int)HINT) #define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), (void *) (ADDR), (int) (SCALE), (int) (HINT))
#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \
__builtin_ia32_gatherpfqpd((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \
__builtin_ia32_gatherpfqps((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \
__builtin_ia32_gatherpfqpd((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT) \
__builtin_ia32_gatherpfqps((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfdpd((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfdps((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfdpd((__mmask8)MASK, (__v8si)(__m256i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfdps((__mmask16)MASK, (__v16si)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfqpd((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfqps((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfqpd((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT) \
__builtin_ia32_scatterpfqps((__mmask8)MASK, (__v8di)(__m512i)INDEX, \
(void *)ADDR, (int)SCALE, (int)HINT)
#endif #endif
#ifdef __DISABLE_AVX512PF__ #ifdef __DISABLE_AVX512PF__
#undef __DISABLE_AVX512PF__ #undef __DISABLE_AVX512PF__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512PF__ */ #endif
#endif
#endif /* _AVX512PFINTRIN_H_INCLUDED */ #endif

View file

@ -1,381 +1,407 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error \ #error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
"Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED #ifndef __AVX512VBMI2INTRIN_H_INCLUDED
#define __AVX512VBMI2INTRIN_H_INCLUDED #define __AVX512VBMI2INTRIN_H_INCLUDED
#if !defined(__AVX512VBMI2__) #if !defined(__AVX512VBMI2__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vbmi2") #pragma GCC target("avx512vbmi2")
#define __DISABLE_AVX512VBMI2__ #define __DISABLE_AVX512VBMI2__
#endif /* __AVX512VBMI2__ */ #endif
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m512i _mm512_shrdi_epi16(__m512i __A, __m512i __B, int __C) { extern __inline __m512i
return (__m512i)__builtin_ia32_vpshrd_v32hi((__v32hi)__A, (__v32hi)__B, __C); __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdi_epi16 (__m512i __A, __m512i __B, int __C)
{
return (__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)__A, (__v32hi) __B,
__C);
} }
extern __inline __m512i
__funline __m512i _mm512_shrdi_epi32(__m512i __A, __m512i __B, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpshrd_v16si((__v16si)__A, (__v16si)__B, __C); _mm512_shrdi_epi32 (__m512i __A, __m512i __B, int __C)
{
return (__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)__A, (__v16si) __B,
__C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shrdi_epi32(__m512i __A, __mmask16 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D, int __E) { _mm512_mask_shrdi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
return (__m512i)__builtin_ia32_vpshrd_v16si_mask( int __E)
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B); {
return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__C,
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shrdi_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, int __D) { _mm512_maskz_shrdi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
return (__m512i)__builtin_ia32_vpshrd_v16si_mask( {
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(), return (__m512i)__builtin_ia32_vpshrd_v16si_mask ((__v16si)__B,
(__mmask16)__A); (__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_shrdi_epi64(__m512i __A, __m512i __B, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdi_epi64 (__m512i __A, __m512i __B, int __C)
{
return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C); return (__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)__A, (__v8di) __B, __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shrdi_epi64(__m512i __A, __mmask8 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D, int __E) { _mm512_mask_shrdi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
return (__m512i)__builtin_ia32_vpshrd_v8di_mask((__v8di)__C, (__v8di)__D, __E, int __E)
(__v8di)__A, (__mmask8)__B); {
return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__C, (__v8di) __D,
__E, (__v8di) __A, (__mmask8)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shrdi_epi64(__mmask8 __A, __m512i __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
int __D) { _mm512_maskz_shrdi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
return (__m512i)__builtin_ia32_vpshrd_v8di_mask( {
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(), return (__m512i)__builtin_ia32_vpshrd_v8di_mask ((__v8di)__B, (__v8di) __C,
(__mmask8)__A); __D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_shldi_epi16(__m512i __A, __m512i __B, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpshld_v32hi((__v32hi)__A, (__v32hi)__B, __C); _mm512_shldi_epi16 (__m512i __A, __m512i __B, int __C)
{
return (__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)__A, (__v32hi) __B,
__C);
} }
extern __inline __m512i
__funline __m512i _mm512_shldi_epi32(__m512i __A, __m512i __B, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpshld_v16si((__v16si)__A, (__v16si)__B, __C); _mm512_shldi_epi32 (__m512i __A, __m512i __B, int __C)
{
return (__m512i) __builtin_ia32_vpshld_v16si ((__v16si)__A, (__v16si) __B,
__C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shldi_epi32(__m512i __A, __mmask16 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D, int __E) { _mm512_mask_shldi_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D,
return (__m512i)__builtin_ia32_vpshld_v16si_mask( int __E)
(__v16si)__C, (__v16si)__D, __E, (__v16si)__A, (__mmask16)__B); {
return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__C,
(__v16si) __D, __E, (__v16si) __A, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shldi_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, int __D) { _mm512_maskz_shldi_epi32 (__mmask16 __A, __m512i __B, __m512i __C, int __D)
return (__m512i)__builtin_ia32_vpshld_v16si_mask( {
(__v16si)__B, (__v16si)__C, __D, (__v16si)_mm512_setzero_si512(), return (__m512i)__builtin_ia32_vpshld_v16si_mask ((__v16si)__B,
(__mmask16)__A); (__v16si) __C, __D, (__v16si) _mm512_setzero_si512 (), (__mmask16)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_shldi_epi64(__m512i __A, __m512i __B, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldi_epi64 (__m512i __A, __m512i __B, int __C)
{
return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C); return (__m512i) __builtin_ia32_vpshld_v8di ((__v8di)__A, (__v8di) __B, __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shldi_epi64(__m512i __A, __mmask8 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D, int __E) { _mm512_mask_shldi_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D,
return (__m512i)__builtin_ia32_vpshld_v8di_mask((__v8di)__C, (__v8di)__D, __E, int __E)
(__v8di)__A, (__mmask8)__B); {
return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__C, (__v8di) __D,
__E, (__v8di) __A, (__mmask8)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shldi_epi64(__mmask8 __A, __m512i __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
int __D) { _mm512_maskz_shldi_epi64 (__mmask8 __A, __m512i __B, __m512i __C, int __D)
return (__m512i)__builtin_ia32_vpshld_v8di_mask( {
(__v8di)__B, (__v8di)__C, __D, (__v8di)_mm512_setzero_si512(), return (__m512i)__builtin_ia32_vpshld_v8di_mask ((__v8di)__B, (__v8di) __C,
(__mmask8)__A); __D, (__v8di) _mm512_setzero_si512 (), (__mmask8)__A);
} }
#else #else
#define _mm512_shrdi_epi16(A, B, C) \ #define _mm512_shrdi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
((__m512i) __builtin_ia32_vpshrd_v32hi ((__v32hi)(__m512i)(A), \ #define _mm512_shrdi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
(__v32hi)(__m512i)(B),(int)(C)) #define _mm512_mask_shrdi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
#define _mm512_shrdi_epi32(A, B, C) \ #define _mm512_maskz_shrdi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
((__m512i) __builtin_ia32_vpshrd_v16si ((__v16si)(__m512i)(A), \ #define _mm512_shrdi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B),(int)(C)))
(__v16si)(__m512i)(B),(int)(C)) #define _mm512_mask_shrdi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
#define _mm512_mask_shrdi_epi32(A, B, C, D, E) \ #define _mm512_maskz_shrdi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(C), \ #define _mm512_shldi_epi16(A, B, C) ((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B),(int)(C)))
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B)) #define _mm512_shldi_epi32(A, B, C) ((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), (__v16si)(__m512i)(B),(int)(C)))
#define _mm512_maskz_shrdi_epi32(A, B, C, D) \ #define _mm512_mask_shldi_epi32(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), (__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A), (__mmask16)(B)))
((__m512i) __builtin_ia32_vpshrd_v16si_mask ((__v16si)(__m512i)(B), \ #define _mm512_maskz_shldi_epi32(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), (__v16si)(__m512i)(C),(int)(D), (__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)))
(__v16si)(__m512i)(C),(int)(D), \ #define _mm512_shldi_epi64(A, B, C) ((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(C)))
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A)) #define _mm512_mask_shldi_epi64(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), (__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A), (__mmask8)(B)))
#define _mm512_shrdi_epi64(A, B, C) \ #define _mm512_maskz_shldi_epi64(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), (__v8di)(__m512i)(C),(int)(D), (__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A)))
((__m512i) __builtin_ia32_vpshrd_v8di ((__v8di)(__m512i)(A), \
(__v8di)(__m512i)(B),(int)(C))
#define _mm512_mask_shrdi_epi64(A, B, C, D, E) \
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(C), \
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
#define _mm512_maskz_shrdi_epi64(A, B, C, D) \
((__m512i) __builtin_ia32_vpshrd_v8di_mask ((__v8di)(__m512i)(B), \
(__v8di)(__m512i)(C),(int)(D), \
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
#define _mm512_shldi_epi16(A, B, C) \
((__m512i) __builtin_ia32_vpshld_v32hi ((__v32hi)(__m512i)(A), \
(__v32hi)(__m512i)(B),(int)(C))
#define _mm512_shldi_epi32(A, B, C) \
((__m512i) __builtin_ia32_vpshld_v16si ((__v16si)(__m512i)(A), \
(__v16si)(__m512i)(B),(int)(C))
#define _mm512_mask_shldi_epi32(A, B, C, D, E) \
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(C), \
(__v16si)(__m512i)(D), (int)(E), (__v16si)(__m512i)(A),(__mmask16)(B))
#define _mm512_maskz_shldi_epi32(A, B, C, D) \
((__m512i) __builtin_ia32_vpshld_v16si_mask ((__v16si)(__m512i)(B), \
(__v16si)(__m512i)(C),(int)(D), \
(__v16si)(__m512i)_mm512_setzero_si512 (), (__mmask16)(A))
#define _mm512_shldi_epi64(A, B, C) \
((__m512i) __builtin_ia32_vpshld_v8di ((__v8di)(__m512i)(A), \
(__v8di)(__m512i)(B),(int)(C))
#define _mm512_mask_shldi_epi64(A, B, C, D, E) \
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(C), \
(__v8di)(__m512i)(D), (int)(E), (__v8di)(__m512i)(A),(__mmask8)(B))
#define _mm512_maskz_shldi_epi64(A, B, C, D) \
((__m512i) __builtin_ia32_vpshld_v8di_mask ((__v8di)(__m512i)(B), \
(__v8di)(__m512i)(C),(int)(D), \
(__v8di)(__m512i)_mm512_setzero_si512 (), (__mmask8)(A))
#endif #endif
extern __inline __m512i
__funline __m512i _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdv_epi16 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B, return (__m512i) __builtin_ia32_vpshrdv_v32hi ((__v32hi)__A, (__v32hi) __B,
(__v32hi) __C); (__v32hi) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdv_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B, return (__m512i) __builtin_ia32_vpshrdv_v16si ((__v16si)__A, (__v16si) __B,
(__v16si) __C); (__v16si) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_mask_shrdv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshrdv_v16si_mask( {
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B); return (__m512i)__builtin_ia32_vpshrdv_v16si_mask ((__v16si)__A,
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shrdv_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_shrdv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz( {
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A); return (__m512i)__builtin_ia32_vpshrdv_v16si_maskz ((__v16si)__B,
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shrdv_epi64 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B, return (__m512i) __builtin_ia32_vpshrdv_v8di ((__v8di)__A, (__v8di) __B,
(__v8di) __C); (__v8di) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_mask_shrdv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
{
return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C, return (__m512i)__builtin_ia32_vpshrdv_v8di_mask ((__v8di)__A, (__v8di) __C,
(__v8di) __D, (__mmask8)__B); (__v8di) __D, (__mmask8)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shrdv_epi64(__mmask8 __A, __m512i __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_maskz_shrdv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
{
return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C, return (__m512i)__builtin_ia32_vpshrdv_v8di_maskz ((__v8di)__B, (__v8di) __C,
(__v8di) __D, (__mmask8)__A); (__v8di) __D, (__mmask8)__A);
} }
__funline __m512i _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) { extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldv_epi16 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B, return (__m512i) __builtin_ia32_vpshldv_v32hi ((__v32hi)__A, (__v32hi) __B,
(__v32hi) __C); (__v32hi) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldv_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B, return (__m512i) __builtin_ia32_vpshldv_v16si ((__v16si)__A, (__v16si) __B,
(__v16si) __C); (__v16si) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_mask_shldv_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshldv_v16si_mask( {
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B); return (__m512i)__builtin_ia32_vpshldv_v16si_mask ((__v16si)__A,
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shldv_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_shldv_epi32 (__mmask16 __A, __m512i __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshldv_v16si_maskz( {
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A); return (__m512i)__builtin_ia32_vpshldv_v16si_maskz ((__v16si)__B,
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_shldv_epi64 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B, return (__m512i) __builtin_ia32_vpshldv_v8di ((__v8di)__A, (__v8di) __B,
(__v8di) __C); (__v8di) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_mask_shldv_epi64 (__m512i __A, __mmask8 __B, __m512i __C, __m512i __D)
{
return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C, return (__m512i)__builtin_ia32_vpshldv_v8di_mask ((__v8di)__A, (__v8di) __C,
(__v8di) __D, (__mmask8)__B); (__v8di) __D, (__mmask8)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shldv_epi64(__mmask8 __A, __m512i __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_maskz_shldv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D)
{
return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C, return (__m512i)__builtin_ia32_vpshldv_v8di_maskz ((__v8di)__B, (__v8di) __C,
(__v8di) __D, (__mmask8)__A); (__v8di) __D, (__mmask8)__A);
} }
#ifdef __DISABLE_AVX512VBMI2__ #ifdef __DISABLE_AVX512VBMI2__
#undef __DISABLE_AVX512VBMI2__ #undef __DISABLE_AVX512VBMI2__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2__ */ #endif
#if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__) #if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vbmi2,avx512bw") #pragma GCC target("avx512vbmi2,avx512bw")
#define __DISABLE_AVX512VBMI2BW__ #define __DISABLE_AVX512VBMI2BW__
#endif /* __AVX512VBMI2BW__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_mask_compress_epi8(__m512i __A, __mmask64 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C) { _mm512_mask_compress_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
return (__m512i)__builtin_ia32_compressqi512_mask((__v64qi)__C, (__v64qi)__A, {
(__mmask64)__B); return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__C,
(__v64qi)__A, (__mmask64)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_compress_epi8(__mmask64 __A, __m512i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_compressqi512_mask( _mm512_maskz_compress_epi8 (__mmask64 __A, __m512i __B)
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A); {
return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi)__B,
(__v64qi)_mm512_setzero_si512 (), (__mmask64)__A);
} }
extern __inline void
__funline void _mm512_mask_compressstoreu_epi8(void *__A, __mmask64 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C) { _mm512_mask_compressstoreu_epi8 (void * __A, __mmask64 __B, __m512i __C)
{
__builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C, __builtin_ia32_compressstoreuqi512_mask ((__v64qi *) __A, (__v64qi) __C,
(__mmask64) __B); (__mmask64) __B);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_compress_epi16(__m512i __A, __mmask32 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C) { _mm512_mask_compress_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
return (__m512i)__builtin_ia32_compresshi512_mask((__v32hi)__C, (__v32hi)__A, {
(__mmask32)__B); return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__C,
(__v32hi)__A, (__mmask32)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_compress_epi16(__mmask32 __A, __m512i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_compresshi512_mask( _mm512_maskz_compress_epi16 (__mmask32 __A, __m512i __B)
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A); {
return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi)__B,
(__v32hi)_mm512_setzero_si512 (), (__mmask32)__A);
} }
extern __inline void
__funline void _mm512_mask_compressstoreu_epi16(void *__A, __mmask32 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C) { _mm512_mask_compressstoreu_epi16 (void * __A, __mmask32 __B, __m512i __C)
{
__builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C, __builtin_ia32_compressstoreuhi512_mask ((__v32hi *) __A, (__v32hi) __C,
(__mmask32) __B); (__mmask32) __B);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_expand_epi8(__m512i __A, __mmask64 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C) { _mm512_mask_expand_epi8 (__m512i __A, __mmask64 __B, __m512i __C)
return (__m512i)__builtin_ia32_expandqi512_mask((__v64qi)__C, (__v64qi)__A, {
return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __C,
(__v64qi) __A,
(__mmask64) __B); (__mmask64) __B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_expand_epi8(__mmask64 __A, __m512i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_expandqi512_maskz( _mm512_maskz_expand_epi8 (__mmask64 __A, __m512i __B)
(__v64qi)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A); {
return (__m512i) __builtin_ia32_expandqi512_maskz ((__v64qi) __B,
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_expandloadu_epi8(__m512i __A, __mmask64 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
const void *__C) { _mm512_mask_expandloadu_epi8 (__m512i __A, __mmask64 __B, const void * __C)
return (__m512i)__builtin_ia32_expandloadqi512_mask( {
(const __v64qi *)__C, (__v64qi)__A, (__mmask64)__B); return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *) __C,
(__v64qi) __A, (__mmask64) __B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_expandloadu_epi8(__mmask64 __A, const void *__B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_expandloadqi512_maskz( _mm512_maskz_expandloadu_epi8 (__mmask64 __A, const void * __B)
(const __v64qi *)__B, (__v64qi)_mm512_setzero_si512(), (__mmask64)__A); {
return (__m512i) __builtin_ia32_expandloadqi512_maskz ((const __v64qi *) __B,
(__v64qi) _mm512_setzero_si512 (), (__mmask64) __A);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_expand_epi16(__m512i __A, __mmask32 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C) { _mm512_mask_expand_epi16 (__m512i __A, __mmask32 __B, __m512i __C)
return (__m512i)__builtin_ia32_expandhi512_mask((__v32hi)__C, (__v32hi)__A, {
return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __C,
(__v32hi) __A,
(__mmask32) __B); (__mmask32) __B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_expand_epi16(__mmask32 __A, __m512i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_expandhi512_maskz( _mm512_maskz_expand_epi16 (__mmask32 __A, __m512i __B)
(__v32hi)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A); {
return (__m512i) __builtin_ia32_expandhi512_maskz ((__v32hi) __B,
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_expandloadu_epi16(__m512i __A, __mmask32 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
const void *__C) { _mm512_mask_expandloadu_epi16 (__m512i __A, __mmask32 __B, const void * __C)
return (__m512i)__builtin_ia32_expandloadhi512_mask( {
(const __v32hi *)__C, (__v32hi)__A, (__mmask32)__B); return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *) __C,
(__v32hi) __A, (__mmask32) __B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_expandloadu_epi16(__mmask32 __A, const void *__B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_expandloadhi512_maskz( _mm512_maskz_expandloadu_epi16 (__mmask32 __A, const void * __B)
(const __v32hi *)__B, (__v32hi)_mm512_setzero_si512(), (__mmask32)__A); {
return (__m512i) __builtin_ia32_expandloadhi512_maskz ((const __v32hi *) __B,
(__v32hi) _mm512_setzero_si512 (), (__mmask32) __A);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m512i _mm512_mask_shrdi_epi16(__m512i __A, __mmask32 __B, __m512i __C, extern __inline __m512i
__m512i __D, int __E) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask( _mm512_mask_shrdi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B); int __E)
{
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__C,
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shrdi_epi16(__mmask32 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, int __D) { _mm512_maskz_shrdi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
return (__m512i)__builtin_ia32_vpshrd_v32hi_mask( {
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(), return (__m512i)__builtin_ia32_vpshrd_v32hi_mask ((__v32hi)__B,
(__mmask32)__A); (__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shldi_epi16(__m512i __A, __mmask32 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D, int __E) { _mm512_mask_shldi_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D,
return (__m512i)__builtin_ia32_vpshld_v32hi_mask( int __E)
(__v32hi)__C, (__v32hi)__D, __E, (__v32hi)__A, (__mmask32)__B); {
return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__C,
(__v32hi) __D, __E, (__v32hi) __A, (__mmask32)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shldi_epi16(__mmask32 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, int __D) { _mm512_maskz_shldi_epi16 (__mmask32 __A, __m512i __B, __m512i __C, int __D)
return (__m512i)__builtin_ia32_vpshld_v32hi_mask( {
(__v32hi)__B, (__v32hi)__C, __D, (__v32hi)_mm512_setzero_si512(), return (__m512i)__builtin_ia32_vpshld_v32hi_mask ((__v32hi)__B,
(__mmask32)__A); (__v32hi) __C, __D, (__v32hi) _mm512_setzero_si512 (), (__mmask32)__A);
} }
#else #else
#define _mm512_mask_shrdi_epi16(A, B, C, D, E) \ #define _mm512_mask_shrdi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(C), \ #define _mm512_maskz_shrdi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C),(int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
(__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A),(__mmask32)(B)) #define _mm512_mask_shldi_epi16(A, B, C, D, E) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), (__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A), (__mmask32)(B)))
#define _mm512_maskz_shrdi_epi16(A, B, C, D) \ #define _mm512_maskz_shldi_epi16(A, B, C, D) ((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), (__v32hi)(__m512i)(C),(int)(D), (__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A)))
((__m512i) __builtin_ia32_vpshrd_v32hi_mask ((__v32hi)(__m512i)(B), \
(__v32hi)(__m512i)(C),(int)(D), \
(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
#define _mm512_mask_shldi_epi16(A, B, C, D, E) \
((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(C), \
(__v32hi)(__m512i)(D), (int)(E), (__v32hi)(__m512i)(A),(__mmask32)(B))
#define _mm512_maskz_shldi_epi16(A, B, C, D) \
((__m512i) __builtin_ia32_vpshld_v32hi_mask ((__v32hi)(__m512i)(B), \
(__v32hi)(__m512i)(C),(int)(D), \
(__v32hi)(__m512i)_mm512_setzero_si512 (), (__mmask32)(A))
#endif #endif
extern __inline __m512i
__funline __m512i _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_mask_shrdv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask( {
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B); return (__m512i)__builtin_ia32_vpshrdv_v32hi_mask ((__v32hi)__A,
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shrdv_epi16(__mmask32 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_shrdv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz( {
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A); return (__m512i)__builtin_ia32_vpshrdv_v32hi_maskz ((__v32hi)__B,
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __B, __m512i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __D) { _mm512_mask_shldv_epi16 (__m512i __A, __mmask32 __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshldv_v32hi_mask( {
(__v32hi)__A, (__v32hi)__C, (__v32hi)__D, (__mmask32)__B); return (__m512i)__builtin_ia32_vpshldv_v32hi_mask ((__v32hi)__A,
(__v32hi) __C, (__v32hi) __D, (__mmask32)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_shldv_epi16(__mmask32 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_shldv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz( {
(__v32hi)__B, (__v32hi)__C, (__v32hi)__D, (__mmask32)__A); return (__m512i)__builtin_ia32_vpshldv_v32hi_maskz ((__v32hi)__B,
(__v32hi) __C, (__v32hi) __D, (__mmask32)__A);
} }
#ifdef __DISABLE_AVX512VBMI2BW__ #ifdef __DISABLE_AVX512VBMI2BW__
#undef __DISABLE_AVX512VBMI2BW__ #undef __DISABLE_AVX512VBMI2BW__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI2BW__ */ #endif
#endif
#endif /* __AVX512VBMI2INTRIN_H_INCLUDED */ #endif

File diff suppressed because it is too large Load diff

View file

@ -1,90 +1,124 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead." #error "Never use <avx512vbmiintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512VBMIINTRIN_H_INCLUDED #ifndef _AVX512VBMIINTRIN_H_INCLUDED
#define _AVX512VBMIINTRIN_H_INCLUDED #define _AVX512VBMIINTRIN_H_INCLUDED
#ifndef __AVX512VBMI__ #ifndef __AVX512VBMI__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vbmi") #pragma GCC target("avx512vbmi")
#define __DISABLE_AVX512VBMI__ #define __DISABLE_AVX512VBMI__
#endif /* __AVX512VBMI__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __X, __m512i __Y) { _mm512_mask_multishift_epi64_epi8 (__m512i __W, __mmask64 __M, __m512i __X, __m512i __Y)
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask( {
(__v64qi)__X, (__v64qi)__Y, (__v64qi)__W, (__mmask64)__M); return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
} (__v64qi) __Y,
(__v64qi) __W,
__funline __m512i _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X,
__m512i __Y) {
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask(
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_setzero_si512(),
(__mmask64) __M); (__mmask64) __M);
} }
extern __inline __m512i
__funline __m512i _mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpmultishiftqb512_mask( _mm512_maskz_multishift_epi64_epi8 (__mmask64 __M, __m512i __X, __m512i __Y)
(__v64qi)__X, (__v64qi)__Y, (__v64qi)_mm512_undefined_epi32(), {
(__mmask64)-1); return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
} (__v64qi) __Y,
(__v64qi)
__funline __m512i _mm512_permutexvar_epi8(__m512i __A, __m512i __B) { _mm512_setzero_si512 (),
return (__m512i)__builtin_ia32_permvarqi512_mask(
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_undefined_epi32(),
(__mmask64)-1);
}
__funline __m512i _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A,
__m512i __B) {
return (__m512i)__builtin_ia32_permvarqi512_mask(
(__v64qi)__B, (__v64qi)__A, (__v64qi)_mm512_setzero_si512(),
(__mmask64) __M); (__mmask64) __M);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __A, __m512i __B) { _mm512_multishift_epi64_epi8 (__m512i __X, __m512i __Y)
return (__m512i)__builtin_ia32_permvarqi512_mask( {
(__v64qi)__B, (__v64qi)__A, (__v64qi)__W, (__mmask64)__M); return (__m512i) __builtin_ia32_vpmultishiftqb512_mask ((__v64qi) __X,
(__v64qi) __Y,
(__v64qi)
_mm512_undefined_epi32 (),
(__mmask64) -1);
} }
extern __inline __m512i
__funline __m512i _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B) { _mm512_permutexvar_epi8 (__m512i __A, __m512i __B)
return (__m512i)__builtin_ia32_vpermt2varqi512_mask( {
(__v64qi)__I return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
/* idx */, (__v64qi) __A,
(__v64qi)__A, (__v64qi)__B, (__mmask64)-1); (__v64qi)
_mm512_undefined_epi32 (),
(__mmask64) -1);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __I, __m512i __B) { _mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A,
return (__m512i)__builtin_ia32_vpermt2varqi512_mask( __m512i __B)
(__v64qi)__I {
/* idx */, return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U); (__v64qi) __A,
(__v64qi)
_mm512_setzero_si512(),
(__mmask64) __M);
} }
extern __inline __m512i
__funline __m512i _mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__mmask64 __U, __m512i __B) { _mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A,
__m512i __B)
{
return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B,
(__v64qi) __A,
(__v64qi) __W,
(__mmask64) __M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex2var_epi8 (__m512i __A, __m512i __I, __m512i __B)
{
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
,
(__v64qi) __A,
(__v64qi) __B,
(__mmask64) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex2var_epi8 (__m512i __A, __mmask64 __U,
__m512i __I, __m512i __B)
{
return (__m512i) __builtin_ia32_vpermt2varqi512_mask ((__v64qi) __I
,
(__v64qi) __A,
(__v64qi) __B,
(__mmask64)
__U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2_permutex2var_epi8 (__m512i __A, __m512i __I,
__mmask64 __U, __m512i __B)
{
return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A, return (__m512i) __builtin_ia32_vpermi2varqi512_mask ((__v64qi) __A,
(__v64qi) __I (__v64qi) __I
/* idx */, ,
(__v64qi) __B, (__v64qi) __B,
(__mmask64)__U); (__mmask64)
__U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m512i __I, __m512i __B) { _mm512_maskz_permutex2var_epi8 (__mmask64 __U, __m512i __A,
return (__m512i)__builtin_ia32_vpermt2varqi512_maskz( __m512i __I, __m512i __B)
(__v64qi)__I {
/* idx */, return (__m512i) __builtin_ia32_vpermt2varqi512_maskz ((__v64qi) __I
(__v64qi)__A, (__v64qi)__B, (__mmask64)__U); ,
(__v64qi) __A,
(__v64qi) __B,
(__mmask64)
__U);
} }
#ifdef __DISABLE_AVX512VBMI__ #ifdef __DISABLE_AVX512VBMI__
#undef __DISABLE_AVX512VBMI__ #undef __DISABLE_AVX512VBMI__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMI__ */ #endif
#endif
#endif /* _AVX512VBMIINTRIN_H_INCLUDED */ #endif

View file

@ -1,159 +1,229 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error \ #error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
"Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED #ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
#define _AVX512VBMIVLINTRIN_H_INCLUDED #define _AVX512VBMIVLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) #if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vbmi,avx512vl") #pragma GCC target("avx512vbmi,avx512vl")
#define __DISABLE_AVX512VBMIVL__ #define __DISABLE_AVX512VBMIVL__
#endif /* __AVX512VBMIVL__ */ #endif
extern __inline __m256i
__funline __m256i _mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __X, __m256i __Y) { _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask( {
(__v32qi)__X, (__v32qi)__Y, (__v32qi)__W, (__mmask32)__M); return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
} (__v32qi) __Y,
(__v32qi) __W,
__funline __m256i _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X,
__m256i __Y) {
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask(
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_setzero_si256(),
(__mmask32) __M); (__mmask32) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpmultishiftqb256_mask( _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
(__v32qi)__X, (__v32qi)__Y, (__v32qi)_mm256_undefined_si256(), {
(__mmask32)-1); return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
} (__v32qi) __Y,
(__v32qi)
__funline __m128i _mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, _mm256_setzero_si256 (),
__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
(__v16qi)__X, (__v16qi)__Y, (__v16qi)__W, (__mmask16)__M);
}
__funline __m128i _mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X,
__m128i __Y) {
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_setzero_si128(), (__mmask16)__M);
}
__funline __m128i _mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_vpmultishiftqb128_mask(
(__v16qi)__X, (__v16qi)__Y, (__v16qi)_mm_undefined_si128(),
(__mmask16)-1);
}
__funline __m256i _mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256_mask(
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_undefined_si256(),
(__mmask32)-1);
}
__funline __m256i _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A,
__m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256_mask(
(__v32qi)__B, (__v32qi)__A, (__v32qi)_mm256_setzero_si256(),
(__mmask32) __M); (__mmask32) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __A, __m256i __B) { _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
return (__m256i)__builtin_ia32_permvarqi256_mask( {
(__v32qi)__B, (__v32qi)__A, (__v32qi)__W, (__mmask32)__M); return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
(__v32qi) __Y,
(__v32qi)
_mm256_undefined_si256 (),
(__mmask32) -1);
} }
extern __inline __m128i
__funline __m128i _mm_permutexvar_epi8(__m128i __A, __m128i __B) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_permvarqi128_mask( _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_undefined_si128(), {
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi) __W,
(__mmask16) __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi)
_mm_setzero_si128 (),
(__mmask16) __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
{
return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
(__v16qi) __Y,
(__v16qi)
_mm_undefined_si128 (),
(__mmask16) -1); (__mmask16) -1);
} }
extern __inline __m256i
__funline __m128i _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __B) { _mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
return (__m128i)__builtin_ia32_permvarqi128_mask( {
(__v16qi)__B, (__v16qi)__A, (__v16qi)_mm_setzero_si128(), (__mmask16)__M); return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
(__v32qi) __A,
(__v32qi)
_mm256_undefined_si256 (),
(__mmask32) -1);
} }
extern __inline __m256i
__funline __m128i _mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __A, __m128i __B) { _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
return (__m128i)__builtin_ia32_permvarqi128_mask( __m256i __B)
(__v16qi)__B, (__v16qi)__A, (__v16qi)__W, (__mmask16)__M); {
return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
(__v32qi) __A,
(__v32qi)
_mm256_setzero_si256 (),
(__mmask32) __M);
} }
extern __inline __m256i
__funline __m256i _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __B) { _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
return (__m256i)__builtin_ia32_vpermt2varqi256_mask( __m256i __B)
(__v32qi)__I {
/* idx */, return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B,
(__v32qi)__A, (__v32qi)__B, (__mmask32)-1); (__v32qi) __A,
(__v32qi) __W,
(__mmask32) __M);
} }
extern __inline __m128i
__funline __m256i _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __I, __m256i __B) { _mm_permutexvar_epi8 (__m128i __A, __m128i __B)
return (__m256i)__builtin_ia32_vpermt2varqi256_mask( {
(__v32qi)__I return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
/* idx */, (__v16qi) __A,
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U); (__v16qi)
_mm_undefined_si128 (),
(__mmask16) -1);
} }
extern __inline __m128i
__funline __m256i _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__mmask32 __U, __m256i __B) { _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
(__v16qi) __A,
(__v16qi)
_mm_setzero_si128 (),
(__mmask16) __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
__m128i __B)
{
return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B,
(__v16qi) __A,
(__v16qi) __W,
(__mmask16) __M);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B)
{
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
,
(__v32qi) __A,
(__v32qi) __B,
(__mmask32) -1);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U,
__m256i __I, __m256i __B)
{
return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I
,
(__v32qi) __A,
(__v32qi) __B,
(__mmask32)
__U);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I,
__mmask32 __U, __m256i __B)
{
return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A, return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A,
(__v32qi) __I (__v32qi) __I
/* idx */, ,
(__v32qi) __B, (__v32qi) __B,
(__mmask32)__U); (__mmask32)
__U);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m256i __I, __m256i __B) { _mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A,
return (__m256i)__builtin_ia32_vpermt2varqi256_maskz( __m256i __I, __m256i __B)
(__v32qi)__I {
/* idx */, return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I
(__v32qi)__A, (__v32qi)__B, (__mmask32)__U); ,
(__v32qi) __A,
(__v32qi) __B,
(__mmask32)
__U);
} }
extern __inline __m128i
__funline __m128i _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) { __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpermt2varqi128_mask( _mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B)
(__v16qi)__I {
/* idx */, return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
(__v16qi)__A, (__v16qi)__B, (__mmask16)-1); ,
(__v16qi) __A,
(__v16qi) __B,
(__mmask16) -1);
} }
extern __inline __m128i
__funline __m128i _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __I, __m128i __B) { _mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I,
return (__m128i)__builtin_ia32_vpermt2varqi128_mask( __m128i __B)
(__v16qi)__I {
/* idx */, return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U); ,
(__v16qi) __A,
(__v16qi) __B,
(__mmask16)
__U);
} }
extern __inline __m128i
__funline __m128i _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__mmask16 __U, __m128i __B) { _mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U,
__m128i __B)
{
return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A, return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A,
(__v16qi) __I (__v16qi) __I
/* idx */, ,
(__v16qi) __B, (__v16qi) __B,
(__mmask16)__U); (__mmask16)
__U);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
__m128i __I, __m128i __B) { _mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I,
return (__m128i)__builtin_ia32_vpermt2varqi128_maskz( __m128i __B)
(__v16qi)__I {
/* idx */, return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I
(__v16qi)__A, (__v16qi)__B, (__mmask16)__U); ,
(__v16qi) __A,
(__v16qi) __B,
(__mmask16)
__U);
} }
#ifdef __DISABLE_AVX512VBMIVL__ #ifdef __DISABLE_AVX512VBMIVL__
#undef __DISABLE_AVX512VBMIVL__ #undef __DISABLE_AVX512VBMIVL__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VBMIVL__ */ #endif
#endif
#endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */ #endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,87 +1,108 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead." #error "Never use <avx512vnniintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef __AVX512VNNIINTRIN_H_INCLUDED #ifndef __AVX512VNNIINTRIN_H_INCLUDED
#define __AVX512VNNIINTRIN_H_INCLUDED #define __AVX512VNNIINTRIN_H_INCLUDED
#if !defined(__AVX512VNNI__) #if !defined(__AVX512VNNI__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vnni") #pragma GCC target("avx512vnni")
#define __DISABLE_AVX512VNNI__ #define __DISABLE_AVX512VNNI__
#endif /* __AVX512VNNI__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_dpbusd_epi32(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusd_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpdpbusd_v16si ((__v16si)__A, (__v16si) __B, return (__m512i) __builtin_ia32_vpdpbusd_v16si ((__v16si)__A, (__v16si) __B,
(__v16si) __C); (__v16si) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_dpbusd_epi32(__m512i __A, __mmask16 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_mask_dpbusd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask( {
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B); return (__m512i)__builtin_ia32_vpdpbusd_v16si_mask ((__v16si)__A,
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_dpbusd_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_dpbusd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz( __m512i __D)
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A); {
return (__m512i)__builtin_ia32_vpdpbusd_v16si_maskz ((__v16si)__B,
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_dpbusds_epi32(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpbusds_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpdpbusds_v16si ((__v16si)__A, (__v16si) __B, return (__m512i) __builtin_ia32_vpdpbusds_v16si ((__v16si)__A, (__v16si) __B,
(__v16si) __C); (__v16si) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_dpbusds_epi32(__m512i __A, __mmask16 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_mask_dpbusds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask( __m512i __D)
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B); {
return (__m512i)__builtin_ia32_vpdpbusds_v16si_mask ((__v16si)__A,
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_dpbusds_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_dpbusds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz( __m512i __D)
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A); {
return (__m512i)__builtin_ia32_vpdpbusds_v16si_maskz ((__v16si)__B,
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_dpwssd_epi32(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpdpwssd_v16si ((__v16si)__A, (__v16si) __B, return (__m512i) __builtin_ia32_vpdpwssd_v16si ((__v16si)__A, (__v16si) __B,
(__v16si) __C); (__v16si) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_dpwssd_epi32(__m512i __A, __mmask16 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_mask_dpwssd_epi32 (__m512i __A, __mmask16 __B, __m512i __C, __m512i __D)
return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask( {
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B); return (__m512i)__builtin_ia32_vpdpwssd_v16si_mask ((__v16si)__A,
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_dpwssd_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_dpwssd_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz( __m512i __D)
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A); {
return (__m512i)__builtin_ia32_vpdpwssd_v16si_maskz ((__v16si)__B,
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
} }
extern __inline __m512i
__funline __m512i _mm512_dpwssds_epi32(__m512i __A, __m512i __B, __m512i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C)
{
return (__m512i) __builtin_ia32_vpdpwssds_v16si ((__v16si)__A, (__v16si) __B, return (__m512i) __builtin_ia32_vpdpwssds_v16si ((__v16si)__A, (__v16si) __B,
(__v16si) __C); (__v16si) __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_dpwssds_epi32(__m512i __A, __mmask16 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_mask_dpwssds_epi32 (__m512i __A, __mmask16 __B, __m512i __C,
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask( __m512i __D)
(__v16si)__A, (__v16si)__C, (__v16si)__D, (__mmask16)__B); {
return (__m512i)__builtin_ia32_vpdpwssds_v16si_mask ((__v16si)__A,
(__v16si) __C, (__v16si) __D, (__mmask16)__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_dpwssds_epi32(__mmask16 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_maskz_dpwssds_epi32 (__mmask16 __A, __m512i __B, __m512i __C,
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz( __m512i __D)
(__v16si)__B, (__v16si)__C, (__v16si)__D, (__mmask16)__A); {
return (__m512i)__builtin_ia32_vpdpwssds_v16si_maskz ((__v16si)__B,
(__v16si) __C, (__v16si) __D, (__mmask16)__A);
} }
#ifdef __DISABLE_AVX512VNNI__ #ifdef __DISABLE_AVX512VNNI__
#undef __DISABLE_AVX512VNNI__ #undef __DISABLE_AVX512VNNI__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VNNI__ */ #endif
#endif
#endif /* __AVX512VNNIINTRIN_H_INCLUDED */ #endif

View file

@ -1,154 +1,140 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error \ #error "Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
"Never use <avx512vnnivlintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED #ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
#define _AVX512VNNIVLINTRIN_H_INCLUDED #define _AVX512VNNIVLINTRIN_H_INCLUDED
#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) #if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vnni,avx512vl") #pragma GCC target("avx512vnni,avx512vl")
#define __DISABLE_AVX512VNNIVL__ #define __DISABLE_AVX512VNNIVL__
#endif /* __AVX512VNNIVL__ */ #endif
#define _mm256_dpbusd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
__funline __m256i _mm256_dpbusd_epi32(__m256i __A, __m256i __B, __m256i __C) { extern __inline __m256i
return (__m256i)__builtin_ia32_vpdpbusd_v8si((__v8si)__A, (__v8si)__B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v8si)__C); _mm256_mask_dpbusd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
} {
__funline __m256i _mm256_mask_dpbusd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C, return (__m256i)__builtin_ia32_vpdpbusd_v8si_mask ((__v8si)__A, (__v8si) __C,
(__v8si) __D, (__mmask8)__B); (__v8si) __D, (__mmask8)__B);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_dpbusd_epi32(__mmask8 __A, __m256i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, __m256i __D) { _mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz( {
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A); return (__m256i)__builtin_ia32_vpdpbusd_v8si_maskz ((__v8si)__B,
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
} }
#define _mm_dpbusd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
__funline __m128i _mm_dpbusd_epi32(__m128i __A, __m128i __B, __m128i __C) { extern __inline __m128i
return (__m128i)__builtin_ia32_vpdpbusd_v4si((__v4si)__A, (__v4si)__B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v4si)__C); _mm_mask_dpbusd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
} {
__funline __m128i _mm_mask_dpbusd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C, return (__m128i)__builtin_ia32_vpdpbusd_v4si_mask ((__v4si)__A, (__v4si) __C,
(__v4si) __D, (__mmask8)__B); (__v4si) __D, (__mmask8)__B);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_dpbusd_epi32(__mmask8 __A, __m128i __B, __m128i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __D) { _mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz( {
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A); return (__m128i)__builtin_ia32_vpdpbusd_v4si_maskz ((__v4si)__B,
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
} }
#define _mm256_dpbusds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
__funline __m256i _mm256_dpbusds_epi32(__m256i __A, __m256i __B, __m256i __C) { extern __inline __m256i
return (__m256i)__builtin_ia32_vpdpbusds_v8si((__v8si)__A, (__v8si)__B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v8si)__C); _mm256_mask_dpbusds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask ((__v8si)__A,
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_dpbusds_epi32(__m256i __A, __mmask8 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, __m256i __D) { _mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
return (__m256i)__builtin_ia32_vpdpbusds_v8si_mask( __m256i __D)
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B); {
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz ((__v8si)__B,
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
} }
#define _mm_dpbusds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
__funline __m256i _mm256_maskz_dpbusds_epi32(__mmask8 __A, __m256i __B, extern __inline __m128i
__m256i __C, __m256i __D) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpdpbusds_v8si_maskz( _mm_mask_dpbusds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A); {
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask ((__v4si)__A,
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
} }
extern __inline __m128i
__funline __m128i _mm_dpbusds_epi32(__m128i __A, __m128i __B, __m128i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpdpbusds_v4si((__v4si)__A, (__v4si)__B, _mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
(__v4si)__C); {
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz ((__v4si)__B,
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
} }
#define _mm256_dpwssd_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
__funline __m128i _mm_mask_dpbusds_epi32(__m128i __A, __mmask8 __B, __m128i __C, extern __inline __m256i
__m128i __D) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpdpbusds_v4si_mask( _mm256_mask_dpwssd_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B); {
}
__funline __m128i _mm_maskz_dpbusds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpbusds_v4si_maskz(
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
}
__funline __m256i _mm256_dpwssd_epi32(__m256i __A, __m256i __B, __m256i __C) {
return (__m256i)__builtin_ia32_vpdpwssd_v8si((__v8si)__A, (__v8si)__B,
(__v8si)__C);
}
__funline __m256i _mm256_mask_dpwssd_epi32(__m256i __A, __mmask8 __B, __m256i __C,
__m256i __D) {
return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C, return (__m256i)__builtin_ia32_vpdpwssd_v8si_mask ((__v8si)__A, (__v8si) __C,
(__v8si) __D, (__mmask8)__B); (__v8si) __D, (__mmask8)__B);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_dpwssd_epi32(__mmask8 __A, __m256i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, __m256i __D) { _mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D)
return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz( {
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A); return (__m256i)__builtin_ia32_vpdpwssd_v8si_maskz ((__v8si)__B,
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
} }
#define _mm_dpwssd_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
__funline __m128i _mm_dpwssd_epi32(__m128i __A, __m128i __B, __m128i __C) { extern __inline __m128i
return (__m128i)__builtin_ia32_vpdpwssd_v4si((__v4si)__A, (__v4si)__B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v4si)__C); _mm_mask_dpwssd_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
} {
__funline __m128i _mm_mask_dpwssd_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C, return (__m128i)__builtin_ia32_vpdpwssd_v4si_mask ((__v4si)__A, (__v4si) __C,
(__v4si) __D, (__mmask8)__B); (__v4si) __D, (__mmask8)__B);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_dpwssd_epi32(__mmask8 __A, __m128i __B, __m128i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __D) { _mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz( {
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A); return (__m128i)__builtin_ia32_vpdpwssd_v4si_maskz ((__v4si)__B,
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
} }
#define _mm256_dpwssds_epi32(A, B, C) ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), (__v8si) (B), (__v8si) (C)))
__funline __m256i _mm256_dpwssds_epi32(__m256i __A, __m256i __B, __m256i __C) { extern __inline __m256i
return (__m256i)__builtin_ia32_vpdpwssds_v8si((__v8si)__A, (__v8si)__B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
(__v8si)__C); _mm256_mask_dpwssds_epi32 (__m256i __A, __mmask8 __B, __m256i __C, __m256i __D)
{
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask ((__v8si)__A,
(__v8si) __C, (__v8si) __D, (__mmask8)__B);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_dpwssds_epi32(__m256i __A, __mmask8 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, __m256i __D) { _mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C,
return (__m256i)__builtin_ia32_vpdpwssds_v8si_mask( __m256i __D)
(__v8si)__A, (__v8si)__C, (__v8si)__D, (__mmask8)__B); {
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz ((__v8si)__B,
(__v8si) __C, (__v8si) __D, (__mmask8)__A);
} }
#define _mm_dpwssds_epi32(A, B, C) ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), (__v4si) (B), (__v4si) (C)))
__funline __m256i _mm256_maskz_dpwssds_epi32(__mmask8 __A, __m256i __B, extern __inline __m128i
__m256i __C, __m256i __D) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpdpwssds_v8si_maskz( _mm_mask_dpwssds_epi32 (__m128i __A, __mmask8 __B, __m128i __C, __m128i __D)
(__v8si)__B, (__v8si)__C, (__v8si)__D, (__mmask8)__A); {
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask ((__v4si)__A,
(__v4si) __C, (__v4si) __D, (__mmask8)__B);
} }
extern __inline __m128i
__funline __m128i _mm_dpwssds_epi32(__m128i __A, __m128i __B, __m128i __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpdpwssds_v4si((__v4si)__A, (__v4si)__B, _mm_maskz_dpwssds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D)
(__v4si)__C); {
} return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz ((__v4si)__B,
(__v4si) __C, (__v4si) __D, (__mmask8)__A);
__funline __m128i _mm_mask_dpwssds_epi32(__m128i __A, __mmask8 __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpwssds_v4si_mask(
(__v4si)__A, (__v4si)__C, (__v4si)__D, (__mmask8)__B);
}
__funline __m128i _mm_maskz_dpwssds_epi32(__mmask8 __A, __m128i __B, __m128i __C,
__m128i __D) {
return (__m128i)__builtin_ia32_vpdpwssds_v4si_maskz(
(__v4si)__B, (__v4si)__C, (__v4si)__D, (__mmask8)__A);
} }
#ifdef __DISABLE_AVX512VNNIVL__ #ifdef __DISABLE_AVX512VNNIVL__
#undef __DISABLE_AVX512VNNIVL__ #undef __DISABLE_AVX512VNNIVL__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VNNIVL__ */ #endif
#endif /* __DISABLE_AVX512VNNIVL__ */ #endif
#endif

View file

@ -0,0 +1,32 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#if !defined(__AVX512VP2INTERSECT__)
#pragma GCC push_options
#pragma GCC target("avx512vp2intersect")
#define __DISABLE_AVX512VP2INTERSECT__
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_2intersect_epi32 (__m512i __A, __m512i __B, __mmask16 *__U,
__mmask16 *__M)
{
__builtin_ia32_2intersectd512 (__U, __M, (__v16si) __A, (__v16si) __B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_2intersect_epi64 (__m512i __A, __m512i __B, __mmask8 *__U,
__mmask8 *__M)
{
__builtin_ia32_2intersectq512 (__U, __M, (__v8di) __A, (__v8di) __B);
}
#ifdef __DISABLE_AVX512VP2INTERSECT__
#undef __DISABLE_AVX512VP2INTERSECT__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -0,0 +1,44 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
#pragma GCC push_options
#pragma GCC target("avx512vp2intersect,avx512vl")
#define __DISABLE_AVX512VP2INTERSECTVL__
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_2intersect_epi32 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
{
__builtin_ia32_2intersectd128 (__U, __M, (__v4si) __A, (__v4si) __B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_2intersect_epi32 (__m256i __A, __m256i __B, __mmask8 *__U,
__mmask8 *__M)
{
__builtin_ia32_2intersectd256 (__U, __M, (__v8si) __A, (__v8si) __B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_2intersect_epi64 (__m128i __A, __m128i __B, __mmask8 *__U, __mmask8 *__M)
{
__builtin_ia32_2intersectq128 (__U, __M, (__v2di) __A, (__v2di) __B);
}
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_2intersect_epi64 (__m256i __A, __m256i __B, __mmask8 *__U,
__mmask8 *__M)
{
__builtin_ia32_2intersectq256 (__U, __M, (__v4di) __A, (__v4di) __B);
}
#ifdef __DISABLE_AVX512VP2INTERSECTVL__
#undef __DISABLE_AVX512VP2INTERSECTVL__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -1,50 +1,64 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED #if !defined _IMMINTRIN_H_INCLUDED
#error \ # error "Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
"Never use <avx512vpopcntdqintrin.h> directly; include <x86intrin.h> instead."
#endif #endif
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED #ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED #define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#ifndef __AVX512VPOPCNTDQ__ #ifndef __AVX512VPOPCNTDQ__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vpopcntdq") #pragma GCC target("avx512vpopcntdq")
#define __DISABLE_AVX512VPOPCNTDQ__ #define __DISABLE_AVX512VPOPCNTDQ__
#endif /* __AVX512VPOPCNTDQ__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_popcnt_epi32(__m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi32 (__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcountd_v16si ((__v16si) __A); return (__m512i) __builtin_ia32_vpopcountd_v16si ((__v16si) __A);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_popcnt_epi32(__m512i __A, __mmask16 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B) { _mm512_mask_popcnt_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask( {
(__v16si)__A, (__v16si)__B, (__mmask16)__U); return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
(__v16si) __W,
(__mmask16) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpopcountd_v16si_mask( _mm512_maskz_popcnt_epi32 (__mmask16 __U, __m512i __A)
(__v16si)__A, (__v16si)_mm512_setzero_si512(), (__mmask16)__U); {
return (__m512i) __builtin_ia32_vpopcountd_v16si_mask ((__v16si) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_popcnt_epi64(__m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_popcnt_epi64 (__m512i __A)
{
return (__m512i) __builtin_ia32_vpopcountq_v8di ((__v8di) __A); return (__m512i) __builtin_ia32_vpopcountq_v8di ((__v8di) __A);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_popcnt_epi64(__m512i __A, __mmask8 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B) { _mm512_mask_popcnt_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask((__v8di)__A, (__v8di)__B, {
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
(__v8di) __W,
(__mmask8) __U); (__mmask8) __U);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m512i)__builtin_ia32_vpopcountq_v8di_mask( _mm512_maskz_popcnt_epi64 (__mmask8 __U, __m512i __A)
(__v8di)__A, (__v8di)_mm512_setzero_si512(), (__mmask8)__U); {
return (__m512i) __builtin_ia32_vpopcountq_v8di_mask ((__v8di) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
} }
#ifdef __DISABLE_AVX512VPOPCNTDQ__ #ifdef __DISABLE_AVX512VPOPCNTDQ__
#undef __DISABLE_AVX512VPOPCNTDQ__ #undef __DISABLE_AVX512VPOPCNTDQ__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VPOPCNTDQ__ */ #endif
#endif
#endif /* _AVX512VPOPCNTDQINTRIN_H_INCLUDED */ #endif

View file

@ -1,78 +1,110 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED #if !defined _IMMINTRIN_H_INCLUDED
#error \ # error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
"Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED #ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED #define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) #if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx512vpopcntdq,avx512vl") #pragma GCC target("avx512vpopcntdq,avx512vl")
#define __DISABLE_AVX512VPOPCNTDQVL__ #define __DISABLE_AVX512VPOPCNTDQVL__
#endif /* __AVX512VPOPCNTDQVL__ */ #endif
extern __inline __m128i
__funline __m128i _mm_popcnt_epi32(__m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi32 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A); return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
} }
extern __inline __m128i
__funline __m128i _mm_mask_popcnt_epi32(__m128i __A, __mmask16 __U, __m128i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask((__v4si)__A, (__v4si)__B, _mm_mask_popcnt_epi32 (__m128i __W, __mmask16 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
(__v4si) __W,
(__mmask16) __U); (__mmask16) __U);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_popcnt_epi32(__mmask16 __U, __m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpopcountd_v4si_mask( _mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
(__v4si)__A, (__v4si)_mm_setzero_si128(), (__mmask16)__U); {
return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
(__v4si)
_mm_setzero_si128 (),
(__mmask16) __U);
} }
extern __inline __m256i
__funline __m256i _mm256_popcnt_epi32(__m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi32 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A); return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_popcnt_epi32(__m256i __A, __mmask16 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __B) { _mm256_mask_popcnt_epi32 (__m256i __W, __mmask16 __U, __m256i __A)
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask((__v8si)__A, (__v8si)__B, {
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
(__v8si) __W,
(__mmask16) __U); (__mmask16) __U);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_popcnt_epi32(__mmask16 __U, __m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpopcountd_v8si_mask( _mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
(__v8si)__A, (__v8si)_mm256_setzero_si256(), (__mmask16)__U); {
return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
(__v8si)
_mm256_setzero_si256 (),
(__mmask16) __U);
} }
extern __inline __m128i
__funline __m128i _mm_popcnt_epi64(__m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_epi64 (__m128i __A)
{
return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A); return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
} }
extern __inline __m128i
__funline __m128i _mm_mask_popcnt_epi64(__m128i __A, __mmask8 __U, __m128i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask((__v2di)__A, (__v2di)__B, _mm_mask_popcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
(__v2di) __W,
(__mmask8) __U); (__mmask8) __U);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vpopcountq_v2di_mask( _mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
(__v2di)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); {
return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
(__v2di)
_mm_setzero_si128 (),
(__mmask8) __U);
} }
extern __inline __m256i
__funline __m256i _mm256_popcnt_epi64(__m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_popcnt_epi64 (__m256i __A)
{
return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A); return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_popcnt_epi64(__m256i __A, __mmask8 __U, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __B) { _mm256_mask_popcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask((__v4di)__A, (__v4di)__B, {
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
(__v4di) __W,
(__mmask8) __U); (__mmask8) __U);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vpopcountq_v4di_mask( _mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
(__v4di)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); {
return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
(__v4di)
_mm256_setzero_si256 (),
(__mmask8) __U);
} }
#ifdef __DISABLE_AVX512VPOPCNTDQVL__ #ifdef __DISABLE_AVX512VPOPCNTDQVL__
#undef __DISABLE_AVX512VPOPCNTDQVL__ #undef __DISABLE_AVX512VPOPCNTDQVL__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */ #endif
#endif
#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */ #endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,82 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avxvnniintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVXVNNIINTRIN_H_INCLUDED
#define _AVXVNNIINTRIN_H_INCLUDED
#if !defined(__AVXVNNI__)
#pragma GCC push_options
#pragma GCC target("avxvnni")
#define __DISABLE_AVXVNNIVL__
#endif
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbusd_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) __A,
(__v8si) __B,
(__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbusd_avx_epi32(__m128i __A, __m128i __B, __m128i __C)
{
return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) __A,
(__v4si) __B,
(__v4si) __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbusds_avx_epi32(__m256i __A, __m256i __B, __m256i __C)
{
return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) __A,
(__v8si) __B,
(__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbusds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
{
return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) __A,
(__v4si) __B,
(__v4si) __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpwssd_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
{
return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) __A,
(__v8si) __B,
(__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpwssd_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
{
return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) __A,
(__v4si) __B,
(__v4si) __C);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpwssds_avx_epi32(__m256i __A,__m256i __B,__m256i __C)
{
return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) __A,
(__v8si) __B,
(__v8si) __C);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpwssds_avx_epi32(__m128i __A,__m128i __B,__m128i __C)
{
return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) __A,
(__v4si) __B,
(__v4si) __C);
}
#ifdef __DISABLE_AVXVNNIVL__
#undef __DISABLE_AVXVNNIVL__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -1,67 +1,74 @@
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _BMI2INTRIN_H_INCLUDED #ifndef _BMI2INTRIN_H_INCLUDED
#define _BMI2INTRIN_H_INCLUDED #define _BMI2INTRIN_H_INCLUDED
#ifndef __BMI2__ #ifndef __BMI2__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("bmi2") #pragma GCC target("bmi2")
#define __DISABLE_BMI2__ #define __DISABLE_BMI2__
#endif /* __BMI2__ */ #endif
extern __inline unsigned int
__funline unsigned int _bzhi_u32(unsigned int __X, unsigned int __Y) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u32 (unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_bzhi_si (__X, __Y); return __builtin_ia32_bzhi_si (__X, __Y);
} }
extern __inline unsigned int
__funline unsigned int _pdep_u32(unsigned int __X, unsigned int __Y) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u32 (unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pdep_si (__X, __Y); return __builtin_ia32_pdep_si (__X, __Y);
} }
extern __inline unsigned int
__funline unsigned int _pext_u32(unsigned int __X, unsigned int __Y) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u32 (unsigned int __X, unsigned int __Y)
{
return __builtin_ia32_pext_si (__X, __Y); return __builtin_ia32_pext_si (__X, __Y);
} }
#ifdef __x86_64__ #ifdef __x86_64__
extern __inline unsigned long long
__funline unsigned long long _bzhi_u64(unsigned long long __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long __Y) { _bzhi_u64 (unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_bzhi_di (__X, __Y); return __builtin_ia32_bzhi_di (__X, __Y);
} }
extern __inline unsigned long long
__funline unsigned long long _pdep_u64(unsigned long long __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long __Y) { _pdep_u64 (unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pdep_di (__X, __Y); return __builtin_ia32_pdep_di (__X, __Y);
} }
extern __inline unsigned long long
__funline unsigned long long _pext_u64(unsigned long long __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long __Y) { _pext_u64 (unsigned long long __X, unsigned long long __Y)
{
return __builtin_ia32_pext_di (__X, __Y); return __builtin_ia32_pext_di (__X, __Y);
} }
extern __inline unsigned long long
__funline unsigned long long _mulx_u64(unsigned long long __X, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long __Y, _mulx_u64 (unsigned long long __X, unsigned long long __Y,
unsigned long long *__P) { unsigned long long *__P)
{
unsigned __int128 __res = (unsigned __int128) __X * __Y; unsigned __int128 __res = (unsigned __int128) __X * __Y;
*__P = (unsigned long long) (__res >> 64); *__P = (unsigned long long) (__res >> 64);
return (unsigned long long) __res; return (unsigned long long) __res;
} }
#else
#else /* !__x86_64__ */ extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline unsigned int _mulx_u32(unsigned int __X, unsigned int __Y, _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
unsigned int *__P) { {
unsigned long long __res = (unsigned long long) __X * __Y; unsigned long long __res = (unsigned long long) __X * __Y;
*__P = (unsigned int) (__res >> 32); *__P = (unsigned int) (__res >> 32);
return (unsigned int) __res; return (unsigned int) __res;
} }
#endif
#endif /* !__x86_64__ */
#ifdef __DISABLE_BMI2__ #ifdef __DISABLE_BMI2__
#undef __DISABLE_BMI2__ #undef __DISABLE_BMI2__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_BMI2__ */ #endif
#endif
#endif /* _BMI2INTRIN_H_INCLUDED */ #endif

View file

@ -1,160 +1,135 @@
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _BMIINTRIN_H_INCLUDED #ifndef _BMIINTRIN_H_INCLUDED
#define _BMIINTRIN_H_INCLUDED #define _BMIINTRIN_H_INCLUDED
#ifndef __BMI__ #ifndef __BMI__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("bmi") #pragma GCC target("bmi")
#define __DISABLE_BMI__ #define __DISABLE_BMI__
#endif /* __BMI__ */ #endif
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned short __tzcnt_u16 (unsigned short __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__tzcnt_u16(unsigned short __X) {
return __builtin_ia32_tzcnt_u16 (__X); return __builtin_ia32_tzcnt_u16 (__X);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int __andn_u32 (unsigned int __X, unsigned int __Y)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__andn_u32(unsigned int __X, unsigned int __Y) {
return ~__X & __Y; return ~__X & __Y;
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int __bextr_u32 (unsigned int __X, unsigned int __Y)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__bextr_u32(unsigned int __X, unsigned int __Y) {
return __builtin_ia32_bextr_u32 (__X, __Y); return __builtin_ia32_bextr_u32 (__X, __Y);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int _bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_bextr_u32(unsigned int __X, unsigned int __Y, unsigned __Z) {
return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int __blsi_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__blsi_u32(unsigned int __X) {
return __X & -__X; return __X & -__X;
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int _blsi_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_blsi_u32(unsigned int __X) {
return __blsi_u32 (__X); return __blsi_u32 (__X);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int __blsmsk_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__blsmsk_u32(unsigned int __X) {
return __X ^ (__X - 1); return __X ^ (__X - 1);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int _blsmsk_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_blsmsk_u32(unsigned int __X) {
return __blsmsk_u32 (__X); return __blsmsk_u32 (__X);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int __blsr_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__blsr_u32(unsigned int __X) {
return __X & (__X - 1); return __X & (__X - 1);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int _blsr_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_blsr_u32(unsigned int __X) {
return __blsr_u32 (__X); return __blsr_u32 (__X);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int __tzcnt_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__tzcnt_u32(unsigned int __X) {
return __builtin_ia32_tzcnt_u32 (__X); return __builtin_ia32_tzcnt_u32 (__X);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned int _tzcnt_u32 (unsigned int __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_tzcnt_u32(unsigned int __X) {
return __builtin_ia32_tzcnt_u32 (__X); return __builtin_ia32_tzcnt_u32 (__X);
} }
#ifdef __x86_64__ #ifdef __x86_64__
extern __inline unsigned long long extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __andn_u64 (unsigned long long __X, unsigned long long __Y)
__andn_u64(unsigned long long __X, unsigned long long __Y) { {
return ~__X & __Y; return ~__X & __Y;
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long __bextr_u64 (unsigned long long __X, unsigned long long __Y)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__bextr_u64(unsigned long long __X, unsigned long long __Y) {
return __builtin_ia32_bextr_u64 (__X, __Y); return __builtin_ia32_bextr_u64 (__X, __Y);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long _bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) {
return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long __blsi_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__blsi_u64(unsigned long long __X) {
return __X & -__X; return __X & -__X;
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long _blsi_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_blsi_u64(unsigned long long __X) {
return __blsi_u64 (__X); return __blsi_u64 (__X);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long __blsmsk_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__blsmsk_u64(unsigned long long __X) {
return __X ^ (__X - 1); return __X ^ (__X - 1);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long _blsmsk_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_blsmsk_u64(unsigned long long __X) {
return __blsmsk_u64 (__X); return __blsmsk_u64 (__X);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long __blsr_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__blsr_u64(unsigned long long __X) {
return __X & (__X - 1); return __X & (__X - 1);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long _blsr_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_blsr_u64(unsigned long long __X) {
return __blsr_u64 (__X); return __blsr_u64 (__X);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long __tzcnt_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
__tzcnt_u64(unsigned long long __X) {
return __builtin_ia32_tzcnt_u64 (__X); return __builtin_ia32_tzcnt_u64 (__X);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline unsigned long long _tzcnt_u64 (unsigned long long __X)
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) {
_tzcnt_u64(unsigned long long __X) {
return __builtin_ia32_tzcnt_u64 (__X); return __builtin_ia32_tzcnt_u64 (__X);
} }
#endif
#endif /* __x86_64__ */
#ifdef __DISABLE_BMI__ #ifdef __DISABLE_BMI__
#undef __DISABLE_BMI__ #undef __DISABLE_BMI__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_BMI__ */ #endif
#endif
#endif /* _BMIINTRIN_H_INCLUDED */ #endif

View file

@ -1,73 +1,95 @@
#if !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <cetintrin.h> directly; include <x86intrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <cetintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _CETINTRIN_H_INCLUDED #ifndef _CETINTRIN_H_INCLUDED
#define _CETINTRIN_H_INCLUDED #define _CETINTRIN_H_INCLUDED
#ifndef __SHSTK__ #ifndef __SHSTK__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target ("shstk") #pragma GCC target ("shstk")
#define __DISABLE_SHSTK__ #define __DISABLE_SHSTK__
#endif /* __SHSTK__ */ #endif
#ifdef __x86_64__ #ifdef __x86_64__
__funline unsigned long long _get_ssp(void) { extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp (void)
{
return __builtin_ia32_rdsspq (); return __builtin_ia32_rdsspq ();
} }
#else #else
__funline unsigned int _get_ssp(void) { extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_get_ssp (void)
{
return __builtin_ia32_rdsspd (); return __builtin_ia32_rdsspd ();
} }
#endif #endif
extern __inline void
__funline void _inc_ssp(unsigned int __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_inc_ssp (unsigned int __B)
{
#ifdef __x86_64__ #ifdef __x86_64__
__builtin_ia32_incsspq ((unsigned long long) __B); __builtin_ia32_incsspq ((unsigned long long) __B);
#else #else
__builtin_ia32_incsspd (__B); __builtin_ia32_incsspd (__B);
#endif #endif
} }
extern __inline void
__funline void _saveprevssp(void) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_saveprevssp (void)
{
__builtin_ia32_saveprevssp (); __builtin_ia32_saveprevssp ();
} }
extern __inline void
__funline void _rstorssp(void *__B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rstorssp (void *__B)
{
__builtin_ia32_rstorssp (__B); __builtin_ia32_rstorssp (__B);
} }
extern __inline void
__funline void _wrssd(unsigned int __B, void *__C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssd (unsigned int __B, void *__C)
{
__builtin_ia32_wrssd (__B, __C); __builtin_ia32_wrssd (__B, __C);
} }
#ifdef __x86_64__ #ifdef __x86_64__
__funline void _wrssq(unsigned long long __B, void *__C) { extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrssq (unsigned long long __B, void *__C)
{
__builtin_ia32_wrssq (__B, __C); __builtin_ia32_wrssq (__B, __C);
} }
#endif #endif
extern __inline void
__funline void _wrussd(unsigned int __B, void *__C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussd (unsigned int __B, void *__C)
{
__builtin_ia32_wrussd (__B, __C); __builtin_ia32_wrussd (__B, __C);
} }
#ifdef __x86_64__ #ifdef __x86_64__
__funline void _wrussq(unsigned long long __B, void *__C) { extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_wrussq (unsigned long long __B, void *__C)
{
__builtin_ia32_wrussq (__B, __C); __builtin_ia32_wrussq (__B, __C);
} }
#endif #endif
extern __inline void
__funline void _setssbsy(void) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_setssbsy (void)
{
__builtin_ia32_setssbsy (); __builtin_ia32_setssbsy ();
} }
extern __inline void
__funline void _clrssbsy(void *__B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_clrssbsy (void *__B)
{
__builtin_ia32_clrssbsy (__B); __builtin_ia32_clrssbsy (__B);
} }
#ifdef __DISABLE_SHSTK__ #ifdef __DISABLE_SHSTK__
#undef __DISABLE_SHSTK__ #undef __DISABLE_SHSTK__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_SHSTK__ */ #endif
#endif
#endif /* _CETINTRIN_H_INCLUDED. */ #endif

View file

@ -1,21 +1,24 @@
#if !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <cldemoteintrin.h> directly; include <immintrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <cldemoteintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _CLDEMOTE_H_INCLUDED #ifndef _CLDEMOTE_H_INCLUDED
#define _CLDEMOTE_H_INCLUDED #define _CLDEMOTE_H_INCLUDED
#ifndef __CLDEMOTE__ #ifndef __CLDEMOTE__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("cldemote") #pragma GCC target("cldemote")
#define __DISABLE_CLDEMOTE__ #define __DISABLE_CLDEMOTE__
#endif /* __CLDEMOTE__ */ #endif
__funline void _cldemote(void *__A) { extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cldemote (void *__A)
{
__builtin_ia32_cldemote (__A); __builtin_ia32_cldemote (__A);
} }
#ifdef __DISABLE_CLDEMOTE__ #ifdef __DISABLE_CLDEMOTE__
#undef __DISABLE_CLDEMOTE__ #undef __DISABLE_CLDEMOTE__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_CLDEMOTE__ */ #endif
#endif
#endif /* _CLDEMOTE_H_INCLUDED */ #endif

View file

@ -1,23 +1,24 @@
#if !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <clflushoptintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _CLFLUSHOPTINTRIN_H_INCLUDED #ifndef _CLFLUSHOPTINTRIN_H_INCLUDED
#define _CLFLUSHOPTINTRIN_H_INCLUDED #define _CLFLUSHOPTINTRIN_H_INCLUDED
#ifndef __CLFLUSHOPT__ #ifndef __CLFLUSHOPT__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("clflushopt") #pragma GCC target("clflushopt")
#define __DISABLE_CLFLUSHOPT__ #define __DISABLE_CLFLUSHOPT__
#endif /* __CLFLUSHOPT__ */ #endif
extern __inline void
__funline void _mm_clflushopt(void *__A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clflushopt (void *__A)
{
__builtin_ia32_clflushopt (__A); __builtin_ia32_clflushopt (__A);
} }
#ifdef __DISABLE_CLFLUSHOPT__ #ifdef __DISABLE_CLFLUSHOPT__
#undef __DISABLE_CLFLUSHOPT__ #undef __DISABLE_CLFLUSHOPT__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_CLFLUSHOPT__ */ #endif
#endif
#endif /* _CLFLUSHOPTINTRIN_H_INCLUDED */ #endif

View file

@ -1,23 +1,24 @@
#if !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <clwbintrin.h> directly; include <immintrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <clwbintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _CLWBINTRIN_H_INCLUDED #ifndef _CLWBINTRIN_H_INCLUDED
#define _CLWBINTRIN_H_INCLUDED #define _CLWBINTRIN_H_INCLUDED
#ifndef __CLWB__ #ifndef __CLWB__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("clwb") #pragma GCC target("clwb")
#define __DISABLE_CLWB__ #define __DISABLE_CLWB__
#endif /* __CLWB__ */ #endif
extern __inline void
__funline void _mm_clwb(void *__A) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_clwb (void *__A)
{
__builtin_ia32_clwb (__A); __builtin_ia32_clwb (__A);
} }
#ifdef __DISABLE_CLWB__ #ifdef __DISABLE_CLWB__
#undef __DISABLE_CLWB__ #undef __DISABLE_CLWB__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_CLWB__ */ #endif
#endif
#endif /* _CLWBINTRIN_H_INCLUDED */ #endif

View file

@ -1,21 +1,20 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _CLZEROINTRIN_H_INCLUDED #ifndef _CLZEROINTRIN_H_INCLUDED
#define _CLZEROINTRIN_H_INCLUDED #define _CLZEROINTRIN_H_INCLUDED
#ifdef __x86_64__
#ifndef __CLZERO__ #ifndef __CLZERO__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("clzero") #pragma GCC target("clzero")
#define __DISABLE_CLZERO__ #define __DISABLE_CLZERO__
#endif /* __CLZERO__ */ #endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void _mm_clzero(void* __I) { _mm_clzero (void * __I)
{
__builtin_ia32_clzero (__I); __builtin_ia32_clzero (__I);
} }
#ifdef __DISABLE_CLZERO__ #ifdef __DISABLE_CLZERO__
#undef __DISABLE_CLZERO__ #undef __DISABLE_CLZERO__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_CLZERO__ */ #endif
#endif
#endif /* __x86_64__ */ #endif
#endif /* _CLZEROINTRIN_H_INCLUDED */

View file

@ -1,8 +1,10 @@
#ifndef COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_ /* clang-format off */
#define COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_ #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifdef __x86_64__ #ifndef _CPUID_H_INCLUDED
#if !(__ASSEMBLER__ + __LINKER__ + 0) #define _CPUID_H_INCLUDED
#define bit_AVXVNNI (1 << 4)
#define bit_AVX512BF16 (1 << 5)
#define bit_HRESET (1 << 22)
#define bit_SSE3 (1 << 0) #define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1) #define bit_PCLMUL (1 << 1)
#define bit_LZCNT (1 << 5) #define bit_LZCNT (1 << 5)
@ -19,14 +21,12 @@
#define bit_AVX (1 << 28) #define bit_AVX (1 << 28)
#define bit_F16C (1 << 29) #define bit_F16C (1 << 29)
#define bit_RDRND (1 << 30) #define bit_RDRND (1 << 30)
#define bit_CMPXCHG8B (1 << 8) #define bit_CMPXCHG8B (1 << 8)
#define bit_CMOV (1 << 15) #define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23) #define bit_MMX (1 << 23)
#define bit_FXSAVE (1 << 24) #define bit_FXSAVE (1 << 24)
#define bit_SSE (1 << 25) #define bit_SSE (1 << 25)
#define bit_SSE2 (1 << 26) #define bit_SSE2 (1 << 26)
#define bit_LAHF_LM (1 << 0) #define bit_LAHF_LM (1 << 0)
#define bit_ABM (1 << 5) #define bit_ABM (1 << 5)
#define bit_SSE4a (1 << 6) #define bit_SSE4a (1 << 6)
@ -36,15 +36,12 @@
#define bit_FMA4 (1 << 16) #define bit_FMA4 (1 << 16)
#define bit_TBM (1 << 21) #define bit_TBM (1 << 21)
#define bit_MWAITX (1 << 29) #define bit_MWAITX (1 << 29)
#define bit_MMXEXT (1 << 22) #define bit_MMXEXT (1 << 22)
#define bit_LM (1 << 29) #define bit_LM (1 << 29)
#define bit_3DNOWP (1 << 30) #define bit_3DNOWP (1 << 30)
#define bit_3DNOW (1u << 31) #define bit_3DNOW (1u << 31)
#define bit_CLZERO (1 << 0) #define bit_CLZERO (1 << 0)
#define bit_WBNOINVD (1 << 9) #define bit_WBNOINVD (1 << 9)
#define bit_FSGSBASE (1 << 0) #define bit_FSGSBASE (1 << 0)
#define bit_SGX (1 << 2) #define bit_SGX (1 << 2)
#define bit_BMI (1 << 3) #define bit_BMI (1 << 3)
@ -66,7 +63,6 @@
#define bit_SHA (1 << 29) #define bit_SHA (1 << 29)
#define bit_AVX512BW (1 << 30) #define bit_AVX512BW (1 << 30)
#define bit_AVX512VL (1u << 31) #define bit_AVX512VL (1u << 31)
#define bit_PREFETCHWT1 (1 << 0) #define bit_PREFETCHWT1 (1 << 0)
#define bit_AVX512VBMI (1 << 1) #define bit_AVX512VBMI (1 << 1)
#define bit_PKU (1 << 3) #define bit_PKU (1 << 3)
@ -83,97 +79,76 @@
#define bit_RDPID (1 << 22) #define bit_RDPID (1 << 22)
#define bit_MOVDIRI (1 << 27) #define bit_MOVDIRI (1 << 27)
#define bit_MOVDIR64B (1 << 28) #define bit_MOVDIR64B (1 << 28)
#define bit_ENQCMD (1 << 29)
#define bit_CLDEMOTE (1 << 25) #define bit_CLDEMOTE (1 << 25)
#define bit_KL (1 << 23)
#define bit_AVX5124VNNIW (1 << 2) #define bit_AVX5124VNNIW (1 << 2)
#define bit_AVX5124FMAPS (1 << 3) #define bit_AVX5124FMAPS (1 << 3)
#define bit_AVX512VP2INTERSECT (1 << 8)
#define bit_IBT (1 << 20) #define bit_IBT (1 << 20)
#define bit_UINTR (1 << 5)
#define bit_PCONFIG (1 << 18) #define bit_PCONFIG (1 << 18)
#define bit_SERIALIZE (1 << 14)
#define bit_TSXLDTRK (1 << 16)
#define bit_AMX_BF16 (1 << 22)
#define bit_AMX_TILE (1 << 24)
#define bit_AMX_INT8 (1 << 25)
#define bit_BNDREGS (1 << 3) #define bit_BNDREGS (1 << 3)
#define bit_BNDCSR (1 << 4) #define bit_BNDCSR (1 << 4)
#define bit_XSAVEOPT (1 << 0) #define bit_XSAVEOPT (1 << 0)
#define bit_XSAVEC (1 << 1) #define bit_XSAVEC (1 << 1)
#define bit_XSAVES (1 << 3) #define bit_XSAVES (1 << 3)
#define bit_PTWRITE (1 << 4) #define bit_PTWRITE (1 << 4)
#define bit_AESKLE ( 1<<0 )
#define bit_WIDEKL ( 1<<2 )
#define signature_AMD_ebx 0x68747541 #define signature_AMD_ebx 0x68747541
#define signature_AMD_ecx 0x444d4163 #define signature_AMD_ecx 0x444d4163
#define signature_AMD_edx 0x69746e65 #define signature_AMD_edx 0x69746e65
#define signature_CENTAUR_ebx 0x746e6543 #define signature_CENTAUR_ebx 0x746e6543
#define signature_CENTAUR_ecx 0x736c7561 #define signature_CENTAUR_ecx 0x736c7561
#define signature_CENTAUR_edx 0x48727561 #define signature_CENTAUR_edx 0x48727561
#define signature_CYRIX_ebx 0x69727943 #define signature_CYRIX_ebx 0x69727943
#define signature_CYRIX_ecx 0x64616574 #define signature_CYRIX_ecx 0x64616574
#define signature_CYRIX_edx 0x736e4978 #define signature_CYRIX_edx 0x736e4978
#define signature_INTEL_ebx 0x756e6547 #define signature_INTEL_ebx 0x756e6547
#define signature_INTEL_ecx 0x6c65746e #define signature_INTEL_ecx 0x6c65746e
#define signature_INTEL_edx 0x49656e69 #define signature_INTEL_edx 0x49656e69
#define signature_TM1_ebx 0x6e617254 #define signature_TM1_ebx 0x6e617254
#define signature_TM1_ecx 0x55504361 #define signature_TM1_ecx 0x55504361
#define signature_TM1_edx 0x74656d73 #define signature_TM1_edx 0x74656d73
#define signature_TM2_ebx 0x756e6547 #define signature_TM2_ebx 0x756e6547
#define signature_TM2_ecx 0x3638784d #define signature_TM2_ecx 0x3638784d
#define signature_TM2_edx 0x54656e69 #define signature_TM2_edx 0x54656e69
#define signature_NSC_ebx 0x646f6547 #define signature_NSC_ebx 0x646f6547
#define signature_NSC_ecx 0x43534e20 #define signature_NSC_ecx 0x43534e20
#define signature_NSC_edx 0x79622065 #define signature_NSC_edx 0x79622065
#define signature_NEXGEN_ebx 0x4778654e #define signature_NEXGEN_ebx 0x4778654e
#define signature_NEXGEN_ecx 0x6e657669 #define signature_NEXGEN_ecx 0x6e657669
#define signature_NEXGEN_edx 0x72446e65 #define signature_NEXGEN_edx 0x72446e65
#define signature_RISE_ebx 0x65736952 #define signature_RISE_ebx 0x65736952
#define signature_RISE_ecx 0x65736952 #define signature_RISE_ecx 0x65736952
#define signature_RISE_edx 0x65736952 #define signature_RISE_edx 0x65736952
#define signature_SIS_ebx 0x20536953 #define signature_SIS_ebx 0x20536953
#define signature_SIS_ecx 0x20536953 #define signature_SIS_ecx 0x20536953
#define signature_SIS_edx 0x20536953 #define signature_SIS_edx 0x20536953
#define signature_UMC_ebx 0x20434d55 #define signature_UMC_ebx 0x20434d55
#define signature_UMC_ecx 0x20434d55 #define signature_UMC_ecx 0x20434d55
#define signature_UMC_edx 0x20434d55 #define signature_UMC_edx 0x20434d55
#define signature_VIA_ebx 0x20414956 #define signature_VIA_ebx 0x20414956
#define signature_VIA_ecx 0x20414956 #define signature_VIA_ecx 0x20414956
#define signature_VIA_edx 0x20414956 #define signature_VIA_edx 0x20414956
#define signature_VORTEX_ebx 0x74726f56 #define signature_VORTEX_ebx 0x74726f56
#define signature_VORTEX_ecx 0x436f5320 #define signature_VORTEX_ecx 0x436f5320
#define signature_VORTEX_edx 0x36387865 #define signature_VORTEX_edx 0x36387865
#ifndef __x86_64__ #ifndef __x86_64__
#define __cpuid(level, a, b, c, d) do { if (__builtin_constant_p (level) && (level) != 1) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level)); else __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "1" (0), "2" (0)); } while (0)
#define __cpuid(level, a, b, c, d) \
do { \
if (__builtin_constant_p(level) && (level) != 1) \
__asm__("cpuid\n\t" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(level)); \
else \
__asm__("cpuid\n\t" \
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \
: "0"(level), "1"(0), "2"(0)); \
} while (0)
#else #else
#define __cpuid(level, a, b, c, d) \ #define __cpuid(level, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level))
__asm__("cpuid\n\t" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(level))
#endif #endif
#define __cpuid_count(level, count, a, b, c, d) __asm__ __volatile__ ("cpuid\n\t" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "0" (level), "2" (count))
#define __cpuid_count(level, count, a, b, c, d) \ static __inline unsigned int
__asm__("cpuid\n\t" \ __get_cpuid_max (unsigned int __ext, unsigned int *__sig)
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) \ {
: "0"(level), "2"(count))
static __inline unsigned int __get_cpuid_max(unsigned int __ext,
unsigned int *__sig) {
unsigned int __eax, __ebx, __ecx, __edx; unsigned int __eax, __ebx, __ecx, __edx;
#ifndef __x86_64__ #ifndef __x86_64__
#if __GNUC__ >= 3 #if __GNUC__ >= 3
@ -203,35 +178,43 @@ static __inline unsigned int __get_cpuid_max(unsigned int __ext,
: "=&r" (__eax), "=&r" (__ebx) : "=&r" (__eax), "=&r" (__ebx)
: "i" (0x00200000)); : "i" (0x00200000));
#endif #endif
if (!((__eax ^ __ebx) & 0x00200000)) return 0; if (!((__eax ^ __ebx) & 0x00200000))
return 0;
#endif #endif
__cpuid (__ext, __eax, __ebx, __ecx, __edx); __cpuid (__ext, __eax, __ebx, __ecx, __edx);
if (__sig) *__sig = __ebx; if (__sig)
*__sig = __ebx;
return __eax; return __eax;
} }
static __inline int
static __inline int __get_cpuid(unsigned int __leaf, unsigned int *__eax, __get_cpuid (unsigned int __leaf,
unsigned int *__ebx, unsigned int *__ecx, unsigned int *__eax, unsigned int *__ebx,
unsigned int *__edx) { unsigned int *__ecx, unsigned int *__edx)
{
unsigned int __ext = __leaf & 0x80000000; unsigned int __ext = __leaf & 0x80000000;
unsigned int __maxlevel = __get_cpuid_max (__ext, 0); unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
if (__maxlevel == 0 || __maxlevel < __leaf) return 0; if (__maxlevel == 0 || __maxlevel < __leaf)
return 0;
__cpuid (__leaf, *__eax, *__ebx, *__ecx, *__edx); __cpuid (__leaf, *__eax, *__ebx, *__ecx, *__edx);
return 1; return 1;
} }
static __inline int
static __inline int __get_cpuid_count(unsigned int __leaf, __get_cpuid_count (unsigned int __leaf, unsigned int __subleaf,
unsigned int __subleaf,
unsigned int *__eax, unsigned int *__ebx, unsigned int *__eax, unsigned int *__ebx,
unsigned int *__ecx, unsigned int *__ecx, unsigned int *__edx)
unsigned int *__edx) { {
unsigned int __ext = __leaf & 0x80000000; unsigned int __ext = __leaf & 0x80000000;
unsigned int __maxlevel = __get_cpuid_max (__ext, 0); unsigned int __maxlevel = __get_cpuid_max (__ext, 0);
if (__maxlevel == 0 || __maxlevel < __leaf) return 0; if (__maxlevel == 0 || __maxlevel < __leaf)
return 0;
__cpuid_count (__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx); __cpuid_count (__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx);
return 1; return 1;
} }
static __inline void
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */ __cpuidex (int __cpuid_info[4], int __leaf, int __subleaf)
#endif /* __x86_64__ */ {
#endif /* COSMOPOLITAN_THIRD_PARTY_INTEL_CPUID_INTERNAL_H_ */ __cpuid_count (__leaf, __subleaf, __cpuid_info[0], __cpuid_info[1],
__cpuid_info[2], __cpuid_info[3]);
}
#endif
#endif

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,30 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <enqcmdintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _ENQCMDINTRIN_H_INCLUDED
#define _ENQCMDINTRIN_H_INCLUDED
#ifndef __ENQCMD__
#pragma GCC push_options
#pragma GCC target ("enqcmd")
#define __DISABLE_ENQCMD__
#endif
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_enqcmd (void * __P, const void * __Q)
{
return __builtin_ia32_enqcmd (__P, __Q);
}
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_enqcmds (void * __P, const void * __Q)
{
return __builtin_ia32_enqcmds (__P, __Q);
}
#ifdef __DISABLE_ENQCMD__
#undef __DISABLE_ENQCMD__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -1,75 +1,58 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED #if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED
#error \ # error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
"Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
#endif #endif
#ifndef _F16CINTRIN_H_INCLUDED #ifndef _F16CINTRIN_H_INCLUDED
#define _F16CINTRIN_H_INCLUDED #define _F16CINTRIN_H_INCLUDED
#ifndef __F16C__ #ifndef __F16C__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("f16c") #pragma GCC target("f16c")
#define __DISABLE_F16C__ #define __DISABLE_F16C__
#endif /* __F16C__ */ #endif
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline float _cvtsh_ss(unsigned short __S) { _cvtsh_ss (unsigned short __S)
{
__v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 }; __v8hi __H = __extension__ (__v8hi){ (short) __S, 0, 0, 0, 0, 0, 0, 0 };
__v4sf __A = __builtin_ia32_vcvtph2ps (__H); __v4sf __A = __builtin_ia32_vcvtph2ps (__H);
return __builtin_ia32_vec_ext_v4sf (__A, 0); return __builtin_ia32_vec_ext_v4sf (__A, 0);
} }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
/** _mm_cvtph_ps (__m128i __A)
* Converts four half-precision (16-bit) floating point values to {
* single-precision floating point values.
*/
__funline __m128 _mm_cvtph_ps(__m128i __A) {
return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A); return (__m128) __builtin_ia32_vcvtph2ps ((__v8hi) __A);
} }
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
/** _mm256_cvtph_ps (__m128i __A)
* Converts eight half-precision (16-bit) floating point values to {
* single-precision floating point values.
*/
__funline __m256 _mm256_cvtph_ps(__m128i __A) {
return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A); return (__m256) __builtin_ia32_vcvtph2ps256 ((__v8hi) __A);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline unsigned short _cvtss_sh(float __F, const int __I) { extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtss_sh (float __F, const int __I)
{
__v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 };
__v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I);
return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); return (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0);
} }
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128i _mm_cvtps_ph(__m128 __A, const int __I) { _mm_cvtps_ph (__m128 __A, const int __I)
{
return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I); return (__m128i) __builtin_ia32_vcvtps2ph ((__v4sf) __A, __I);
} }
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
/** _mm256_cvtps_ph (__m256 __A, const int __I)
* Converts eight single-precision floating point values to {
* half-precision (16-bit) floating point values.
*/
__funline __m128i _mm256_cvtps_ph(__m256 __A, const int __I) {
return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I); return (__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf) __A, __I);
} }
#else #else
#define _cvtss_sh(__F, __I) \ #define _cvtss_sh(__F, __I) (__extension__ ({ __v4sf __A = __extension__ (__v4sf){ __F, 0, 0, 0 }; __v8hi __H = __builtin_ia32_vcvtps2ph (__A, __I); (unsigned short) __builtin_ia32_vec_ext_v8hi (__H, 0); }))
(__extension__({ \ #define _mm_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph ((__v4sf)(__m128) (A), (int) (I)))
__v4sf __A = __extension__(__v4sf){__F, 0, 0, 0}; \ #define _mm256_cvtps_ph(A, I) ((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) (A), (int) (I)))
__v8hi __H = __builtin_ia32_vcvtps2ph(__A, __I); \ #endif
(unsigned short)__builtin_ia32_vec_ext_v8hi(__H, 0); \
}))
#define _mm_cvtps_ph(A, I) \
((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)A, (int)(I)))
#define _mm256_cvtps_ph(A, I) \
((__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)A, (int)(I)))
#endif /* __OPTIMIZE */
#ifdef __DISABLE_F16C__ #ifdef __DISABLE_F16C__
#undef __DISABLE_F16C__ #undef __DISABLE_F16C__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_F16C__ */ #endif
#endif
#endif /* _F16CINTRIN_H_INCLUDED */ #endif

View file

@ -1,184 +1,179 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86INTRIN_H_INCLUDED #ifndef _X86INTRIN_H_INCLUDED
# error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead." # error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
#endif #endif
#ifndef _FMA4INTRIN_H_INCLUDED #ifndef _FMA4INTRIN_H_INCLUDED
#define _FMA4INTRIN_H_INCLUDED #define _FMA4INTRIN_H_INCLUDED
#include "third_party/intel/ammintrin.internal.h" #include "third_party/intel/ammintrin.internal.h"
#ifndef __FMA4__ #ifndef __FMA4__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("fma4") #pragma GCC target("fma4")
#define __DISABLE_FMA4__ #define __DISABLE_FMA4__
#endif /* __FMA4__ */ #endif
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { _mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
} }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128d _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { _mm_macc_pd (__m128d __A, __m128d __B, __m128d __C)
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, {
(__v2df)__C); return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
} }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { _mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
} }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128d _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { _mm_macc_sd (__m128d __A, __m128d __B, __m128d __C)
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
__funline __m128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{ {
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
-(__v4sf)__C);
} }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m128d _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { _mm_msub_ps (__m128 __A, __m128 __B, __m128 __C)
return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
__funline __m128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
__funline __m128d _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
__funline __m128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
__funline __m128d _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
__funline __m128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
__funline __m128d _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
__funline __m128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
__funline __m128d _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
__funline __m128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
__funline __m128d _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
__funline __m128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
}
__funline __m128d _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
(__v2df)__C);
}
__funline __m128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) {
return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C);
}
__funline __m128d _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) {
return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B,
-(__v2df)__C);
}
/* 256b Floating point multiply/add type instructions. */
__funline __m256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) {
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C);
}
__funline __m256d _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) {
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B,
(__v4df)__C);
}
__funline __m256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{ {
return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
-(__v8sf)__C);
} }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256d _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { _mm_msub_pd (__m128d __A, __m128d __B, __m128d __C)
return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, {
-(__v4df)__C); return (__m128d) __builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
} }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { _mm_msub_ss (__m128 __A, __m128 __B, __m128 __C)
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, {
(__v8sf)__C); return (__m128) __builtin_ia32_vfmaddss ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
} }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256d _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { _mm_msub_sd (__m128d __A, __m128d __B, __m128d __C)
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, {
(__v4df)__C); return (__m128d) __builtin_ia32_vfmaddsd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
} }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { _mm_nmacc_ps (__m128 __A, __m128 __B, __m128 __C)
return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, {
-(__v8sf)__C); return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
} }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256d _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { _mm_nmacc_pd (__m128d __A, __m128d __B, __m128d __C)
return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, {
-(__v4df)__C); return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
} }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { _mm_nmacc_ss (__m128 __A, __m128 __B, __m128 __C)
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, {
(__v8sf)__C); return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
} }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256d _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { _mm_nmacc_sd (__m128d __A, __m128d __B, __m128d __C)
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, {
(__v4df)__C); return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, (__v2df)__C);
} }
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { _mm_nmsub_ps (__m128 __A, __m128 __B, __m128 __C)
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, {
-(__v8sf)__C); return (__m128) __builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
} }
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m256d _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { _mm_nmsub_pd (__m128d __A, __m128d __B, __m128d __C)
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, {
-(__v4df)__C); return (__m128d) __builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_vfmaddss (-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_nmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_vfmaddsd (-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, (__v2df)__C);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_msubadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, -(__v2df)__C);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_macc_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256) __builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msub_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d) __builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmacc_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, (__v4df)__C);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256) __builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_nmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d) __builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maddsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, (__v4df)__C);
}
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256) __builtin_ia32_vfmaddsubps256 ((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
}
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
} }
#ifdef __DISABLE_FMA4__ #ifdef __DISABLE_FMA4__
#undef __DISABLE_FMA4__ #undef __DISABLE_FMA4__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_FMA4__ */ #endif
#endif
#endif #endif

View file

@ -1,177 +1,246 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." # error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _FMAINTRIN_H_INCLUDED #ifndef _FMAINTRIN_H_INCLUDED
#define _FMAINTRIN_H_INCLUDED #define _FMAINTRIN_H_INCLUDED
#ifndef __FMA__ #ifndef __FMA__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("fma") #pragma GCC target("fma")
#define __DISABLE_FMA__ #define __DISABLE_FMA__
#endif /* __FMA__ */ #endif
extern __inline __m128d
__funline __m128d _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m256d
__funline __m256d _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
(__v4df)__C); (__v4df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); _mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
} }
extern __inline __m256
__funline __m256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C); (__v8sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C); (__v4sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m256d
__funline __m256d _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B, return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
(__v4df)__C); (__v4df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C);
} }
extern __inline __m256
__funline __m256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B, return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C); (__v8sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B, return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C); (__v4sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfnmaddpd ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m256d
__funline __m256d _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B, return (__m256d)__builtin_ia32_vfnmaddpd256 ((__v4df)__A, (__v4df)__B,
(__v4df)__C); (__v4df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B, return (__m128)__builtin_ia32_vfnmaddps ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C); (__v4sf)__C);
} }
extern __inline __m256
__funline __m256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B, return (__m256)__builtin_ia32_vfnmaddps256 ((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C); (__v8sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfnmaddsd3 ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B, return (__m128)__builtin_ia32_vfnmaddss3 ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C); (__v4sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m256d
__funline __m256d _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B, return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
(__v4df)__C); (__v4df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B, return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C); (__v4sf)__C);
} }
extern __inline __m256
__funline __m256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B, return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
(__v8sf)__C); (__v8sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B, return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C); (__v4sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
(__v2df)__C); (__v2df)__C);
} }
extern __inline __m256d
__funline __m256d _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, _mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
(__v4df)__B,
(__v4df)__C); (__v4df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
(__v4sf)__C); (__v4sf)__C);
} }
extern __inline __m256
__funline __m256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, _mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
(__v8sf)__B,
(__v8sf)__C); (__v8sf)__C);
} }
extern __inline __m128d
__funline __m128d _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
-(__v2df)__C); -(__v2df)__C);
} }
extern __inline __m256d
__funline __m256d _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, _mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
(__v4df)__B,
-(__v4df)__C); -(__v4df)__C);
} }
extern __inline __m128
__funline __m128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
-(__v4sf)__C); -(__v4sf)__C);
} }
extern __inline __m256
__funline __m256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
(__v8sf)__B,
-(__v8sf)__C); -(__v8sf)__C);
} }
#ifdef __DISABLE_FMA__ #ifdef __DISABLE_FMA__
#undef __DISABLE_FMA__ #undef __DISABLE_FMA__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_FMA__ */ #endif
#endif
#endif #endif

View file

@ -1,37 +1,44 @@
#if !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <fxsrintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _FXSRINTRIN_H_INCLUDED #ifndef _FXSRINTRIN_H_INCLUDED
#define _FXSRINTRIN_H_INCLUDED #define _FXSRINTRIN_H_INCLUDED
#ifndef __FXSR__ #ifndef __FXSR__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("fxsr") #pragma GCC target("fxsr")
#define __DISABLE_FXSR__ #define __DISABLE_FXSR__
#endif /* __FXSR__ */ #endif
extern __inline void
__funline void _fxsave(void *__P) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave (void *__P)
{
__builtin_ia32_fxsave (__P); __builtin_ia32_fxsave (__P);
} }
extern __inline void
__funline void _fxrstor(void *__P) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor (void *__P)
{
__builtin_ia32_fxrstor (__P); __builtin_ia32_fxrstor (__P);
} }
#ifdef __x86_64__ #ifdef __x86_64__
__funline void _fxsave64(void *__P) { extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave64 (void *__P)
{
__builtin_ia32_fxsave64 (__P); __builtin_ia32_fxsave64 (__P);
} }
extern __inline void
__funline void _fxrstor64(void *__P) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxrstor64 (void *__P)
{
__builtin_ia32_fxrstor64 (__P); __builtin_ia32_fxrstor64 (__P);
} }
#endif #endif
#ifdef __DISABLE_FXSR__ #ifdef __DISABLE_FXSR__
#undef __DISABLE_FXSR__ #undef __DISABLE_FXSR__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_FXSR__ */ #endif
#endif
#endif /* _FXSRINTRIN_H_INCLUDED */ #endif

View file

@ -1,311 +1,310 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead." #error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
#endif #endif
#ifndef _GFNIINTRIN_H_INCLUDED #ifndef _GFNIINTRIN_H_INCLUDED
#define _GFNIINTRIN_H_INCLUDED #define _GFNIINTRIN_H_INCLUDED
#if !defined(__GFNI__) || !defined(__SSE2__) #if !defined(__GFNI__) || !defined(__SSE2__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("gfni,sse2") #pragma GCC target("gfni,sse2")
#define __DISABLE_GFNI__ #define __DISABLE_GFNI__
#endif /* __GFNI__ */ #endif
extern __inline __m128i
__funline __m128i _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi((__v16qi)__A, (__v16qi)__B); _mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
{
return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
(__v16qi) __B);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m128i _mm_gf2p8affineinv_epi64_epi8(__m128i __A, __m128i __B, extern __inline __m128i
const int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A, return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
(__v16qi)__B, __C); (__v16qi) __B,
__C);
} }
extern __inline __m128i
__funline __m128i _mm_gf2p8affine_epi64_epi8(__m128i __A, __m128i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
const int __C) { _mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A, return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
(__v16qi) __B, __C); (__v16qi) __B, __C);
} }
#else #else
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C) \ #define _mm_gf2p8affineinv_epi64_epi8(A, B, C) ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi( \ #define _mm_gf2p8affine_epi64_epi8(A, B, C) ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
(__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
#define _mm_gf2p8affine_epi64_epi8(A, B, C) \
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi( \
(__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(C)))
#endif #endif
#ifdef __DISABLE_GFNI__ #ifdef __DISABLE_GFNI__
#undef __DISABLE_GFNI__ #undef __DISABLE_GFNI__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_GFNI__ */ #endif
#if !defined(__GFNI__) || !defined(__AVX__) #if !defined(__GFNI__) || !defined(__AVX__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("gfni,avx") #pragma GCC target("gfni,avx")
#define __DISABLE_GFNIAVX__ #define __DISABLE_GFNIAVX__
#endif /* __GFNIAVX__ */ #endif
extern __inline __m256i
__funline __m256i _mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi((__v32qi)__A, (__v32qi)__B); _mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
{
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
(__v32qi) __B);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m256i _mm256_gf2p8affineinv_epi64_epi8(__m256i __A, __m256i __B, extern __inline __m256i
const int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A, return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
(__v32qi)__B, __C); (__v32qi) __B,
__C);
} }
extern __inline __m256i
__funline __m256i _mm256_gf2p8affine_epi64_epi8(__m256i __A, __m256i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
const int __C) { _mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A, return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
(__v32qi) __B, __C); (__v32qi) __B, __C);
} }
#else #else
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) \ #define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi( \ #define _mm256_gf2p8affine_epi64_epi8(A, B, C) ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A), ( __v32qi)(__m256i)(B), (int)(C)))
(__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, C) \
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi( \
(__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(C)))
#endif #endif
#ifdef __DISABLE_GFNIAVX__ #ifdef __DISABLE_GFNIAVX__
#undef __DISABLE_GFNIAVX__ #undef __DISABLE_GFNIAVX__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __GFNIAVX__ */ #endif
#if !defined(__GFNI__) || !defined(__AVX512VL__) #if !defined(__GFNI__) || !defined(__AVX512VL__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("gfni,avx512vl") #pragma GCC target("gfni,avx512vl")
#define __DISABLE_GFNIAVX512VL__ #define __DISABLE_GFNIAVX512VL__
#endif /* __GFNIAVX512VL__ */ #endif
extern __inline __m128i
__funline __m128i _mm_mask_gf2p8mul_epi8(__m128i __A, __mmask16 __B, __m128i __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __D) { _mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask( {
(__v16qi)__C, (__v16qi)__D, (__v16qi)__A, __B); return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
(__v16qi) __D,
(__v16qi)__A, __B);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_gf2p8mul_epi8(__mmask16 __A, __m128i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __C) { _mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
return (__m128i)__builtin_ia32_vgf2p8mulb_v16qi_mask( {
(__v16qi)__B, (__v16qi)__C, (__v16qi)_mm_setzero_si128(), __A); return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m128i _mm_mask_gf2p8affineinv_epi64_epi8(__m128i __A, __mmask16 __B, extern __inline __m128i
__m128i __C, __m128i __D, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
const int __E) { _mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( __m128i __D, const int __E)
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B); {
return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
(__v16qi) __D,
__E,
(__v16qi)__A,
__B);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 __A, __m128i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __C, _mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
const int __D) { const int __D)
return (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( {
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A); return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
(__v16qi) __C, __D,
(__v16qi) _mm_setzero_si128 (),
__A);
} }
extern __inline __m128i
__funline __m128i _mm_mask_gf2p8affine_epi64_epi8(__m128i __A, __mmask16 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __C, __m128i __D, _mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
const int __E) { __m128i __D, const int __E)
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( {
(__v16qi)__C, (__v16qi)__D, __E, (__v16qi)__A, __B); return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
(__v16qi) __D, __E, (__v16qi)__A, __B);
} }
extern __inline __m128i
__funline __m128i _mm_maskz_gf2p8affine_epi64_epi8(__mmask16 __A, __m128i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m128i __C, const int __D) { _mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
return (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( const int __D)
(__v16qi)__B, (__v16qi)__C, __D, (__v16qi)_mm_setzero_si128(), __A); {
return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
(__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
} }
#else #else
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \ #define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( (__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \ #define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( (__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), \ #define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
(__v16qi)(__m128i)(A), (__mmask16)(B))) #define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \
(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), \
(__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)(A)))
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( \
(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E), \
(__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
((__m128i)__builtin_ia32_vgf2p8affineqb_v16qi_mask( \
(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D), \
(__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)(A)))
#endif #endif
#ifdef __DISABLE_GFNIAVX512VL__ #ifdef __DISABLE_GFNIAVX512VL__
#undef __DISABLE_GFNIAVX512VL__ #undef __DISABLE_GFNIAVX512VL__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __GFNIAVX512VL__ */ #endif
#if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__) #if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("gfni,avx512vl,avx512bw") #pragma GCC target("gfni,avx512vl,avx512bw")
#define __DISABLE_GFNIAVX512VLBW__ #define __DISABLE_GFNIAVX512VLBW__
#endif /* __GFNIAVX512VLBW__ */ #endif
extern __inline __m256i
__funline __m256i _mm256_mask_gf2p8mul_epi8(__m256i __A, __mmask32 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, __m256i __D) { _mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask( __m256i __D)
(__v32qi)__C, (__v32qi)__D, (__v32qi)__A, __B); {
return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
(__v32qi) __D,
(__v32qi)__A, __B);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_gf2p8mul_epi8(__mmask32 __A, __m256i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C) { _mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
return (__m256i)__builtin_ia32_vgf2p8mulb_v32qi_mask( {
(__v32qi)__B, (__v32qi)__C, (__v32qi)_mm256_setzero_si256(), __A); return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m256i _mm256_mask_gf2p8affineinv_epi64_epi8(__m256i __A, extern __inline __m256i
__mmask32 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, __m256i __D, _mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
const int __E) { __m256i __C, __m256i __D, const int __E)
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( {
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B); return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
(__v32qi) __D,
__E,
(__v32qi)__A,
__B);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 __A, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __B, __m256i __C, _mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
const int __D) { __m256i __C, const int __D)
return (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( {
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A); return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
(__v32qi) __C, __D,
(__v32qi) _mm256_setzero_si256 (), __A);
} }
extern __inline __m256i
__funline __m256i _mm256_mask_gf2p8affine_epi64_epi8(__m256i __A, __mmask32 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, __m256i __D, _mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
const int __E) { __m256i __D, const int __E)
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( {
(__v32qi)__C, (__v32qi)__D, __E, (__v32qi)__A, __B); return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
(__v32qi) __D,
__E,
(__v32qi)__A,
__B);
} }
extern __inline __m256i
__funline __m256i _mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 __A, __m256i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m256i __C, _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
const int __D) { __m256i __C, const int __D)
return (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( {
(__v32qi)__B, (__v32qi)__C, __D, (__v32qi)_mm256_setzero_si256(), __A); return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
(__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
} }
#else #else
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \ #define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( (__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \ #define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( (__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \ #define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
(__v32qi)(__m256i)(A), (__mmask32)(B))) #define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
((__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \
(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
(__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)(A)))
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( \
(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \
(__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
((__m256i)__builtin_ia32_vgf2p8affineqb_v32qi_mask( \
(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \
(__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)(A)))
#endif #endif
#ifdef __DISABLE_GFNIAVX512VLBW__ #ifdef __DISABLE_GFNIAVX512VLBW__
#undef __DISABLE_GFNIAVX512VLBW__ #undef __DISABLE_GFNIAVX512VLBW__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __GFNIAVX512VLBW__ */ #endif
#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__) #if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("gfni,avx512f,avx512bw") #pragma GCC target("gfni,avx512f,avx512bw")
#define __DISABLE_GFNIAVX512FBW__ #define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */ #endif
extern __inline __m512i
__funline __m512i _mm512_mask_gf2p8mul_epi8(__m512i __A, __mmask64 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D) { _mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask( __m512i __D)
(__v64qi)__C, (__v64qi)__D, (__v64qi)__A, __B); {
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
(__v64qi) __D, (__v64qi)__A, __B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_gf2p8mul_epi8(__mmask64 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C) { _mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi_mask( {
(__v64qi)__B, (__v64qi)__C, (__v64qi)_mm512_setzero_si512(), __A); return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
} }
__funline __m512i _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) { extern __inline __m512i
return (__m512i)__builtin_ia32_vgf2p8mulb_v64qi((__v64qi)__A, (__v64qi)__B); __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
(__v64qi) __B);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline __m512i _mm512_mask_gf2p8affineinv_epi64_epi8(__m512i __A, extern __inline __m512i
__mmask64 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D, _mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
const int __E) { __m512i __D, const int __E)
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( {
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B); return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
(__v64qi) __D,
__E,
(__v64qi)__A,
__B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 __A, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __B, __m512i __C, _mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
const int __D) { __m512i __C, const int __D)
return (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( {
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A); return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
(__v64qi) __C, __D,
(__v64qi) _mm512_setzero_si512 (), __A);
} }
extern __inline __m512i
__funline __m512i _mm512_gf2p8affineinv_epi64_epi8(__m512i __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
const int __C) { _mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A, return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
(__v64qi) __B, __C); (__v64qi) __B, __C);
} }
extern __inline __m512i
__funline __m512i _mm512_mask_gf2p8affine_epi64_epi8(__m512i __A, __mmask64 __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, __m512i __D, _mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
const int __E) { __m512i __D, const int __E)
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( {
(__v64qi)__C, (__v64qi)__D, __E, (__v64qi)__A, __B); return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
(__v64qi) __D, __E, (__v64qi)__A, __B);
} }
extern __inline __m512i
__funline __m512i _mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 __A, __m512i __B, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__m512i __C, _mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
const int __D) { const int __D)
return (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( {
(__v64qi)__B, (__v64qi)__C, __D, (__v64qi)_mm512_setzero_si512(), __A); return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
(__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
} }
__funline __m512i _mm512_gf2p8affine_epi64_epi8(__m512i __A, __m512i __B, extern __inline __m512i
const int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A, return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
(__v64qi) __B, __C); (__v64qi) __B, __C);
} }
#else #else
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \ #define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \ #define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \ #define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
(__v64qi)(__m512i)(A), (__mmask64)(B))) #define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \ #define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \ #define _mm512_gf2p8affine_epi64_epi8(A, B, C) ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \
((__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi( \
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \
(__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi_mask( \
(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \
(__v64qi)(__m512i)_mm512_setzero_si512(), (__mmask64)(A)))
#define _mm512_gf2p8affine_epi64_epi8(A, B, C) \
((__m512i)__builtin_ia32_vgf2p8affineqb_v64qi( \
(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#endif #endif
#ifdef __DISABLE_GFNIAVX512FBW__ #ifdef __DISABLE_GFNIAVX512FBW__
#undef __DISABLE_GFNIAVX512FBW__ #undef __DISABLE_GFNIAVX512FBW__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __GFNIAVX512FBW__ */ #endif
#endif
#endif /* _GFNIINTRIN_H_INCLUDED */ #endif

View file

@ -0,0 +1,24 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _X86GPRINTRIN_H_INCLUDED
# error "Never use <hresetintrin.h> directly; include <x86gprintrin.h> instead."
#endif
#ifndef _HRESETINTRIN_H_INCLUDED
#define _HRESETINTRIN_H_INCLUDED
#ifndef __HRESET__
#pragma GCC push_options
#pragma GCC target ("hreset")
#define __DISABLE_HRESET__
#endif
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_hreset (unsigned int __EAX)
{
__builtin_ia32_hreset (__EAX);
}
#ifdef __DISABLE_HRESET__
#undef __DISABLE_HRESET__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -1,166 +1,199 @@
#ifndef _X86INTRIN_H_INCLUDED /* clang-format off */
#error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <ia32intrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
extern __inline int
__funline int __bsfd(int __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfd (int __X)
{
return __builtin_ctz (__X); return __builtin_ctz (__X);
} }
extern __inline int
__funline int __bsrd(int __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrd (int __X)
{
return __builtin_ia32_bsrsi (__X); return __builtin_ia32_bsrsi (__X);
} }
extern __inline int
__funline int __bswapd(int __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapd (int __X)
{
return __builtin_bswap32 (__X); return __builtin_bswap32 (__X);
} }
#ifndef __iamcu__ #ifndef __iamcu__
#ifndef __SSE4_2__ #ifndef __SSE4_2__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("sse4.2") #pragma GCC target("sse4.2")
#define __DISABLE_SSE4_2__ #define __DISABLE_SSE4_2__
#endif /* __SSE4_2__ */ #endif
extern __inline unsigned int
__funline unsigned int __crc32b(unsigned int __C, unsigned char __V) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32b (unsigned int __C, unsigned char __V)
{
return __builtin_ia32_crc32qi (__C, __V); return __builtin_ia32_crc32qi (__C, __V);
} }
extern __inline unsigned int
__funline unsigned int __crc32w(unsigned int __C, unsigned short __V) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32w (unsigned int __C, unsigned short __V)
{
return __builtin_ia32_crc32hi (__C, __V); return __builtin_ia32_crc32hi (__C, __V);
} }
extern __inline unsigned int
__funline unsigned int __crc32d(unsigned int __C, unsigned int __V) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__crc32d (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V); return __builtin_ia32_crc32si (__C, __V);
} }
#ifdef __DISABLE_SSE4_2__ #ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__ #undef __DISABLE_SSE4_2__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_SSE4_2__ */ #endif
#endif
#endif /* __iamcu__ */ extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline int __popcntd(unsigned int __X) { __popcntd (unsigned int __X)
{
return __builtin_popcount (__X); return __builtin_popcount (__X);
} }
#ifndef __iamcu__ #ifndef __iamcu__
extern __inline unsigned long long
__funline unsigned long long __rdpmc(int __S) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rdpmc (int __S)
{
return __builtin_ia32_rdpmc (__S); return __builtin_ia32_rdpmc (__S);
} }
#endif
#endif /* __iamcu__ */ #define __rdtsc() __builtin_ia32_rdtsc ()
__funline unsigned long long __rdtsc(void) {
return __builtin_ia32_rdtsc();
}
#ifndef __iamcu__ #ifndef __iamcu__
#define __rdtscp(a) __builtin_ia32_rdtscp (a)
__funline unsigned long long __rdtscp(unsigned int *__A) { #endif
return __builtin_ia32_rdtscp(__A); extern __inline unsigned char
} __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolb (unsigned char __X, int __C)
#endif /* __iamcu__ */ {
__funline unsigned char __rolb(unsigned char __X, int __C) {
return __builtin_ia32_rolqi (__X, __C); return __builtin_ia32_rolqi (__X, __C);
} }
extern __inline unsigned short
__funline unsigned short __rolw(unsigned short __X, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolw (unsigned short __X, int __C)
{
return __builtin_ia32_rolhi (__X, __C); return __builtin_ia32_rolhi (__X, __C);
} }
extern __inline unsigned int
__funline unsigned int __rold(unsigned int __X, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rold (unsigned int __X, int __C)
{
__C &= 31; __C &= 31;
return (__X << __C) | (__X >> (-__C & 31)); return (__X << __C) | (__X >> (-__C & 31));
} }
extern __inline unsigned char
__funline unsigned char __rorb(unsigned char __X, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorb (unsigned char __X, int __C)
{
return __builtin_ia32_rorqi (__X, __C); return __builtin_ia32_rorqi (__X, __C);
} }
extern __inline unsigned short
__funline unsigned short __rorw(unsigned short __X, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorw (unsigned short __X, int __C)
{
return __builtin_ia32_rorhi (__X, __C); return __builtin_ia32_rorhi (__X, __C);
} }
extern __inline unsigned int
__funline unsigned int __rord(unsigned int __X, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rord (unsigned int __X, int __C)
{
__C &= 31; __C &= 31;
return (__X >> __C) | (__X << (-__C & 31)); return (__X >> __C) | (__X << (-__C & 31));
} }
extern __inline void
__funline void __pause(void) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__pause (void)
{
__builtin_ia32_pause (); __builtin_ia32_pause ();
} }
#ifdef __x86_64__ #ifdef __x86_64__
extern __inline int
__funline int __bsfq(long long __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsfq (long long __X)
{
return __builtin_ctzll (__X); return __builtin_ctzll (__X);
} }
extern __inline int
__funline int __bsrq(long long __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bsrq (long long __X)
{
return __builtin_ia32_bsrdi (__X); return __builtin_ia32_bsrdi (__X);
} }
extern __inline long long
__funline long long __bswapq(long long __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bswapq (long long __X)
{
return __builtin_bswap64 (__X); return __builtin_bswap64 (__X);
} }
#ifndef __SSE4_2__ #ifndef __SSE4_2__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("sse4.2") #pragma GCC target("sse4.2")
#define __DISABLE_SSE4_2__ #define __DISABLE_SSE4_2__
#endif /* __SSE4_2__ */ #endif
extern __inline unsigned long long
__funline unsigned long long __crc32q(unsigned long long __C, __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned long long __V) { __crc32q (unsigned long long __C, unsigned long long __V)
{
return __builtin_ia32_crc32di (__C, __V); return __builtin_ia32_crc32di (__C, __V);
} }
#ifdef __DISABLE_SSE4_2__ #ifdef __DISABLE_SSE4_2__
#undef __DISABLE_SSE4_2__ #undef __DISABLE_SSE4_2__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_SSE4_2__ */ #endif
extern __inline long long
__funline long long __popcntq(unsigned long long __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__popcntq (unsigned long long __X)
{
return __builtin_popcountll (__X); return __builtin_popcountll (__X);
} }
extern __inline unsigned long long
__funline unsigned long long __rolq(unsigned long long __X, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolq (unsigned long long __X, int __C)
{
__C &= 63; __C &= 63;
return (__X << __C) | (__X >> (-__C & 63)); return (__X << __C) | (__X >> (-__C & 63));
} }
extern __inline unsigned long long
__funline unsigned long long __rorq(unsigned long long __X, int __C) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorq (unsigned long long __X, int __C)
{
__C &= 63; __C &= 63;
return (__X >> __C) | (__X << (-__C & 63)); return (__X >> __C) | (__X << (-__C & 63));
} }
extern __inline unsigned long long
__funline unsigned long long __readeflags(void) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void)
{
return __builtin_ia32_readeflags_u64 (); return __builtin_ia32_readeflags_u64 ();
} }
extern __inline void
__funline void __writeeflags(unsigned long long __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned long long __X)
{
__builtin_ia32_writeeflags_u64 (__X); __builtin_ia32_writeeflags_u64 (__X);
} }
#define _bswap64(a) __bswapq(a) #define _bswap64(a) __bswapq(a)
#define _popcnt64(a) __popcntq(a) #define _popcnt64(a) __popcntq(a)
#else #else
extern __inline unsigned int
__funline unsigned int __readeflags(void) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__readeflags (void)
{
return __builtin_ia32_readeflags_u32 (); return __builtin_ia32_readeflags_u32 ();
} }
extern __inline void
__funline void __writeeflags(unsigned int __X) { __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__writeeflags (unsigned int __X)
{
__builtin_ia32_writeeflags_u32 (__X); __builtin_ia32_writeeflags_u32 (__X);
} }
#endif #endif
#ifdef __LP64__ #ifdef __LP64__
#define _lrotl(a,b) __rolq((a), (b)) #define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b)) #define _lrotr(a,b) __rorq((a), (b))
@ -168,7 +201,6 @@ __funline void __writeeflags(unsigned int __X) {
#define _lrotl(a,b) __rold((a), (b)) #define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b)) #define _lrotr(a,b) __rord((a), (b))
#endif #endif
#define _bit_scan_forward(a) __bsfd(a) #define _bit_scan_forward(a) __bsfd(a)
#define _bit_scan_reverse(a) __bsrd(a) #define _bit_scan_reverse(a) __bsrd(a)
#define _bswap(a) __bswapd(a) #define _bswap(a) __bswapd(a)
@ -176,9 +208,10 @@ __funline void __writeeflags(unsigned int __X) {
#ifndef __iamcu__ #ifndef __iamcu__
#define _rdpmc(a) __rdpmc(a) #define _rdpmc(a) __rdpmc(a)
#define _rdtscp(a) __rdtscp(a) #define _rdtscp(a) __rdtscp(a)
#endif /* __iamcu__ */ #endif
#define _rdtsc() __rdtsc() #define _rdtsc() __rdtsc()
#define _rotwl(a,b) __rolw((a), (b)) #define _rotwl(a,b) __rolw((a), (b))
#define _rotwr(a,b) __rorw((a), (b)) #define _rotwr(a,b) __rorw((a), (b))
#define _rotl(a,b) __rold((a), (b)) #define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b)) #define _rotr(a,b) __rord((a), (b))
#endif

View file

@ -1,8 +1,8 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _IMMINTRIN_H_INCLUDED #ifndef _IMMINTRIN_H_INCLUDED
#define _IMMINTRIN_H_INCLUDED #define _IMMINTRIN_H_INCLUDED
#ifdef __x86_64__ #include "third_party/intel/x86gprintrin.internal.h"
/* clang-format off */
#include "third_party/intel/mmintrin.internal.h" #include "third_party/intel/mmintrin.internal.h"
#include "third_party/intel/xmmintrin.internal.h" #include "third_party/intel/xmmintrin.internal.h"
#include "third_party/intel/emmintrin.internal.h" #include "third_party/intel/emmintrin.internal.h"
@ -10,12 +10,8 @@
#include "third_party/intel/tmmintrin.internal.h" #include "third_party/intel/tmmintrin.internal.h"
#include "third_party/intel/smmintrin.internal.h" #include "third_party/intel/smmintrin.internal.h"
#include "third_party/intel/wmmintrin.internal.h" #include "third_party/intel/wmmintrin.internal.h"
#include "third_party/intel/fxsrintrin.internal.h"
#include "third_party/intel/xsaveintrin.internal.h"
#include "third_party/intel/xsaveoptintrin.internal.h"
#include "third_party/intel/xsavesintrin.internal.h"
#include "third_party/intel/xsavecintrin.internal.h"
#include "third_party/intel/avxintrin.internal.h" #include "third_party/intel/avxintrin.internal.h"
#include "third_party/intel/avxvnniintrin.internal.h"
#include "third_party/intel/avx2intrin.internal.h" #include "third_party/intel/avx2intrin.internal.h"
#include "third_party/intel/avx512fintrin.internal.h" #include "third_party/intel/avx512fintrin.internal.h"
#include "third_party/intel/avx512erintrin.internal.h" #include "third_party/intel/avx512erintrin.internal.h"
@ -39,143 +35,21 @@
#include "third_party/intel/avx512vnnivlintrin.internal.h" #include "third_party/intel/avx512vnnivlintrin.internal.h"
#include "third_party/intel/avx512vpopcntdqvlintrin.internal.h" #include "third_party/intel/avx512vpopcntdqvlintrin.internal.h"
#include "third_party/intel/avx512bitalgintrin.internal.h" #include "third_party/intel/avx512bitalgintrin.internal.h"
#include "third_party/intel/avx512vp2intersectintrin.internal.h"
#include "third_party/intel/avx512vp2intersectvlintrin.internal.h"
#include "third_party/intel/shaintrin.internal.h" #include "third_party/intel/shaintrin.internal.h"
#include "third_party/intel/lzcntintrin.internal.h"
#include "third_party/intel/bmiintrin.internal.h"
#include "third_party/intel/bmi2intrin.internal.h"
#include "third_party/intel/fmaintrin.internal.h" #include "third_party/intel/fmaintrin.internal.h"
#include "third_party/intel/f16cintrin.internal.h" #include "third_party/intel/f16cintrin.internal.h"
#include "third_party/intel/rtmintrin.internal.h" #include "third_party/intel/rtmintrin.internal.h"
#include "third_party/intel/xtestintrin.internal.h"
#include "third_party/intel/cetintrin.internal.h"
#include "third_party/intel/gfniintrin.internal.h" #include "third_party/intel/gfniintrin.internal.h"
#include "third_party/intel/vaesintrin.internal.h" #include "third_party/intel/vaesintrin.internal.h"
#include "third_party/intel/vpclmulqdqintrin.internal.h" #include "third_party/intel/vpclmulqdqintrin.internal.h"
#include "third_party/intel/movdirintrin.internal.h" #include "third_party/intel/avx512bf16vlintrin.internal.h"
#include "third_party/intel/sgxintrin.internal.h" #include "third_party/intel/avx512bf16intrin.internal.h"
#include "third_party/intel/pconfigintrin.internal.h" #include "third_party/intel/amxtileintrin.internal.h"
#include "third_party/intel/waitpkgintrin.internal.h" #include "third_party/intel/amxint8intrin.internal.h"
#include "third_party/intel/cldemoteintrin.internal.h" #include "third_party/intel/amxbf16intrin.internal.h"
#include "third_party/intel/rdseedintrin.internal.h"
#include "third_party/intel/prfchwintrin.internal.h" #include "third_party/intel/prfchwintrin.internal.h"
#include "third_party/intel/adxintrin.internal.h" #include "third_party/intel/keylockerintrin.internal.h"
#include "third_party/intel/clwbintrin.internal.h" #endif
#include "third_party/intel/clflushoptintrin.internal.h"
#include "third_party/intel/wbnoinvdintrin.internal.h"
#include "third_party/intel/pkuintrin.internal.h"
/* clang-format on */
__funline void _wbinvd(void) {
__builtin_ia32_wbinvd();
}
#ifndef __RDRND__
#pragma GCC push_options
#pragma GCC target("rdrnd")
#define __DISABLE_RDRND__
#endif /* __RDRND__ */
__funline int _rdrand16_step(unsigned short *__P) {
return __builtin_ia32_rdrand16_step(__P);
}
__funline int _rdrand32_step(unsigned int *__P) {
return __builtin_ia32_rdrand32_step(__P);
}
#ifdef __DISABLE_RDRND__
#undef __DISABLE_RDRND__
#pragma GCC pop_options
#endif /* __DISABLE_RDRND__ */
#ifndef __RDPID__
#pragma GCC push_options
#pragma GCC target("rdpid")
#define __DISABLE_RDPID__
#endif /* __RDPID__ */
__funline unsigned int _rdpid_u32(void) {
return __builtin_ia32_rdpid();
}
#ifdef __DISABLE_RDPID__
#undef __DISABLE_RDPID__
#pragma GCC pop_options
#endif /* __DISABLE_RDPID__ */
#ifdef __x86_64__
#ifndef __FSGSBASE__
#pragma GCC push_options
#pragma GCC target("fsgsbase")
#define __DISABLE_FSGSBASE__
#endif /* __FSGSBASE__ */
__funline unsigned int _readfsbase_u32(void) {
return __builtin_ia32_rdfsbase32();
}
__funline unsigned long long _readfsbase_u64(void) {
return __builtin_ia32_rdfsbase64();
}
__funline unsigned int _readgsbase_u32(void) {
return __builtin_ia32_rdgsbase32();
}
__funline unsigned long long _readgsbase_u64(void) {
return __builtin_ia32_rdgsbase64();
}
__funline void _writefsbase_u32(unsigned int __B) {
__builtin_ia32_wrfsbase32(__B);
}
__funline void _writefsbase_u64(unsigned long long __B) {
__builtin_ia32_wrfsbase64(__B);
}
__funline void _writegsbase_u32(unsigned int __B) {
__builtin_ia32_wrgsbase32(__B);
}
__funline void _writegsbase_u64(unsigned long long __B) {
__builtin_ia32_wrgsbase64(__B);
}
#ifdef __DISABLE_FSGSBASE__
#undef __DISABLE_FSGSBASE__
#pragma GCC pop_options
#endif /* __DISABLE_FSGSBASE__ */
#ifndef __RDRND__
#pragma GCC push_options
#pragma GCC target("rdrnd")
#define __DISABLE_RDRND__
#endif /* __RDRND__ */
__funline int _rdrand64_step(unsigned long long *__P) {
return __builtin_ia32_rdrand64_step(__P);
}
#ifdef __DISABLE_RDRND__
#undef __DISABLE_RDRND__
#pragma GCC pop_options
#endif /* __DISABLE_RDRND__ */
#endif /* __x86_64__ */
#ifndef __PTWRITE__
#pragma GCC push_options
#pragma GCC target("ptwrite")
#define __DISABLE_PTWRITE__
#endif #endif
#ifdef __x86_64__
__funline void _ptwrite64(unsigned long long __B) {
__builtin_ia32_ptwrite64(__B);
}
#endif /* __x86_64__ */
__funline void _ptwrite32(unsigned __B) {
__builtin_ia32_ptwrite32(__B);
}
#ifdef __DISABLE_PTWRITE__
#undef __DISABLE_PTWRITE__
#pragma GCC pop_options
#endif /* __DISABLE_PTWRITE__ */
#endif /* __x86_64__ */
#endif /* _IMMINTRIN_H_INCLUDED */

View file

@ -0,0 +1,93 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#if !defined _IMMINTRIN_H_INCLUDED
# error "Never use <keylockerintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _KEYLOCKERINTRIN_H_INCLUDED
#define _KEYLOCKERINTRIN_H_INCLUDED
#ifndef __KL__
#pragma GCC push_options
#pragma GCC target("kl")
#define __DISABLE_KL__
#endif
extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadiwkey (unsigned int __I, __m128i __A, __m128i __B, __m128i __C)
{
__builtin_ia32_loadiwkey ((__v2di) __B, (__v2di) __C, (__v2di) __A, __I);
}
extern __inline
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_encodekey128_u32 (unsigned int __I, __m128i __A, void * __P)
{
return __builtin_ia32_encodekey128_u32 (__I, (__v2di)__A, __P);
}
extern __inline
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_encodekey256_u32 (unsigned int __I, __m128i __A, __m128i __B, void * __P)
{
return __builtin_ia32_encodekey256_u32 (__I, (__v2di)__A, (__v2di)__B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesdec128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesdec256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesenc128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesenc256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
#ifdef __DISABLE_KL__
#undef __DISABLE_KL__
#pragma GCC pop_options
#endif
#ifndef __WIDEKL__
#pragma GCC push_options
#pragma GCC target("widekl")
#define __DISABLE_WIDEKL__
#endif
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdecwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
return __builtin_ia32_aesdecwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdecwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
return __builtin_ia32_aesdecwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
}
/* Thin wrapper over __builtin_ia32_aesencwide128kl_u8: encrypts the eight
   blocks in __B using the 128-bit key handle at __P, storing the results
   through __A; returns a status byte.  The cast on __B drops const, which
   matches the builtin's prototype.
   NOTE(review): semantics inferred from the builtin's name -- confirm
   against the Intel Key Locker intrinsics documentation.  */
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesencwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
  __v2di *__odata = (__v2di *) __A;
  __v2di *__idata = (__v2di *) __B;
  return __builtin_ia32_aesencwide128kl_u8 (__odata, __idata, __P);
}
/* Thin wrapper over __builtin_ia32_aesencwide256kl_u8: encrypts the eight
   blocks in __B using the 256-bit key handle at __P, storing the results
   through __A; returns a status byte.  The cast on __B drops const, which
   matches the builtin's prototype.
   NOTE(review): semantics inferred from the builtin's name -- confirm
   against the Intel Key Locker intrinsics documentation.  */
extern __inline unsigned char
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesencwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
  __v2di *__odata = (__v2di *) __A;
  __v2di *__idata = (__v2di *) __B;
  return __builtin_ia32_aesencwide256kl_u8 (__odata, __idata, __P);
}
#ifdef __DISABLE_WIDEKL__
#undef __DISABLE_WIDEKL__
#pragma GCC pop_options
#endif
#endif
#endif

View file

@ -1,73 +1,68 @@
#ifndef _X86INTRIN_H_INCLUDED /* clang-format off */
#error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <lwpintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _LWPINTRIN_H_INCLUDED #ifndef _LWPINTRIN_H_INCLUDED
#define _LWPINTRIN_H_INCLUDED #define _LWPINTRIN_H_INCLUDED
#ifndef __LWP__ #ifndef __LWP__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("lwp") #pragma GCC target("lwp")
#define __DISABLE_LWP__ #define __DISABLE_LWP__
#endif /* __LWP__ */ #endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void __llwpcb(void *__pcbAddress) { __llwpcb (void *__pcbAddress)
{
__builtin_ia32_llwpcb (__pcbAddress); __builtin_ia32_llwpcb (__pcbAddress);
} }
extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void *__slwpcb(void) { __slwpcb (void)
{
return __builtin_ia32_slwpcb (); return __builtin_ia32_slwpcb ();
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline void __lwpval32(unsigned int __data2, unsigned int __data1, extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned int __flags) { __lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
{
__builtin_ia32_lwpval32 (__data2, __data1, __flags); __builtin_ia32_lwpval32 (__data2, __data1, __flags);
} }
#ifdef __x86_64__ #ifdef __x86_64__
__funline void __lwpval64(unsigned long long __data2, unsigned int __data1, extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned int __flags) { __lwpval64 (unsigned long long __data2, unsigned int __data1,
unsigned int __flags)
{
__builtin_ia32_lwpval64 (__data2, __data1, __flags); __builtin_ia32_lwpval64 (__data2, __data1, __flags);
} }
#endif #endif
#else #else
#define __lwpval32(D2, D1, F) \ #define __lwpval32(D2, D1, F) (__builtin_ia32_lwpval32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
(__builtin_ia32_lwpval32((unsigned int)(D2), (unsigned int)(D1), \
(unsigned int)(F)))
#ifdef __x86_64__ #ifdef __x86_64__
#define __lwpval64(D2, D1, F) \ #define __lwpval64(D2, D1, F) (__builtin_ia32_lwpval64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
(__builtin_ia32_lwpval64((unsigned long long)(D2), (unsigned int)(D1), \
(unsigned int)(F)))
#endif #endif
#endif #endif
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
__funline unsigned char __lwpins32(unsigned int __data2, unsigned int __data1, extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned int __flags) { __lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
{
return __builtin_ia32_lwpins32 (__data2, __data1, __flags); return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
} }
#ifdef __x86_64__ #ifdef __x86_64__
__funline unsigned char __lwpins64(unsigned long long __data2, extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
unsigned int __data1, unsigned int __flags) { __lwpins64 (unsigned long long __data2, unsigned int __data1,
unsigned int __flags)
{
return __builtin_ia32_lwpins64 (__data2, __data1, __flags); return __builtin_ia32_lwpins64 (__data2, __data1, __flags);
} }
#endif #endif
#else #else
#define __lwpins32(D2, D1, F) \ #define __lwpins32(D2, D1, F) (__builtin_ia32_lwpins32 ((unsigned int) (D2), (unsigned int) (D1), (unsigned int) (F)))
(__builtin_ia32_lwpins32((unsigned int)(D2), (unsigned int)(D1), \
(unsigned int)(F)))
#ifdef __x86_64__ #ifdef __x86_64__
#define __lwpins64(D2, D1, F) \ #define __lwpins64(D2, D1, F) (__builtin_ia32_lwpins64 ((unsigned long long) (D2), (unsigned int) (D1), (unsigned int) (F)))
(__builtin_ia32_lwpins64((unsigned long long)(D2), (unsigned int)(D1), \
(unsigned int)(F)))
#endif #endif
#endif #endif
#ifdef __DISABLE_LWP__ #ifdef __DISABLE_LWP__
#undef __DISABLE_LWP__ #undef __DISABLE_LWP__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_LWP__ */ #endif
#endif
#endif /* _LWPINTRIN_H_INCLUDED */ #endif

View file

@ -1,41 +1,45 @@
#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED /* clang-format off */
#error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead." #if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _X86GPRINTRIN_H_INCLUDED
# error "Never use <lzcntintrin.h> directly; include <x86gprintrin.h> instead."
#endif #endif
#ifndef _LZCNTINTRIN_H_INCLUDED #ifndef _LZCNTINTRIN_H_INCLUDED
#define _LZCNTINTRIN_H_INCLUDED #define _LZCNTINTRIN_H_INCLUDED
#ifndef __LZCNT__ #ifndef __LZCNT__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("lzcnt") #pragma GCC target("lzcnt")
#define __DISABLE_LZCNT__ #define __DISABLE_LZCNT__
#endif /* __LZCNT__ */ #endif
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline unsigned short __lzcnt16(unsigned short __X) { __lzcnt16 (unsigned short __X)
{
return __builtin_ia32_lzcnt_u16 (__X); return __builtin_ia32_lzcnt_u16 (__X);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline unsigned int __lzcnt32(unsigned int __X) { __lzcnt32 (unsigned int __X)
{
return __builtin_ia32_lzcnt_u32 (__X); return __builtin_ia32_lzcnt_u32 (__X);
} }
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline unsigned int _lzcnt_u32(unsigned int __X) { _lzcnt_u32 (unsigned int __X)
{
return __builtin_ia32_lzcnt_u32 (__X); return __builtin_ia32_lzcnt_u32 (__X);
} }
#ifdef __x86_64__ #ifdef __x86_64__
__funline unsigned long long __lzcnt64(unsigned long long __X) { extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt64 (unsigned long long __X)
{
return __builtin_ia32_lzcnt_u64 (__X); return __builtin_ia32_lzcnt_u64 (__X);
} }
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline unsigned long long _lzcnt_u64(unsigned long long __X) { _lzcnt_u64 (unsigned long long __X)
{
return __builtin_ia32_lzcnt_u64 (__X); return __builtin_ia32_lzcnt_u64 (__X);
} }
#endif #endif
#ifdef __DISABLE_LZCNT__ #ifdef __DISABLE_LZCNT__
#undef __DISABLE_LZCNT__ #undef __DISABLE_LZCNT__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_LZCNT__ */ #endif
#endif
#endif /* _LZCNTINTRIN_H_INCLUDED */ #endif

View file

@ -1,9 +1,9 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _MM3DNOW_H_INCLUDED #ifndef _MM3DNOW_H_INCLUDED
#define _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED
#ifdef __x86_64__
#include "third_party/intel/mmintrin.internal.h" #include "third_party/intel/mmintrin.internal.h"
#include "third_party/intel/prfchwintrin.internal.h" #include "third_party/intel/prfchwintrin.internal.h"
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW__ #if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW__
#pragma GCC push_options #pragma GCC push_options
#ifdef __x86_64__ #ifdef __x86_64__
@ -12,110 +12,128 @@
#pragma GCC target("3dnow") #pragma GCC target("3dnow")
#endif #endif
#define __DISABLE_3dNOW__ #define __DISABLE_3dNOW__
#endif /* __3dNOW__ */ #endif
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void _m_femms(void) { _m_femms (void)
{
__builtin_ia32_femms(); __builtin_ia32_femms();
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pavgusb(__m64 __A, __m64 __B) { _m_pavgusb (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B); return (__m64)__builtin_ia32_pavgusb ((__v8qi)__A, (__v8qi)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pf2id(__m64 __A) { _m_pf2id (__m64 __A)
{
return (__m64)__builtin_ia32_pf2id ((__v2sf)__A); return (__m64)__builtin_ia32_pf2id ((__v2sf)__A);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfacc(__m64 __A, __m64 __B) { _m_pfacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfacc ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfadd(__m64 __A, __m64 __B) { _m_pfadd (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfadd ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfcmpeq(__m64 __A, __m64 __B) { _m_pfcmpeq (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfcmpeq ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfcmpge(__m64 __A, __m64 __B) { _m_pfcmpge (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfcmpge ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfcmpgt(__m64 __A, __m64 __B) { _m_pfcmpgt (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfcmpgt ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfmax(__m64 __A, __m64 __B) { _m_pfmax (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfmax ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfmin(__m64 __A, __m64 __B) { _m_pfmin (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfmin ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfmul(__m64 __A, __m64 __B) { _m_pfmul (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfmul ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfrcp(__m64 __A) { _m_pfrcp (__m64 __A)
{
return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A); return (__m64)__builtin_ia32_pfrcp ((__v2sf)__A);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfrcpit1(__m64 __A, __m64 __B) { _m_pfrcpit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfrcpit1 ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfrcpit2(__m64 __A, __m64 __B) { _m_pfrcpit2 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfrcpit2 ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfrsqrt(__m64 __A) { _m_pfrsqrt (__m64 __A)
{
return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A); return (__m64)__builtin_ia32_pfrsqrt ((__v2sf)__A);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfrsqit1(__m64 __A, __m64 __B) { _m_pfrsqit1 (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfrsqit1 ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfsub(__m64 __A, __m64 __B) { _m_pfsub (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfsub ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfsubr(__m64 __A, __m64 __B) { _m_pfsubr (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfsubr ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pi2fd(__m64 __A) { _m_pi2fd (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fd ((__v2si)__A); return (__m64)__builtin_ia32_pi2fd ((__v2si)__A);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pmulhrw(__m64 __A, __m64 __B) { _m_pmulhrw (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B); return (__m64)__builtin_ia32_pmulhrw ((__v4hi)__A, (__v4hi)__B);
} }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void _m_prefetch(void *__P) { _m_prefetch (void *__P)
__builtin_prefetch(__P, 0, 3 /* _MM_HINT_T0 */); {
__builtin_prefetch (__P, 0, 3 );
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_from_float(float __A) { _m_from_float (float __A)
{
return __extension__ (__m64)(__v2sf){ __A, 0.0f }; return __extension__ (__m64)(__v2sf){ __A, 0.0f };
} }
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline float _m_to_float(__m64 __A) { _m_to_float (__m64 __A)
union { {
__v2sf v; union { __v2sf v; float a[2]; } __tmp;
float a[2];
} __tmp;
__tmp.v = (__v2sf)__A; __tmp.v = (__v2sf)__A;
return __tmp.a[0]; return __tmp.a[0];
} }
#ifdef __DISABLE_3dNOW__ #ifdef __DISABLE_3dNOW__
#undef __DISABLE_3dNOW__ #undef __DISABLE_3dNOW__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_3dNOW__ */ #endif
#if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW_A__ #if defined __x86_64__ && !defined __SSE__ || !defined __3dNOW_A__
#pragma GCC push_options #pragma GCC push_options
#ifdef __x86_64__ #ifdef __x86_64__
@ -124,32 +142,35 @@ __funline float _m_to_float(__m64 __A) {
#pragma GCC target("3dnowa") #pragma GCC target("3dnowa")
#endif #endif
#define __DISABLE_3dNOW_A__ #define __DISABLE_3dNOW_A__
#endif /* __3dNOW_A__ */ #endif
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pf2iw(__m64 __A) { _m_pf2iw (__m64 __A)
{
return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A); return (__m64)__builtin_ia32_pf2iw ((__v2sf)__A);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfnacc(__m64 __A, __m64 __B) { _m_pfnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfnacc ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pfpnacc(__m64 __A, __m64 __B) { _m_pfpnacc (__m64 __A, __m64 __B)
{
return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B); return (__m64)__builtin_ia32_pfpnacc ((__v2sf)__A, (__v2sf)__B);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pi2fw(__m64 __A) { _m_pi2fw (__m64 __A)
{
return (__m64)__builtin_ia32_pi2fw ((__v2si)__A); return (__m64)__builtin_ia32_pi2fw ((__v2si)__A);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pswapd(__m64 __A) { _m_pswapd (__m64 __A)
{
return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A); return (__m64)__builtin_ia32_pswapdsf ((__v2sf)__A);
} }
#ifdef __DISABLE_3dNOW_A__ #ifdef __DISABLE_3dNOW_A__
#undef __DISABLE_3dNOW_A__ #undef __DISABLE_3dNOW_A__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_3dNOW_A__ */ #endif
#endif
#endif /* __x86_64__ */ #endif
#endif /* _MM3DNOW_H_INCLUDED */

View file

@ -1,15 +1,14 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _MM_MALLOC_H_INCLUDED #ifndef _MM_MALLOC_H_INCLUDED
#define _MM_MALLOC_H_INCLUDED #define _MM_MALLOC_H_INCLUDED
#ifdef __x86_64__
#include "libc/mem/mem.h" #include "libc/mem/mem.h"
#ifndef __cplusplus #ifndef __cplusplus
extern int _mm_posix_memalign(void **, size_t, size_t) extern int _mm_posix_memalign(void **, size_t, size_t)
#else #else
extern "C" int _mm_posix_memalign(void **, size_t, size_t) throw() extern "C" int _mm_posix_memalign(void **, size_t, size_t) throw()
#endif #endif
__asm__("posix_memalign"); __asm__("posix_memalign");
static __inline void *_mm_malloc(size_t __size, size_t __alignment) { static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
void *__ptr; void *__ptr;
if (__alignment == 1) return malloc(__size); if (__alignment == 1) return malloc(__size);
@ -20,10 +19,8 @@ static __inline void *_mm_malloc(size_t __size, size_t __alignment) {
else else
return NULL; return NULL;
} }
static __inline void _mm_free(void *__ptr) { static __inline void _mm_free(void *__ptr) {
free(__ptr); free(__ptr);
} }
#endif
#endif /* __x86_64__ */ #endif
#endif /* _MM_MALLOC_H_INCLUDED */

View file

@ -1,576 +1,710 @@
/* clang-format off */
#if defined(__x86_64__) && !(__ASSEMBLER__ + __LINKER__ + 0)
#ifndef _MMINTRIN_H_INCLUDED #ifndef _MMINTRIN_H_INCLUDED
#define _MMINTRIN_H_INCLUDED #define _MMINTRIN_H_INCLUDED
#ifdef __x86_64__
#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__ #if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
#pragma GCC push_options #pragma GCC push_options
#ifdef __x86_64__ #ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#elif defined __x86_64__
#pragma GCC target("sse,mmx") #pragma GCC target("sse,mmx")
#else #else
#pragma GCC target("mmx") #pragma GCC target("mmx")
#endif #endif
#define __DISABLE_MMX__ #define __DISABLE_MMX__
#endif /* __MMX__ */ #endif
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
typedef int __m32 __attribute__ ((__vector_size__ (4), __may_alias__));
typedef int __m64_u typedef short __m16 __attribute__ ((__vector_size__ (2), __may_alias__));
__attribute__((__vector_size__(8), __may_alias__, __aligned__(1))); typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
typedef int __m32_u __attribute__ ((__vector_size__ (4), __may_alias__, __aligned__ (1)));
typedef short __m16_u __attribute__ ((__vector_size__ (2), __may_alias__, __aligned__ (1)));
typedef int __v2si __attribute__ ((__vector_size__ (8))); typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8))); typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8))); typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8))); typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float __v2sf __attribute__ ((__vector_size__ (8))); typedef float __v2sf __attribute__ ((__vector_size__ (8)));
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void _mm_empty(void) { _mm_empty (void)
{
__builtin_ia32_emms (); __builtin_ia32_emms ();
} }
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline void _m_empty(void) { _m_empty (void)
{
_mm_empty (); _mm_empty ();
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cvtsi32_si64(int __i) { _mm_cvtsi32_si64 (int __i)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i, 0); return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_from_int(int __i) { _m_from_int (int __i)
{
return _mm_cvtsi32_si64 (__i); return _mm_cvtsi32_si64 (__i);
} }
#ifdef __x86_64__ #ifdef __x86_64__
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_from_int64(long long __i) { _m_from_int64 (long long __i)
{
return (__m64) __i; return (__m64) __i;
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cvtsi64_m64(long long __i) { _mm_cvtsi64_m64 (long long __i)
{
return (__m64) __i; return (__m64) __i;
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cvtsi64x_si64(long long __i) { _mm_cvtsi64x_si64 (long long __i)
{
return (__m64) __i; return (__m64) __i;
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_set_pi64x(long long __i) { _mm_set_pi64x (long long __i)
{
return (__m64) __i; return (__m64) __i;
} }
#endif #endif
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline int _mm_cvtsi64_si32(__m64 __i) { _mm_cvtsi64_si32 (__m64 __i)
{
return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0); return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
} }
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline int _m_to_int(__m64 __i) { _m_to_int (__m64 __i)
{
return _mm_cvtsi64_si32 (__i); return _mm_cvtsi64_si32 (__i);
} }
#ifdef __x86_64__ #ifdef __x86_64__
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline long long _m_to_int64(__m64 __i) { _m_to_int64 (__m64 __i)
{
return (long long)__i; return (long long)__i;
} }
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline long long _mm_cvtm64_si64(__m64 __i) { _mm_cvtm64_si64 (__m64 __i)
{
return (long long)__i; return (long long)__i;
} }
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline long long _mm_cvtsi64_si64x(__m64 __i) { _mm_cvtsi64_si64x (__m64 __i)
{
return (long long)__i; return (long long)__i;
} }
#endif #endif
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_packs_pi16(__m64 __m1, __m64 __m2) { _mm_packs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_packsswb(__m64 __m1, __m64 __m2) { _m_packsswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi16 (__m1, __m2); return _mm_packs_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_packs_pi32(__m64 __m1, __m64 __m2) { _mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2); return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_packssdw(__m64 __m1, __m64 __m2) { _m_packssdw (__m64 __m1, __m64 __m2)
{
return _mm_packs_pi32 (__m1, __m2); return _mm_packs_pi32 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_packs_pu16(__m64 __m1, __m64 __m2) { _mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_packuswb(__m64 __m1, __m64 __m2) { _m_packuswb (__m64 __m1, __m64 __m2)
{
return _mm_packs_pu16 (__m1, __m2); return _mm_packs_pu16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_punpckhbw(__m64 __m1, __m64 __m2) { _m_punpckhbw (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi8 (__m1, __m2); return _mm_unpackhi_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_punpckhwd(__m64 __m1, __m64 __m2) { _m_punpckhwd (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi16 (__m1, __m2); return _mm_unpackhi_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2); return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_punpckhdq(__m64 __m1, __m64 __m2) { _m_punpckhdq (__m64 __m1, __m64 __m2)
{
return _mm_unpackhi_pi32 (__m1, __m2); return _mm_unpackhi_pi32 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_punpcklbw(__m64 __m1, __m64 __m2) { _m_punpcklbw (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi8 (__m1, __m2); return _mm_unpacklo_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_punpcklwd(__m64 __m1, __m64 __m2) { _m_punpcklwd (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi16 (__m1, __m2); return _mm_unpacklo_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2); return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_punpckldq(__m64 __m1, __m64 __m2) { _m_punpckldq (__m64 __m1, __m64 __m2)
{
return _mm_unpacklo_pi32 (__m1, __m2); return _mm_unpacklo_pi32 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_add_pi8(__m64 __m1, __m64 __m2) { _mm_add_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_paddb(__m64 __m1, __m64 __m2) { _m_paddb (__m64 __m1, __m64 __m2)
{
return _mm_add_pi8 (__m1, __m2); return _mm_add_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_add_pi16(__m64 __m1, __m64 __m2) { _mm_add_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_paddw(__m64 __m1, __m64 __m2) { _m_paddw (__m64 __m1, __m64 __m2)
{
return _mm_add_pi16 (__m1, __m2); return _mm_add_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_add_pi32(__m64 __m1, __m64 __m2) { _mm_add_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2); return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_paddd(__m64 __m1, __m64 __m2) { _m_paddd (__m64 __m1, __m64 __m2)
{
return _mm_add_pi32 (__m1, __m2); return _mm_add_pi32 (__m1, __m2);
} }
#ifndef __SSE2__ #ifndef __SSE2__
#pragma GCC push_options #pragma GCC push_options
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#else
#pragma GCC target("sse2,mmx") #pragma GCC target("sse2,mmx")
#endif
#define __DISABLE_SSE2__ #define __DISABLE_SSE2__
#endif /* __SSE2__ */ #endif
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_add_si64(__m64 __m1, __m64 __m2) { _mm_add_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2); return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
} }
#ifdef __DISABLE_SSE2__ #ifdef __DISABLE_SSE2__
#undef __DISABLE_SSE2__ #undef __DISABLE_SSE2__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_SSE2__ */ #endif
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_adds_pi8(__m64 __m1, __m64 __m2) { _mm_adds_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_paddsb(__m64 __m1, __m64 __m2) { _m_paddsb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi8 (__m1, __m2); return _mm_adds_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_adds_pi16(__m64 __m1, __m64 __m2) { _mm_adds_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_paddsw(__m64 __m1, __m64 __m2) { _m_paddsw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pi16 (__m1, __m2); return _mm_adds_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_adds_pu8(__m64 __m1, __m64 __m2) { _mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_paddusb(__m64 __m1, __m64 __m2) { _m_paddusb (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu8 (__m1, __m2); return _mm_adds_pu8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_adds_pu16(__m64 __m1, __m64 __m2) { _mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_paddusw(__m64 __m1, __m64 __m2) { _m_paddusw (__m64 __m1, __m64 __m2)
{
return _mm_adds_pu16 (__m1, __m2); return _mm_adds_pu16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sub_pi8(__m64 __m1, __m64 __m2) { _mm_sub_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psubb(__m64 __m1, __m64 __m2) { _m_psubb (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi8 (__m1, __m2); return _mm_sub_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sub_pi16(__m64 __m1, __m64 __m2) { _mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psubw(__m64 __m1, __m64 __m2) { _m_psubw (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi16 (__m1, __m2); return _mm_sub_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sub_pi32(__m64 __m1, __m64 __m2) { _mm_sub_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2); return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psubd(__m64 __m1, __m64 __m2) { _m_psubd (__m64 __m1, __m64 __m2)
{
return _mm_sub_pi32 (__m1, __m2); return _mm_sub_pi32 (__m1, __m2);
} }
#ifndef __SSE2__ #ifndef __SSE2__
#pragma GCC push_options #pragma GCC push_options
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#else
#pragma GCC target("sse2,mmx") #pragma GCC target("sse2,mmx")
#endif
#define __DISABLE_SSE2__ #define __DISABLE_SSE2__
#endif /* __SSE2__ */ #endif
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sub_si64(__m64 __m1, __m64 __m2) { _mm_sub_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2); return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
} }
#ifdef __DISABLE_SSE2__ #ifdef __DISABLE_SSE2__
#undef __DISABLE_SSE2__ #undef __DISABLE_SSE2__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_SSE2__ */ #endif
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_subs_pi8(__m64 __m1, __m64 __m2) { _mm_subs_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psubsb(__m64 __m1, __m64 __m2) { _m_psubsb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi8 (__m1, __m2); return _mm_subs_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_subs_pi16(__m64 __m1, __m64 __m2) { _mm_subs_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psubsw(__m64 __m1, __m64 __m2) { _m_psubsw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pi16 (__m1, __m2); return _mm_subs_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_subs_pu8(__m64 __m1, __m64 __m2) { _mm_subs_pu8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psubusb(__m64 __m1, __m64 __m2) { _m_psubusb (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu8 (__m1, __m2); return _mm_subs_pu8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_subs_pu16(__m64 __m1, __m64 __m2) { _mm_subs_pu16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psubusw(__m64 __m1, __m64 __m2) { _m_psubusw (__m64 __m1, __m64 __m2)
{
return _mm_subs_pu16 (__m1, __m2); return _mm_subs_pu16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_madd_pi16(__m64 __m1, __m64 __m2) { _mm_madd_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pmaddwd(__m64 __m1, __m64 __m2) { _m_pmaddwd (__m64 __m1, __m64 __m2)
{
return _mm_madd_pi16 (__m1, __m2); return _mm_madd_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pmulhw(__m64 __m1, __m64 __m2) { _m_pmulhw (__m64 __m1, __m64 __m2)
{
return _mm_mulhi_pi16 (__m1, __m2); return _mm_mulhi_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_mullo_pi16(__m64 __m1, __m64 __m2) { _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pmullw(__m64 __m1, __m64 __m2) { _m_pmullw (__m64 __m1, __m64 __m2)
{
return _mm_mullo_pi16 (__m1, __m2); return _mm_mullo_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sll_pi16(__m64 __m, __m64 __count) { _mm_sll_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count); return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psllw(__m64 __m, __m64 __count) { _m_psllw (__m64 __m, __m64 __count)
{
return _mm_sll_pi16 (__m, __count); return _mm_sll_pi16 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_slli_pi16(__m64 __m, int __count) { _mm_slli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count); return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psllwi(__m64 __m, int __count) { _m_psllwi (__m64 __m, int __count)
{
return _mm_slli_pi16 (__m, __count); return _mm_slli_pi16 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sll_pi32(__m64 __m, __m64 __count) { _mm_sll_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count); return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pslld(__m64 __m, __m64 __count) { _m_pslld (__m64 __m, __m64 __count)
{
return _mm_sll_pi32 (__m, __count); return _mm_sll_pi32 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_slli_pi32(__m64 __m, int __count) { _mm_slli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count); return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pslldi(__m64 __m, int __count) { _m_pslldi (__m64 __m, int __count)
{
return _mm_slli_pi32 (__m, __count); return _mm_slli_pi32 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sll_si64(__m64 __m, __m64 __count) { _mm_sll_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count); return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psllq(__m64 __m, __m64 __count) { _m_psllq (__m64 __m, __m64 __count)
{
return _mm_sll_si64 (__m, __count); return _mm_sll_si64 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_slli_si64(__m64 __m, int __count) { _mm_slli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count); return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psllqi(__m64 __m, int __count) { _m_psllqi (__m64 __m, int __count)
{
return _mm_slli_si64 (__m, __count); return _mm_slli_si64 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sra_pi16(__m64 __m, __m64 __count) { _mm_sra_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count); return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psraw(__m64 __m, __m64 __count) { _m_psraw (__m64 __m, __m64 __count)
{
return _mm_sra_pi16 (__m, __count); return _mm_sra_pi16 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srai_pi16(__m64 __m, int __count) { _mm_srai_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count); return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrawi(__m64 __m, int __count) { _m_psrawi (__m64 __m, int __count)
{
return _mm_srai_pi16 (__m, __count); return _mm_srai_pi16 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_sra_pi32(__m64 __m, __m64 __count) { _mm_sra_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count); return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrad(__m64 __m, __m64 __count) { _m_psrad (__m64 __m, __m64 __count)
{
return _mm_sra_pi32 (__m, __count); return _mm_sra_pi32 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srai_pi32(__m64 __m, int __count) { _mm_srai_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count); return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psradi(__m64 __m, int __count) { _m_psradi (__m64 __m, int __count)
{
return _mm_srai_pi32 (__m, __count); return _mm_srai_pi32 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srl_pi16(__m64 __m, __m64 __count) { _mm_srl_pi16 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count); return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrlw(__m64 __m, __m64 __count) { _m_psrlw (__m64 __m, __m64 __count)
{
return _mm_srl_pi16 (__m, __count); return _mm_srl_pi16 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srli_pi16(__m64 __m, int __count) { _mm_srli_pi16 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count); return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrlwi(__m64 __m, int __count) { _m_psrlwi (__m64 __m, int __count)
{
return _mm_srli_pi16 (__m, __count); return _mm_srli_pi16 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srl_pi32(__m64 __m, __m64 __count) { _mm_srl_pi32 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count); return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrld(__m64 __m, __m64 __count) { _m_psrld (__m64 __m, __m64 __count)
{
return _mm_srl_pi32 (__m, __count); return _mm_srl_pi32 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srli_pi32(__m64 __m, int __count) { _mm_srli_pi32 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count); return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrldi(__m64 __m, int __count) { _m_psrldi (__m64 __m, int __count)
{
return _mm_srli_pi32 (__m, __count); return _mm_srli_pi32 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srl_si64(__m64 __m, __m64 __count) { _mm_srl_si64 (__m64 __m, __m64 __count)
{
return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count); return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrlq(__m64 __m, __m64 __count) { _m_psrlq (__m64 __m, __m64 __count)
{
return _mm_srl_si64 (__m, __count); return _mm_srl_si64 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_srli_si64(__m64 __m, int __count) { _mm_srli_si64 (__m64 __m, int __count)
{
return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count); return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_psrlqi(__m64 __m, int __count) { _m_psrlqi (__m64 __m, int __count)
{
return _mm_srli_si64 (__m, __count); return _mm_srli_si64 (__m, __count);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_and_si64(__m64 __m1, __m64 __m2) { _mm_and_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pand (__m1, __m2); return __builtin_ia32_pand (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pand(__m64 __m1, __m64 __m2) { _m_pand (__m64 __m1, __m64 __m2)
{
return _mm_and_si64 (__m1, __m2); return _mm_and_si64 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_andnot_si64(__m64 __m1, __m64 __m2) { _mm_andnot_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pandn (__m1, __m2); return __builtin_ia32_pandn (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pandn(__m64 __m1, __m64 __m2) { _m_pandn (__m64 __m1, __m64 __m2)
{
return _mm_andnot_si64 (__m1, __m2); return _mm_andnot_si64 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_or_si64(__m64 __m1, __m64 __m2) { _mm_or_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_por (__m1, __m2); return __builtin_ia32_por (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_por(__m64 __m1, __m64 __m2) { _m_por (__m64 __m1, __m64 __m2)
{
return _mm_or_si64 (__m1, __m2); return _mm_or_si64 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_xor_si64(__m64 __m1, __m64 __m2) { _mm_xor_si64 (__m64 __m1, __m64 __m2)
{
return __builtin_ia32_pxor (__m1, __m2); return __builtin_ia32_pxor (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pxor(__m64 __m1, __m64 __m2) { _m_pxor (__m64 __m1, __m64 __m2)
{
return _mm_xor_si64 (__m1, __m2); return _mm_xor_si64 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pcmpeqb(__m64 __m1, __m64 __m2) { _m_pcmpeqb (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi8 (__m1, __m2); return _mm_cmpeq_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2); return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pcmpgtb(__m64 __m1, __m64 __m2) { _m_pcmpgtb (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi8 (__m1, __m2); return _mm_cmpgt_pi8 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pcmpeqw(__m64 __m1, __m64 __m2) { _m_pcmpeqw (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi16 (__m1, __m2); return _mm_cmpeq_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2); return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pcmpgtw(__m64 __m1, __m64 __m2) { _m_pcmpgtw (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi16 (__m1, __m2); return _mm_cmpgt_pi16 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2); return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pcmpeqd(__m64 __m1, __m64 __m2) { _m_pcmpeqd (__m64 __m1, __m64 __m2)
{
return _mm_cmpeq_pi32 (__m1, __m2); return _mm_cmpeq_pi32 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2); return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _m_pcmpgtd(__m64 __m1, __m64 __m2) { _m_pcmpgtd (__m64 __m1, __m64 __m2)
{
return _mm_cmpgt_pi32 (__m1, __m2); return _mm_cmpgt_pi32 (__m1, __m2);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_setzero_si64(void) { _mm_setzero_si64 (void)
{
return (__m64)0LL; return (__m64)0LL;
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_set_pi32(int __i1, int __i0) { _mm_set_pi32 (int __i1, int __i0)
{
return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1); return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) { _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
{
return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3); return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
char __b2, char __b1, char __b0) { char __b3, char __b2, char __b1, char __b0)
return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, __b4, __b5, {
__b6, __b7); return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
__b4, __b5, __b6, __b7);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_setr_pi32(int __i0, int __i1) { _mm_setr_pi32 (int __i0, int __i1)
{
return _mm_set_pi32 (__i1, __i0); return _mm_set_pi32 (__i1, __i0);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { _mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
{
return _mm_set_pi16 (__w3, __w2, __w1, __w0); return _mm_set_pi16 (__w3, __w2, __w1, __w0);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
char __b4, char __b5, char __b6, char __b7) { char __b4, char __b5, char __b6, char __b7)
{
return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_set1_pi32(int __i) { _mm_set1_pi32 (int __i)
{
return _mm_set_pi32 (__i, __i); return _mm_set_pi32 (__i, __i);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_set1_pi16(short __w) { _mm_set1_pi16 (short __w)
{
return _mm_set_pi16 (__w, __w, __w, __w); return _mm_set_pi16 (__w, __w, __w, __w);
} }
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__funline __m64 _mm_set1_pi8(char __b) { _mm_set1_pi8 (char __b)
{
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b); return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
} }
#ifdef __DISABLE_MMX__ #ifdef __DISABLE_MMX__
#undef __DISABLE_MMX__ #undef __DISABLE_MMX__
#pragma GCC pop_options #pragma GCC pop_options
#endif /* __DISABLE_MMX__ */ #endif
#endif
#endif /* __x86_64__ */ #endif
#endif /* _MMINTRIN_H_INCLUDED */

Some files were not shown because too many files have changed in this diff Show more