CUDA: add BF16 support (#11093)

* CUDA: add BF16 support
This commit is contained in:
Johannes Gäßler 2025-01-06 02:33:52 +01:00 committed by GitHub
parent b56f079e28
commit 46e3556e01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 87 additions and 39 deletions

View file

@ -3,6 +3,7 @@
#include <cuda_runtime.h>
#include <cuda.h>
#include <cublas_v2.h>
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#if CUDART_VERSION < 11020

View file

@ -3,6 +3,7 @@
#include <hip/hip_runtime.h>
#include <hipblas/hipblas.h>
#include <hip/hip_fp16.h>
#include <hip/hip_bfloat16.h>
#ifdef __HIP_PLATFORM_AMD__
// for rocblas_initialize()
#include "rocblas/rocblas.h"
@ -121,6 +122,8 @@
#define __has_builtin(x) 0
#endif
typedef hip_bfloat16 nv_bfloat16;
typedef int8_t int8x4_t __attribute__((ext_vector_type(4)));
typedef uint8_t uint8x4_t __attribute__((ext_vector_type(4)));
static __device__ __forceinline__ int __vsubss4(const int a, const int b) {

View file

@ -3,6 +3,7 @@
#include <musa_runtime.h>
#include <musa.h>
#include <mublas.h>
#include <musa_bf16.h>
#include <musa_fp16.h>
#define CUBLAS_COMPUTE_16F CUDA_R_16F
#define CUBLAS_COMPUTE_32F CUDA_R_32F
@ -132,3 +133,5 @@
#define cudaKernelNodeParams musaKernelNodeParams
#define cudaStreamCaptureModeRelaxed musaStreamCaptureModeRelaxed
#define cudaStreamEndCapture musaStreamEndCapture
typedef mt_bfloat16 nv_bfloat16;