parent
b56f079e28
commit
46e3556e01
6 changed files with 87 additions and 39 deletions
1
ggml/src/ggml-cuda/vendors/cuda.h
vendored
1
ggml/src/ggml-cuda/vendors/cuda.h
vendored
|
|
@ -3,6 +3,7 @@
|
|||
#include <cuda_runtime.h>
|
||||
#include <cuda.h>
|
||||
#include <cublas_v2.h>
|
||||
#include <cuda_bf16.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#if CUDART_VERSION < 11020
|
||||
|
|
|
|||
3
ggml/src/ggml-cuda/vendors/hip.h
vendored
3
ggml/src/ggml-cuda/vendors/hip.h
vendored
|
|
@ -3,6 +3,7 @@
|
|||
#include <hip/hip_runtime.h>
|
||||
#include <hipblas/hipblas.h>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include <hip/hip_bfloat16.h>
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
// for rocblas_initialize()
|
||||
#include "rocblas/rocblas.h"
|
||||
|
|
@ -121,6 +122,8 @@
|
|||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
typedef hip_bfloat16 nv_bfloat16;
|
||||
|
||||
typedef int8_t int8x4_t __attribute__((ext_vector_type(4)));
|
||||
typedef uint8_t uint8x4_t __attribute__((ext_vector_type(4)));
|
||||
static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
|
||||
|
|
|
|||
3
ggml/src/ggml-cuda/vendors/musa.h
vendored
3
ggml/src/ggml-cuda/vendors/musa.h
vendored
|
|
@ -3,6 +3,7 @@
|
|||
#include <musa_runtime.h>
|
||||
#include <musa.h>
|
||||
#include <mublas.h>
|
||||
#include <musa_bf16.h>
|
||||
#include <musa_fp16.h>
|
||||
#define CUBLAS_COMPUTE_16F CUDA_R_16F
|
||||
#define CUBLAS_COMPUTE_32F CUDA_R_32F
|
||||
|
|
@ -132,3 +133,5 @@
|
|||
#define cudaKernelNodeParams musaKernelNodeParams
|
||||
#define cudaStreamCaptureModeRelaxed musaStreamCaptureModeRelaxed
|
||||
#define cudaStreamEndCapture musaStreamEndCapture
|
||||
|
||||
typedef mt_bfloat16 nv_bfloat16;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue