llama : disable FA for AMD
commit ce281b904c
parent 8937ec5307
3 changed files with 12 additions and 2 deletions
ggml-cuda/common.cuh
@@ -399,8 +399,8 @@ static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
 
 #define FP16_AVAILABLE defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) ? \
     defined(RDNA1) || defined(RDNA2) || defined(RDNA3) : __CUDA_ARCH__ >= CC_PASCAL
-#define FP16_MMA_AVAILABLE defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) ? \
-    defined(RDNA3) : __CUDA_ARCH__ >= CC_VOLTA
+#define FP16_MMA_AVAILABLE !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_VOLTA
 
 // TODO: move to ggml-common.h
 static const __device__ int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
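The effect of the rewritten gate: FP16_MMA_AVAILABLE is evaluated by the preprocessor, so on HIP/AMD builds the negated conjunction makes the whole expression false, and on CUDA builds it reduces to __CUDA_ARCH__ >= CC_VOLTA. A minimal standalone sketch of that behavior, assuming CC_VOLTA = 700 as elsewhere in ggml-cuda; everything around the define is illustrative, not from the commit:

    // Sketch only: CC_VOLTA = 700 mirrors ggml-cuda; the #if body is illustrative.
    #define CC_VOLTA 700
    #define FP16_MMA_AVAILABLE !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_VOLTA

    #if FP16_MMA_AVAILABLE
    // Compiled only in CUDA device passes targeting Volta (sm_70) or newer:
    // - on HIP/AMD builds the left operand is 0, so the whole gate is 0;
    // - in host passes __CUDA_ARCH__ is undefined, which #if treats as 0.
    #endif

This is why the tensor-core FlashAttention path can no longer be selected on AMD, independent of the RDNA checks the old ternary form used.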
ggml-cuda/fattn.cu
@@ -2,7 +2,10 @@
 #include "fattn.cuh"
 
 #include <cstdint>
 
+#if FP16_MMA_AVAILABLE
 #include <mma.h>
+#endif
 
 #define FATTN_KQ_STRIDE 256
 #define HALF_MAX_HALF __float2half(65504.0f/2) // Use neg. of this instead of -INFINITY to initialize KQ max vals to avoid NaN upon subtraction.
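The include needs the same guard because <mma.h> declares the nvcuda::wmma tensor-core API, which has no ROCm/HIP counterpart, so HIPBLAS builds must never see the header. A hedged sketch of the kind of code that has to live inside the guard; the helper name and the 16x16x16 tile shape are assumptions for illustration, not code from fattn.cu:

    #if FP16_MMA_AVAILABLE
    #include <cuda_fp16.h>
    #include <mma.h>

    // Hypothetical helper: one 16x16x16 FP16 tensor-core multiply-accumulate.
    // Anything that names nvcuda::wmma types must stay inside this guard.
    static __device__ void mma_16x16x16(const half * a, const half * b, float * c) {
        using namespace nvcuda;
        wmma::fragment<wmma::matrix_a,    16, 16, 16, half, wmma::row_major> fa;
        wmma::fragment<wmma::matrix_b,    16, 16, 16, half, wmma::col_major> fb;
        wmma::fragment<wmma::accumulator, 16, 16, 16, float>                 fc;

        wmma::load_matrix_sync(fa, a, 16); // leading dimension = 16
        wmma::load_matrix_sync(fb, b, 16);
        wmma::fill_fragment(fc, 0.0f);
        wmma::mma_sync(fc, fa, fb, fc);
        wmma::store_matrix_sync(c, fc, 16, wmma::mem_row_major);
    }
    #endif // FP16_MMA_AVAILABLE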
llama.cpp
@@ -15357,6 +15357,13 @@ struct llama_context * llama_new_context_with_model(
         cparams.flash_attn = false;
     }
 
+#ifdef GGML_USE_HIPBLAS
+    if (cparams.flash_attn) {
+        LLAMA_LOG_WARN("%s: flash_attn is not yet compatible with HIPBLAS builds - forcing off\n", __func__);
+        cparams.flash_attn = false;
+    }
+#endif
+
     if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
     }
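From the caller's side the change is transparent: requesting flash attention on a HIPBLAS build now logs the warning and proceeds with it disabled, rather than reaching unsupported kernels. A hedged usage sketch with the public llama.h calls of this period; the model path is a placeholder:

    #include "llama.h"

    int main(void) {
        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_load_model_from_file("model.gguf", mparams); // placeholder path

        llama_context_params cparams = llama_context_default_params();
        cparams.flash_attn = true; // request FA regardless of backend

        // On a GGML_USE_HIPBLAS build the new check logs
        //   "flash_attn is not yet compatible with HIPBLAS builds - forcing off"
        // and the context is created with flash_attn == false.
        llama_context * ctx = llama_new_context_with_model(model, cparams);

        llama_free(ctx);
        llama_free_model(model);
        return 0;
    }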