ggml : always define ggml_fp16_t as uint16_t (#5666)

* ggml : always define ggml_fp16_t as uint16_t

ggml-ci

* ggml : cont

ggml-ci

* ggml : cont

* ggml : cont

ggml-ci

* ggml : cont

ggml-ci

* cuda : no longer ggml headers last

ggml-ci

* ggml : fix q6_K FP16 -> FP32 conversion

ggml-ci

* ggml : more FP16 -> FP32 conversion fixes

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-02-22 23:21:39 +02:00 committed by GitHub
parent 334f76fa38
commit 7e4f339c40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 42 additions and 36 deletions

View file

@ -1,3 +1,7 @@
#include "ggml-cuda.h"
#include "ggml.h"
#include "ggml-backend-impl.h"
#include <algorithm>
#include <assert.h>
#include <atomic>
@ -121,11 +125,6 @@
#endif // defined(GGML_USE_HIPBLAS)
// ggml-cuda need half type so keep ggml headers include at last
#include "ggml-cuda.h"
#include "ggml.h"
#include "ggml-backend-impl.h"
#define CUDART_HMAX 11070 // CUDA 11.7, min. ver. for which __hmax and __hmax2 are known to work (may be higher than needed)
#define CC_PASCAL 600