Merge branch 'master' into gg/flash-attn

This commit is contained in:
Georgi Gerganov 2024-04-17 10:13:09 +03:00
commit 2c41180e88
No known key found for this signature in database
GPG key ID: BF970631944C16B7
110 changed files with 11660 additions and 6357 deletions

17
ggml.h
View file

@@ -332,8 +332,8 @@ extern "C" {
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
-GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);
-GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);
+GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
+GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
struct ggml_object;
struct ggml_context;
@@ -2230,9 +2230,9 @@ extern "C" {
enum ggml_type type,
const float * src,
void * dst,
-int start,
-int nrows,
-int n_per_row,
+int64_t start,
+int64_t nrows,
+int64_t n_per_row,
const float * imatrix);
//
@@ -2309,6 +2309,9 @@ extern "C" {
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
+// removes key if it exists
+GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
// overrides existing values or adds a new one
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
@@ -2397,8 +2400,8 @@ extern "C" {
#else
#define GGML_RESTRICT restrict
#endif
-typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
-typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
+typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
+typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
const void * GGML_RESTRICT y, size_t by, int nrc);