Merge commit '81bc9214a3' into nomic-vulkan

This commit is contained in:
Jared Van Bortel 2023-12-13 17:25:15 -05:00
commit 747e1eafcf
61 changed files with 3432 additions and 591 deletions

22
llama.h
View file

@ -158,6 +158,22 @@ extern "C" {
llama_seq_id all_seq_id; // used if seq_id == NULL
} llama_batch;
enum llama_model_kv_override_type {
LLAMA_KV_OVERRIDE_INT,
LLAMA_KV_OVERRIDE_FLOAT,
LLAMA_KV_OVERRIDE_BOOL,
};
struct llama_model_kv_override {
char key[128];
enum llama_model_kv_override_type tag;
union {
int64_t int_value;
double float_value;
bool bool_value;
};
};
struct llama_model_params {
int32_t n_gpu_layers; // number of layers to store in VRAM
int32_t main_gpu; // the GPU that is used for scratch and small tensors
@ -165,9 +181,13 @@ extern "C" {
// called with a progress value between 0 and 1, pass NULL to disable
llama_progress_callback progress_callback;
// context pointer passed to the progress callback
void * progress_callback_user_data;
// override key-value pairs of the model meta data
const struct llama_model_kv_override * kv_overrides;
// Keep the booleans together to avoid misalignment during copy-by-value.
bool vocab_only; // only load the vocabulary, no weights
bool use_mmap; // use mmap if possible
@ -185,7 +205,7 @@ extern "C" {
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
float rope_freq_base; // RoPE base frequency, 0 = from model
float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model
float yarn_ext_factor; // YaRN extrapolation mix factor, NaN = from model
float yarn_ext_factor; // YaRN extrapolation mix factor, negative = from model
float yarn_attn_factor; // YaRN magnitude scaling factor
float yarn_beta_fast; // YaRN low correction dim
float yarn_beta_slow; // YaRN high correction dim