llama : allow overriding GGUF metadata when loading model (#4092)
* feat: Allow overriding GGUF metadata when loading model * Fix the one time GCC is stricter than clang about something * Step1 * Refactor... basically everything! * Nuke obsolete GetArrayLen struct * simplify std::string specialization * Various cleanups Add informational output when overrides are applied Warn user when an override with the wrong type is specified * Fix broken logic for parsing bool KV overrides Fix issue where overrides didn't apply when key missing in GGUF metadata Resolve merge changes * llama : rearrange model params * Update new GET_KEY call Add note that metadata KV overrides aren't reflected in initial metadata KV info dump --------- Co-authored-by: cebtenzzre <cebtenzzre@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
52c8bc3cf3
commit
5aa365d88f
4 changed files with 361 additions and 86 deletions
20
llama.h
20
llama.h
|
@ -158,6 +158,22 @@ extern "C" {
|
|||
llama_seq_id all_seq_id; // used if seq_id == NULL
|
||||
} llama_batch;
|
||||
|
||||
enum llama_model_kv_override_type {
|
||||
LLAMA_KV_OVERRIDE_INT,
|
||||
LLAMA_KV_OVERRIDE_FLOAT,
|
||||
LLAMA_KV_OVERRIDE_BOOL,
|
||||
};
|
||||
|
||||
struct llama_model_kv_override {
|
||||
char key[128];
|
||||
enum llama_model_kv_override_type tag;
|
||||
union {
|
||||
int64_t int_value;
|
||||
double float_value;
|
||||
bool bool_value;
|
||||
};
|
||||
};
|
||||
|
||||
struct llama_model_params {
|
||||
int32_t n_gpu_layers; // number of layers to store in VRAM
|
||||
int32_t main_gpu; // the GPU that is used for scratch and small tensors
|
||||
|
@ -165,9 +181,13 @@ extern "C" {
|
|||
|
||||
// called with a progress value between 0 and 1, pass NULL to disable
|
||||
llama_progress_callback progress_callback;
|
||||
|
||||
// context pointer passed to the progress callback
|
||||
void * progress_callback_user_data;
|
||||
|
||||
// override key-value pairs of the model meta data
|
||||
const struct llama_model_kv_override * kv_overrides;
|
||||
|
||||
// Keep the booleans together to avoid misalignment during copy-by-value.
|
||||
bool vocab_only; // only load the vocabulary, no weights
|
||||
bool use_mmap; // use mmap if possible
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue