diff --git a/llama.h b/llama.h index 7a258a1e1..890b111ba 100644 --- a/llama.h +++ b/llama.h @@ -72,6 +72,8 @@ extern "C" { LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16 + LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // except 1d tensors + LLAMA_FTYPE_MOSTLY_Q4_2_SOME_F16 = 6, // tok_embeddings.weight and output.weight are F16 (mirrors Q4_1_SOME_F16; TODO confirm) }; LLAMA_API struct llama_context_params llama_context_default_params();