From e07000b525d7193240ace18400a326504644e305 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Thu, 6 Feb 2025 08:55:11 +0100
Subject: [PATCH] squash! common : add default embeddings presets [no ci]

Default to Q8_0 quantization.
---
 common/arg.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index 86226b2ec..2b3581e71 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2329,7 +2329,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
         [](common_params & params) {
             params.hf_repo = "CompendiumLabs/bge-small-en-v1.5-gguf";
-            params.hf_file = "bge-small-en-v1.5-q4_k_m.gguf";
+            params.hf_file = "bge-small-en-v1.5-q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;
@@ -2343,7 +2343,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
         [](common_params & params) {
             params.hf_repo = "ChristianAzinn/e5-small-v2-gguf";
-            params.hf_file = "e5-small-v2.Q4_K_M.gguf";
+            params.hf_file = "e5-small-v2.Q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;
@@ -2357,7 +2357,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         string_format("use default gte-small model (note: can download weights from the internet)"),
         [](common_params & params) {
             params.hf_repo = "ChristianAzinn/gte-small-gguf";
-            params.hf_file = "gte-small.Q4_K_M.gguf";
+            params.hf_file = "gte-small.Q8_0.gguf";
             params.pooling_type = LLAMA_POOLING_TYPE_NONE;
             params.embd_normalize = 2;
             params.n_ctx = 512;