From a8d4afb97b483cfe7d4c94eea62eaa8cf8171f45 Mon Sep 17 00:00:00 2001
From: fmz
Date: Fri, 28 Jun 2024 06:51:02 -0700
Subject: [PATCH] address review comments

---
 convert-hf-to-gguf.py | 10 ++++++++++
 include/llama.h       |  5 +++++
 src/llama.cpp         |  9 +++++++++
 3 files changed, 24 insertions(+)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 88bfcecec..4ce0b49e6 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -427,6 +427,9 @@ class Model:
         # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script
         # or pull the latest version of the model from Huggingface
         # don't edit the hashes manually!
+        if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5":
+            # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
+            res = "llama-bpe"
         if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754":
             # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
             res = "deepseek-llm"
@@ -454,12 +457,18 @@ class Model:
         if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff":
             # ref: https://huggingface.co/smallcloudai/Refact-1_6-base
             res = "refact"
+        if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8":
+            # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01
+            res = "command-r"
         if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea":
             # ref: https://huggingface.co/Qwen/Qwen1.5-7B
             res = "qwen2"
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
             res = "olmo"
+        if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
+            # ref: https://huggingface.co/databricks/dbrx-base
+            res = "dbrx"
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
             res = "jina-v2-en"
@@ -2811,6 +2820,7 @@ class DeepseekV2Model(Model):
         if len(experts) > 0:
             raise ValueError(f"Unprocessed experts: {experts}")
 
 
+@Model.register("T5ForConditionalGeneration")
 @Model.register("T5WithLMHeadModel")
 class T5Model(Model):
diff --git a/include/llama.h b/include/llama.h
index c5b618292..2591edce9 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -652,6 +652,11 @@ extern "C" {
     // State / sessions
     //
 
+    // hack
+    void llama_set_logits_all(
+        struct llama_context * ctx,
+        bool logits_all);
+
     // Returns the maximum size in bytes of the state (rng, logits, embedding
     // and kv_cache) - will often be smaller after compacting tokens
     LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
diff --git a/src/llama.cpp b/src/llama.cpp
index 00f7f708e..88ca14db9 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4281,6 +4281,7 @@ static const char * llama_model_type_name(e_model type) {
         case MODEL_410M:          return "410M";
         case MODEL_0_5B:          return "0.5B";
         case MODEL_1B:            return "1B";
+        case MODEL_1_3B:          return "1.3B";
         case MODEL_1_4B:          return "1.4B";
         case MODEL_2B:            return "2B";
         case MODEL_2_8B:          return "2.8B";
@@ -13105,6 +13106,13 @@ static void llama_graph_compute(
     // fprintf(stderr, "splits: %d\n", ggml_backend_sched_get_n_splits(lctx.sched));
 }
 
+void llama_set_logits_all(
+    struct llama_context * ctx,
+    bool logits_all
+) {
+    ctx->logits_all = logits_all;
+}
+
 // decode a batch of tokens by evaluating the transformer
 //
 //   - lctx:      llama context
@@ -14052,6 +14060,7 @@ struct llm_tokenizer_bpe {
                 break;
             case LLAMA_VOCAB_PRE_TYPE_GPT2:
             case LLAMA_VOCAB_PRE_TYPE_OLMO:
+            case LLAMA_VOCAB_PRE_TYPE_JAIS:
                 regex_exprs = {
                     "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
                 };
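
For reviewers, below is a minimal usage sketch of the new llama_set_logits_all setter, not part of the patch. It assumes the standard llama.cpp C API of this period (llama_backend_init, llama_load_model_from_file, llama_new_context_with_model, llama_decode, llama_get_logits); "model.gguf" is a placeholder path.

    // illustrative only: request logits for all tokens before decoding
    #include "llama.h"

    int main(void) {
        llama_backend_init();

        struct llama_model_params   mparams = llama_model_default_params();
        struct llama_context_params cparams = llama_context_default_params();

        // "model.gguf" is a placeholder path
        struct llama_model   * model = llama_load_model_from_file("model.gguf", mparams);
        struct llama_context * ctx   = llama_new_context_with_model(model, cparams);

        // the setter added by this patch: sets ctx->logits_all so the next
        // decode computes logits for every token, not only those flagged in the batch
        llama_set_logits_all(ctx, true);

        // ... build a llama_batch, call llama_decode(ctx, batch),
        // then read the per-token logits with llama_get_logits(ctx) ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }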