From 0ead1f1072fdc70720f92e008a294dc74a826b1d Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 25 Apr 2024 15:23:47 +0200
Subject: [PATCH 1/3] llama : check that all the tensor data is in the model file (#6885)

* llama : check that all the tensor data is in the model file

* also check for unsigned overflow
---
 llama.cpp | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 51aae62af..a43cde109 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2999,9 +2999,13 @@ struct llama_model_loader {
 
         ggml_tensor * tensor;
 
-        llama_tensor_weight(uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+        llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
             const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
             offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
+
+            if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
+                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
+            }
         }
     };
     std::vector<llama_tensor_weight> weights;
@@ -3040,15 +3044,15 @@ struct llama_model_loader {
         get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
         llm_kv = LLM_KV(llm_arch_from_string(arch_name));
 
+        files.emplace_back(new llama_file(fname.c_str(), "rb"));
+        contexts.emplace_back(ctx);
+
         // Save tensors data offset of the main file.
         // For subsidiary files, `meta` tensor data offset must not be used,
         // so we build a unified tensors index for weights.
         for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-            weights.emplace_back(0, cur->name, meta, cur);
+            weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
         }
-        files.emplace_back(new llama_file(fname.c_str(), "rb"));
-        contexts.emplace_back(ctx);
-
         uint16_t n_split = 0;
         get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);
 
@@ -3082,13 +3086,14 @@ struct llama_model_loader {
                     throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
                 }
 
-                // Save tensors data offset info of the shard.
-                for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-                    weights.emplace_back(idx, cur->name, ctx_gguf, cur);
-                }
                 files.emplace_back(new llama_file(split_path, "rb"));
                 contexts.emplace_back(ctx);
 
+                // Save tensors data offset info of the shard.
+                for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
+                    weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
+                }
+
                 gguf_free(ctx_gguf);
             }
 

From 3fe0596c1817a6114ffffb6dbfd6c36ca7815dc7 Mon Sep 17 00:00:00 2001
From: BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
Date: Thu, 25 Apr 2024 09:52:28 -0400
Subject: [PATCH 2/3] readme : update model list (#6908)

* Update README.md

* missing space

* llama3 !
---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 72b5a2c88..bb2ca9dbd 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,7 @@ Typically finetunes of the base models below are supported as well.
 
 - [X] LLaMA 🦙
 - [x] LLaMA 2 🦙🦙
+- [x] LLaMA 3 🦙🦙🦙
 - [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
 - [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
 - [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)
@@ -119,8 +120,9 @@ Typically finetunes of the base models below are supported as well.
 - [x] [CodeShell](https://github.com/WisdomShell/codeshell)
 - [x] [Gemma](https://ai.google.dev/gemma)
 - [x] [Mamba](https://github.com/state-spaces/mamba)
+- [x] [Grok-1](https://huggingface.co/keyfan/grok-1-hf)
 - [x] [Xverse](https://huggingface.co/models?search=xverse)
-- [x] [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
+- [x] [Command-R models](https://huggingface.co/models?search=CohereForAI/c4ai-command-r)
 - [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
 - [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
 - [x] [OLMo](https://allenai.org/olmo)
@@ -135,6 +137,7 @@ Typically finetunes of the base models below are supported as well.
 - [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V)
 - [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM)
 - [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL)
+- [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM)
 
 **HTTP server**
 

From 853d06ffe26621176d60909a6e3f7c4cd067b305 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 25 Apr 2024 17:06:27 +0300
Subject: [PATCH 3/3] ci : tmp disable slow tests

---
 ci/run.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ci/run.sh b/ci/run.sh
index a75d0f5e3..da05f0d48 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -160,8 +160,9 @@ function gg_run_test_scripts_debug {
 
     set -e
 
+    # TODO: too slow, run on dedicated node
     (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
-    (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+    #(cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
 
     set +e
 }
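
Note on PATCH 1/3: the new constructor check rejects any tensor whose data would fall outside the model file, and the first half of the condition (offs + ggml_nbytes(tensor) < offs) catches the case where the unsigned addition wraps around, which is what the "also check for unsigned overflow" follow-up commit refers to. A minimal standalone sketch of the same pattern, using a hypothetical helper name that is not part of the patch:

    #include <cstddef>
    #include <stdexcept>
    #include <string>

    // Hypothetical illustration of the overflow-safe bounds check from PATCH 1/3:
    // accept the range [offs, offs + nbytes) only if it lies entirely inside the file.
    static void check_tensor_bounds(size_t offs, size_t nbytes, size_t file_size, const std::string & name) {
        // In unsigned arithmetic offs + nbytes can wrap past SIZE_MAX; comparing the
        // sum against offs detects that wrap before the upper-bound comparison runs.
        if (offs + nbytes < offs || offs + nbytes > file_size) {
            throw std::runtime_error("tensor '" + name + "' data is not within the file bounds");
        }
    }

Placing the check in the llama_tensor_weight constructor, which now receives the owning llama_file, means every weight is validated against the size of the file it actually comes from, including weights read from split GGUF shards.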