fix: remove ollama patches

2024-06-05 09:15:36 +02:00 · 2024-06-05 09:15:36 +02:00 · 05659d3c7b
commit 05659d3c7b
parent 3b44f8f658
1 changed file with 20 additions and 20 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -4653,8 +4653,16 @@ static void llm_load_vocab(
        // for now, only BPE models have pre-tokenizers
        if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
-            if (
+            if (tokenizer_pre.empty()) {
-                    tokenizer_pre == "default") {
+                LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
                LLAMA_LOG_WARN("%s:                                             \n", __func__);
                LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
                LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED!        \n", __func__);
                LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL             \n", __func__);
                LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
                LLAMA_LOG_WARN("%s:                                             \n", __func__);
                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
            } else if (tokenizer_pre == "default") {
                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
            } else if (
                    tokenizer_pre == "llama3"   ||
@@ -4706,8 +4714,7 @@ static void llm_load_vocab(
                tokenizer_pre == "smaug-bpe") {
                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMAUG;
            } else {
-                LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
+                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
            }
        } else {
            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
@@ -6623,7 +6630,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
        }
    } catch (const std::exception & err) {
        LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
-        throw;
+        return -1;
    }
    return 0;
@@ -16246,23 +16253,16 @@ struct llama_model * llama_load_model_from_file(
        }
        model->rpc_servers.push_back(servers);
    }
-
+    int status = llama_model_load(path_model, *model, params);
-    try {
+    GGML_ASSERT(status <= 0);
-        int status = llama_model_load(path_model, *model, params);
+    if (status < 0) {
-        GGML_ASSERT(status <= 0);
+        if (status == -1) {
-        if (status < 0) {
+            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
-            if (status == -1) {
+        } else if (status == -2) {
-                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
+            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
            } else if (status == -2) {
                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
            }
            delete model;
            return nullptr;
        }
    } catch (...) {
        LLAMA_LOG_ERROR("%s: exception loading model\n", __func__);
        delete model;
-        throw;
+        return nullptr;
    }
    return model;