From 6b2921423ed2cd4248f194e7ccf9b3f3a80b1ffc Mon Sep 17 00:00:00 2001 From: iohub Date: Thu, 7 Mar 2024 19:17:26 +0800 Subject: [PATCH] llama: fix crash when tokenizing unknown spm vocab token. --- llama.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 478099648..564823330 100644 --- a/llama.cpp +++ b/llama.cpp @@ -8770,7 +8770,10 @@ static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) { } // Try to fall back to just the byte as a string const char buf2[2] = { (char)ch, 0 }; - return vocab.token_to_id.at(buf2); + token = vocab.token_to_id.find(buf2); + if (token != vocab.token_to_id.end()) { + return (*token).second; + } } case LLAMA_VOCAB_TYPE_WPM: case LLAMA_VOCAB_TYPE_BPE: {