diff --git a/convert-hf-to-gguf-update.py b/convert-hf-to-gguf-update.py
index c1d53aaf0..deca36bf9 100644
--- a/convert-hf-to-gguf-update.py
+++ b/convert-hf-to-gguf-update.py
@@ -6,6 +6,7 @@
 #   python3 convert-hf-to-gguf-update.py <huggingface_token>
 #
 # - Copy-paste the generated get_vocab_base_pre() function into convert-hf-to-gguf.py
+# - Update llama.cpp with the new pre-tokenizer if necessary
 #
 # TODO: generate tokenizer tests for llama.cpp
 #
@@ -33,13 +34,14 @@ else:
     print("Usage: python convert-hf-to-gguf-update.py <huggingface_token>")
     sys.exit(1)
 
-# TODO: add models here
+# TODO: add models here, base models preferred
 models = [
-    { "name": "llama-v2",       "tokenizer_type": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
-    { "name": "llama-v3",       "tokenizer_type": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
-    { "name": "deepseek-llm",   "tokenizer_type": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-chat", },
-    { "name": "deepseek-coder", "tokenizer_type": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
-    { "name": "bert-bge",       "tokenizer_type": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    { "name": "llama-v2",       "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
+    { "name": "llama-v3",       "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
+    { "name": "deepseek-llm",   "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
+    { "name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
+    { "name": "falcon",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
+    { "name": "bert-bge",       "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
 ]
 
 # make directory "models/tokenizers" if it doesn't exist
@@ -59,7 +61,7 @@ def download_file_with_auth(url, token, save_path):
 for model in models:
     name = model["name"]
     repo = model["repo"]
-    tokenizer_type = model["tokenizer_type"]
+    tokt = model["tokt"]
 
     if not os.path.exists(f"models/tokenizers/{name}"):
         os.makedirs(f"models/tokenizers/{name}")
@@ -73,7 +75,7 @@ for model in models:
     save_path = f"models/tokenizers/{name}/tokenizer.json"
     download_file_with_auth(url, token, save_path)
 
-    if tokenizer_type == TOKENIZER_TYPE.SPM:
+    if tokt == TOKENIZER_TYPE.SPM:
         url = f"{repo}/resolve/main/tokenizer.model"
         save_path = f"models/tokenizers/{name}/tokenizer.model"
         download_file_with_auth(url, token, save_path)
@@ -88,9 +90,9 @@ for model in models:
 src_ifs = ""
 for model in models:
     name = model["name"]
-    tokenizer_type = model["tokenizer_type"]
+    tokt = model["tokt"]
 
-    if tokenizer_type == TOKENIZER_TYPE.SPM:
+    if tokt == TOKENIZER_TYPE.SPM:
         continue
 
     # create the tokenizer
@@ -101,7 +103,7 @@ for model in models:
     chkhsh = sha256(str(chktok).encode()).hexdigest()
 
     print(f"model: {name}")
-    print(f"tokenizer_type: {tokenizer_type}")
+    print(f"tokt: {tokt}")
     print(f"repo: {model['repo']}")
     print(f"chktok: {chktok}")
     print(f"chkhsh: {chkhsh}")
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 9b2f68cfd..56475a49a 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -401,11 +401,14 @@ class Model(ABC):
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B
             res = "llama-v3"
         if chkhsh == "58c3d0e812ae7fa6a20931006d2398274732c105a9a964c148c43cf898c5fb7a":
-            # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-chat
+            # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base
             res = "deepseek-llm"
         if chkhsh == "0438d2a948d7fb26c7a662705ac68374f3138ee29e44f133b1f059203500fb4d":
             # ref: https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base
             res = "deepseek-coder"
+        if chkhsh == "822bdd323c3ef8667a9526b16b5bfe97974059838d992a170f965063f99c9b9e":
+            # ref: https://huggingface.co/tiiuae/falcon-7b
+            res = "falcon"
         if chkhsh == "406f3f61e1c67d7b0456c5df2fce5cbb30c77dd3671a436b07a6c510303f721e":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
diff --git a/llama.cpp b/llama.cpp
index eaf9a8da0..98316161c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4348,6 +4348,9 @@ static void llm_load_vocab(
             } else if (
                     tokenizer_pre == "deepseek-coder") {
                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER;
+            } else if (
+                    tokenizer_pre == "falcon") {
+                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
             } else {
                 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
             }
@@ -12112,6 +12115,14 @@ struct llm_tokenizer_bpe {
                         "\\p{N}+",
                     });
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_FALCON:
+                word_collection = unicode_regex_split(text, {
+                        "[\\p{P}\\$\\+<=>\\^~\\|]+",
+                        "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
+                        "\\p{N}+",
+                        "[0-9][0-9][0-9]",
+                    });
+                break;
             default:
                 // default regex for BPE tokenization pre-processing
                 word_collection = unicode_regex_split(text, {
diff --git a/llama.h b/llama.h
index 083ce22f1..3beb80e0c 100644
--- a/llama.h
+++ b/llama.h
@@ -75,6 +75,7 @@ extern "C" {
         LLAMA_VOCAB_PRE_TYPE_LLAMA3         = 1,
         LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM   = 2,
         LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
+        LLAMA_VOCAB_PRE_TYPE_FALCON         = 4,
     };
 
     // note: these values should be synchronized with ggml_rope
diff --git a/models/ggml-vocab-falcon.gguf b/models/ggml-vocab-falcon.gguf
index d4ea2e822..334d50da5 100644
Binary files a/models/ggml-vocab-falcon.gguf and b/models/ggml-vocab-falcon.gguf differ