diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 1aab4d2fe..28b060ed3 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -398,6 +398,9 @@ class Model(ABC):
         if chkhsh == -3290901550109860290:
             # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer.json
             res = "llama3"
+        if chkhsh == 5332289095291046364:
+            # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-chat/blob/main/tokenizer.json
+            res = "deepseek-llm"
         if chkhsh == 4190561703949727616:
             # ref: https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct/blob/main/tokenizer.json
             res = "deepseek-coder"
diff --git a/llama.cpp b/llama.cpp
index 09d8a0dd8..e05d10cdb 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -316,6 +316,7 @@ enum llm_kv {
     LLM_KV_SSM_TIME_STEP_RANK,
 
     LLM_KV_TOKENIZER_MODEL,
+    LLM_KV_TOKENIZER_PRE,
     LLM_KV_TOKENIZER_LIST,
     LLM_KV_TOKENIZER_TOKEN_TYPE,
     LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
@@ -392,6 +393,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_SSM_TIME_STEP_RANK,            "%s.ssm.time_step_rank" },
 
     { LLM_KV_TOKENIZER_MODEL,               "tokenizer.ggml.model" },
+    { LLM_KV_TOKENIZER_PRE,                 "tokenizer.ggml.pre" },
     { LLM_KV_TOKENIZER_LIST,                "tokenizer.ggml.tokens" },
     { LLM_KV_TOKENIZER_TOKEN_TYPE,          "tokenizer.ggml.token_type" },
     { LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,    "tokenizer.ggml.token_type_count" },
@@ -2114,8 +2116,8 @@ struct llama_vocab {
         ttype type;
     };
 
-    enum llm_arch arch = LLM_ARCH_UNKNOWN;
-    enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
+    enum llama_vocab_type     type     = LLAMA_VOCAB_TYPE_SPM;
+    enum llama_vocab_pre_type type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
 
     std::unordered_map<token, id> token_to_id;
     std::vector<token_data>       id_to_token;
@@ -4166,11 +4168,13 @@ static void llm_load_vocab(
 
     // determine vocab type
     {
-        std::string tokenizer_name;
+        std::string tokenizer_model;
+        std::string tokenizer_pre;
 
-        ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_name);
+        ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
+        ml.get_key(LLM_KV_TOKENIZER_PRE,   tokenizer_pre, false);
 
-        if (tokenizer_name == "no_vocab") {
+        if (tokenizer_model == "no_vocab") {
             vocab.type = LLAMA_VOCAB_TYPE_NONE;
 
             // default special tokens
@@ -4184,7 +4188,7 @@ static void llm_load_vocab(
 
             vocab.linefeed_id = -1;
             return;
-        } else if (tokenizer_name == "llama") {
+        } else if (tokenizer_model == "llama") {
            vocab.type = LLAMA_VOCAB_TYPE_SPM;
 
             // default special tokens
@@ -4229,7 +4233,7 @@ static void llm_load_vocab(
             if (add_space_prefix_keyidx != -1) {
                 vocab.add_space_prefix = gguf_get_val_bool(ctx, add_space_prefix_keyidx);
             } // The default value of add_space_prefix is true.
-        } else if (tokenizer_name == "bert") {
+        } else if (tokenizer_model == "bert") {
             vocab.type = LLAMA_VOCAB_TYPE_WPM;
 
             // default special tokens
@@ -4242,10 +4246,10 @@ static void llm_load_vocab(
             vocab.special_mask_id = 103;
             vocab.add_space_prefix = false;
         } else {
-            if (tokenizer_name == "gpt2") {
+            if (tokenizer_model == "gpt2") {
                 vocab.type = LLAMA_VOCAB_TYPE_BPE;
             } else {
-                LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_name.c_str());
+                LLAMA_LOG_WARN("%s: unknown tokenizer: '%s'", __func__, tokenizer_model.c_str());
                 LLAMA_LOG_WARN("%s: using default tokenizer: 'llama'", __func__);
                 vocab.type = LLAMA_VOCAB_TYPE_SPM;
                 return;
@@ -4285,7 +4289,20 @@ static void llm_load_vocab(
             vocab.special_mask_id = -1;
         }
 
-        vocab.arch = model.arch;
+        if (tokenizer_pre.empty()) {
+            LLAMA_LOG_WARN("%s: missing tokenizer pre, using default tokenizer pre: 'default'", __func__);
+            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+        } else if (tokenizer_pre == "default") {
+            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+        } else if (tokenizer_pre == "llama3") {
+            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
+        } else if (tokenizer_pre == "deepseek-llm") {
+            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM;
+        } else if (tokenizer_pre == "deepseek-coder") {
+            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER;
+        } else {
+            throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
+        }
     }
 
     const int token_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_LIST).c_str());
@@ -12011,38 +12028,44 @@ struct llm_tokenizer_bpe {
         std::vector<std::string> word_collection;
         switch (vocab.type) {
             case LLAMA_VOCAB_TYPE_BPE:
-                switch (vocab.arch) {
-                    // TODO: how to detect deepseek and llama v3 models?
-                    //case LLM_ARCH_LLAMA:
-                    //case LLM_ARCH_DEEPSEEK_CODER:
-                    //    word_collection = unicode_regex_split(text, {
-                    //        "[\r\n]",
-                    //        "\\s?\\p{L}+",
-                    //        "\\s?\\p{P}+",
-                    //        "[一-龥ࠀ-一가-퟿]+",
-                    //        "\\p{N}+"
-                    //    });
-                    //    break;
-                    //case LLM_ARCH_DEEPSEEK_LLM:
-                    //    word_collection = unicode_regex_split(text, {
-                    //        "[\r\n]",
-                    //        "\\s?[A-Za-zµÀ-ÖØ-öø-ƺƼ-ƿDŽ-ʓʕ-ʯͰ-ͳͶͷͻ-ͽͿΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-ՖႠ-ჅᎠ-Ᏽᏸ-ᏽᲐ-ᲺᲽ-Ჿᴀ-ᴫᵫ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℴℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-ⱻⱾ-ⳤⳫ-ⳮⳲⳳꙀ-ꙭꚀ-ꚛꜢ-ꝯꝱ-ꞇꞋ-ꞎꭰ-ꮿff-stﬓ-ﬗA-Za-z𐐀-𐑏𐒰-𐓓𐓘-𐓻𐲀-𐲲𐳀-𐳲𑢠-𑣟𞤀-𞥃]+",
-                    //        "\\s?[!-/:-~!-/:-~‘-‟ -。]+",
-                    //        "\\s+$",
-                    //        "[一-龥ࠀ-一가-퟿]+",
-                    //        "\\p{N}+"
-                    //    });
-                    //    break;
+                switch (vocab.type_pre) {
+                    case LLAMA_VOCAB_PRE_TYPE_LLAMA3:
+                        word_collection = unicode_regex_split(text, {
+                            // TODO: ??????????????
+ //"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+ + "\\p{P}+", + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)", + "\\p{N}+", + "[0-9][0-9][0-9]" + }); + break; + case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM: + word_collection = unicode_regex_split(text, { + "[\r\n]", + "\\s?[A-Za-zµÀ-ÖØ-öø-ƺƼ-ƿDŽ-ʓʕ-ʯͰ-ͳͶͷͻ-ͽͿΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԯԱ-ՖႠ-ჅᎠ-Ᏽᏸ-ᏽᲐ-ᲺᲽ-Ჿᴀ-ᴫᵫ-ᵷᵹ-ᶚḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℴℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-ⱻⱾ-ⳤⳫ-ⳮⳲⳳꙀ-ꙭꚀ-ꚛꜢ-ꝯꝱ-ꞇꞋ-ꞎꭰ-ꮿff-stﬓ-ﬗA-Za-z𐐀-𐑏𐒰-𐓓𐓘-𐓻𐲀-𐲲𐳀-𐳲𑢠-𑣟𞤀-𞥃]+", + "\\s?[!-/:-~!-/:-~‘-‟ -。]+", + "\\s+$", + "[一-龥ࠀ-一가-퟿]+", + "\\p{N}+" + }); + break; + case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER: + word_collection = unicode_regex_split(text, { + "[\r\n]", + "\\s?\\p{L}+", + "\\s?\\p{P}+", + "[一-龥ࠀ-一가-퟿]+", + "\\p{N}+" + }); + break; default: // default regex for BPE tokenization pre-processing - { - word_collection = unicode_regex_split(text, { - "\\p{P}+", - "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)", - "\\p{N}+", - "[0-9][0-9][0-9]" - }); - } + word_collection = unicode_regex_split(text, { + "\\p{P}+", + "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)", + "\\p{N}+", + "[0-9][0-9][0-9]" + }); break; } break; diff --git a/llama.h b/llama.h index 8aa763672..9c89d72af 100644 --- a/llama.h +++ b/llama.h @@ -69,6 +69,14 @@ extern "C" { LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece }; + // pre-tokenization types + enum llama_vocab_pre_type { + LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0, + LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1, + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2, + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3, + }; + // note: these values should be synchronized with ggml_rope // TODO: maybe move this enum to ggml.h (ggml_rope_type) enum llama_rope_type { diff --git a/models/ggml-vocab-deepseek-coder.gguf b/models/ggml-vocab-deepseek-coder.gguf index 640ee63d8..8ea17fa4d 100644 Binary files a/models/ggml-vocab-deepseek-coder.gguf and b/models/ggml-vocab-deepseek-coder.gguf differ diff --git a/models/ggml-vocab-deepseek-llm.gguf b/models/ggml-vocab-deepseek-llm.gguf index 8fed82fa0..1e087220f 100644 Binary files a/models/ggml-vocab-deepseek-llm.gguf and b/models/ggml-vocab-deepseek-llm.gguf differ diff --git a/models/ggml-vocab-llama.gguf b/models/ggml-vocab-llama.gguf index 549eed8c5..568ffdc16 100644 Binary files a/models/ggml-vocab-llama.gguf and b/models/ggml-vocab-llama.gguf differ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4f0889007..3acf28ba4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -41,13 +41,12 @@ llama_test(test-quantize-perf.cpp) llama_test(test-sampling.cpp) llama_test(test-chat-template.cpp) -# TODO: tmp disabled LLaMA v3 and Deepseek tests llama_test(test-tokenizer-0-llama.cpp NAME test-tokenizer-0-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) -#llama_test(test-tokenizer-0-llama-v3.cpp NAME test-tokenizer-0-llama-v3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-v3.gguf) +llama_test(test-tokenizer-0-llama-v3.cpp NAME test-tokenizer-0-llama-v3 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-v3.gguf) llama_test(test-tokenizer-0-falcon.cpp NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf) -#llama_test(test-tokenizer-0-deepseek-coder.cpp NAME test-tokenizer-0-deepseek-coder ARGS 
${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf) -#llama_test(test-tokenizer-0-deepseek-llm.cpp NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf) +llama_test(test-tokenizer-0-deepseek-coder.cpp NAME test-tokenizer-0-deepseek-coder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf) +llama_test(test-tokenizer-0-deepseek-llm.cpp NAME test-tokenizer-0-deepseek-llm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf) llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf) diff --git a/tests/test-tokenizer-0-bpe.py b/tests/test-tokenizer-0-bpe.py index d412ce039..38aa33c46 100644 --- a/tests/test-tokenizer-0-bpe.py +++ b/tests/test-tokenizer-0-bpe.py @@ -27,6 +27,8 @@ tests = [ " ", "\t", "\n", + "\n\n", + "\n\n\n", "\t\n", "Hello world", " Hello world", diff --git a/tests/test-tokenizer-0-falcon.cpp b/tests/test-tokenizer-0-falcon.cpp index 3e8877563..67f675a62 100644 --- a/tests/test-tokenizer-0-falcon.cpp +++ b/tests/test-tokenizer-0-falcon.cpp @@ -17,6 +17,8 @@ static const std::map> & k_tests() { { " " , { 466, }, }, { "\t" , { 192, }, }, { "\n" , { 193, }, }, + { "\n\n" , { 1001, }, }, + { "\n\n\n" , { 11331, }, }, { "\t\n" , { 19125, }, }, { "Hello world" , { 9856, 1079, }, }, { " Hello world" , { 23090, 1079, }, }, diff --git a/tests/test-tokenizer-0-llama-v3.cpp b/tests/test-tokenizer-0-llama-v3.cpp index a0ecf6283..2e91b717f 100644 --- a/tests/test-tokenizer-0-llama-v3.cpp +++ b/tests/test-tokenizer-0-llama-v3.cpp @@ -17,6 +17,8 @@ static const std::map> & k_tests() { { " " , { 262, }, }, { "\t" , { 197, }, }, { "\n" , { 198, }, }, + { "\n\n" , { 271, }, }, + { "\n\n\n" , { 1432, }, }, { "\t\n" , { 1602, }, }, { "Hello world" , { 9906, 1917, }, }, { " Hello world" , { 22691, 1917, }, }, diff --git a/tests/test-tokenizer-0-llama.cpp b/tests/test-tokenizer-0-llama.cpp index fd407041b..f0634cfe5 100644 --- a/tests/test-tokenizer-0-llama.cpp +++ b/tests/test-tokenizer-0-llama.cpp @@ -17,6 +17,8 @@ static const std::map> & k_tests() { { " " , { 268, }, }, { "\t" , { 29871, 12, }, }, { "\n" , { 29871, 13, }, }, + { "\n\n" , { 29871, 13, 13, }, }, + { "\n\n\n" , { 29871, 13, 13, 13, }, }, { "\t\n" , { 29871, 12, 13, }, }, { "Hello world" , { 15043, 3186, }, }, { " Hello world" , { 29871, 15043, 3186, }, }, diff --git a/tests/test-tokenizer-0-spm.py b/tests/test-tokenizer-0-spm.py index f2d3b6e88..be12a6b93 100644 --- a/tests/test-tokenizer-0-spm.py +++ b/tests/test-tokenizer-0-spm.py @@ -27,6 +27,8 @@ tests = [ " ", "\t", "\n", + "\n\n", + "\n\n\n", "\t\n", "Hello world", " Hello world",
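Reviewer note (illustrative, not part of the patch): the chkhsh constants in convert-hf-to-gguf.py fingerprint a tokenizer by its behavior rather than its metadata. The script tokenizes a fixed probe string and compares a checksum of the result against known values; the matching res string ("llama3", "deepseek-llm", "deepseek-coder") lines up with the tokenizer_pre names accepted in llm_load_vocab, so it is evidently what ends up in the new "tokenizer.ggml.pre" KV. A minimal sketch of the idea in Python; the probe text, checksum scheme, and function name below are assumptions for illustration, not the script's actual code:

    # Hypothetical sketch; the real probe string and checksum scheme live in
    # convert-hf-to-gguf.py, and the hard-coded constants must match its output.
    from transformers import AutoTokenizer

    def tokenizer_fingerprint(model_dir: str) -> int:
        tok = AutoTokenizer.from_pretrained(model_dir)
        probe = "Hello world \t 123 3333 \n\n\n"  # fixed probe text (illustrative)
        # hash() of a tuple of ints is deterministic across runs, so two
        # tokenizers that split the probe identically give the same fingerprint
        return hash(tuple(tok.encode(probe)))

Any change to the pre-tokenizer regexes changes how the probe is split, hence the token ids, hence the checksum, which is what lets the hard-coded comparisons discriminate between otherwise similar BPE tokenizers.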
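Second note: the new llama_vocab_pre_type selects the regex set that splits raw text into chunks before the BPE merge loop runs, and the "\n\n" / "\n\n\n" test cases added here pin down exactly the kind of input such splits get wrong. The commented-out pattern in the LLAMA3 branch is the one from the model's tokenizer.json; a quick way to see what it does is Python's third-party regex module (unlike re, it supports \p{L}-style Unicode classes everywhere):

    # pip install regex
    import regex

    # the Llama 3 pattern quoted (still commented out) in the diff above
    PAT = (r"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}"
           r"| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+")

    print(regex.findall(PAT, "Hello world\n\n123 3333"))
    # ['Hello', ' world', '\n\n', '123', ' ', '333', '3']

Note how "\n\n" stays a single chunk and digit runs are capped at three per chunk; the default BPE regex set can treat these inputs differently, which is why the pre-tokenizer now has to be model-specific.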
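Third note: unicode_regex_split takes a list of patterns, so a set like the DEEPSEEK_CODER one is applied as successive splitting passes rather than as one big alternation. A rough Python model of that idea (a sketch only, not a port; the C++ implementation's exact handling of already-matched pieces may differ):

    import regex

    def regex_split_seq(text: str, patterns: list[str]) -> list[str]:
        # each pattern splits every current piece into its matches plus the
        # gaps between them; later patterns refine the pieces further
        pieces = [text]
        for pat in patterns:
            nxt = []
            for piece in pieces:
                last = 0
                for m in regex.finditer(pat, piece):
                    if m.start() > last:
                        nxt.append(piece[last:m.start()])
                    nxt.append(m.group())
                    last = m.end()
                if last < len(piece):
                    nxt.append(piece[last:])
            pieces = nxt
        return pieces

    # a subset of the DEEPSEEK_CODER patterns from the diff above
    print(regex_split_seq("Hello world\n123",
                          [r"[\r\n]", r"\s?\p{L}+", r"\s?\p{P}+", r"\p{N}+"]))
    # ['Hello', ' world', '\n', '123']

Ordering matters in such a scheme: putting "[\r\n]" first guarantees newlines become their own chunks before the looser letter and punctuation patterns get a chance to absorb them.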