Resolved issues

2024-03-23 14:38:06 +06:00 · 2024-03-23 14:38:06 +06:00 · 1c924e4b35
commit 1c924e4b35
parent 54f93eb50b
5 changed files with 18 additions and 18 deletions
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -46,6 +46,7 @@ llama_test(test-tokenizer-0-llama.cpp  NAME test-tokenizer-0-llama
 llama_test(test-tokenizer-0-falcon.cpp NAME test-tokenizer-0-falcon                           ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
 llama_test(test-tokenizer-0-deepseek-coder.cpp NAME test-tokenizer-0-deepseek-coder           ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-coder.gguf)
 llama_test(test-tokenizer-0-deepseek-llm.cpp   NAME test-tokenizer-0-deepseek-llm             ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-deepseek-llm.gguf)
 llama_test(test-tokenizer-1-llama.cpp  NAME test-tokenizer-1-llama                            ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_test(test-tokenizer-1-llama.cpp  NAME test-tokenizer-1-baichuan                         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
--- a/tests/test-tokenizer-0-deepseek-coder.cpp
+++ b/tests/test-tokenizer-0-deepseek-coder.cpp
@@ -63,7 +63,7 @@ int main(int argc, char **argv) {
    llama_model * model;
    llama_context * ctx;
-    llama_backend_init(false);
+    llama_backend_init();
    // load the vocab
    {
--- a/tests/test-tokenizer-0-deepseek-llm.cpp
+++ b/tests/test-tokenizer-0-deepseek-llm.cpp
@@ -63,7 +63,7 @@ int main(int argc, char **argv) {
    llama_model * model;
    llama_context * ctx;
-    llama_backend_init(false);
+    llama_backend_init();
    // load the vocab
    {
--- a/unicode.cpp
+++ b/unicode.cpp
@@ -10,8 +10,6 @@
 #include <unordered_map>
 #include <utility>
 #include <vector>
 #include <locale>
 #include <codecvt>
 static std::string unicode_cpts_to_utf8(const std::vector<uint32_t> & cps) {
    std::string result;
@@ -320,17 +318,5 @@ std::vector<std::wstring> get_deepseek_llm_regex() {
    return deepseek_llm_regex;
 }
 // Decodes the UTF-8 byte string `s` into a wide string.
 //
 // The conversion facet is picked from the width of wchar_t so that code
 // points above U+FFFF are representable on every platform:
 //   - 16-bit wchar_t: the result is UTF-16 (surrogate pairs), which is the
 //     encoding to_utf8 consumes on those platforms;
 //   - wider wchar_t: the result holds one element per code point.
 // Using std::codecvt_utf8<wchar_t> unconditionally would limit 16-bit
 // platforms to UCS-2.
 //
 // Throws std::range_error (from std::wstring_convert::from_bytes) when `s`
 // is not valid UTF-8.
 inline std::wstring from_utf8(const std::string & s)
 {
    if (sizeof(wchar_t) == 2) {
        // 16-bit wchar_t: decode to UTF-16 code units.
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utf16_converter;
        return utf16_converter.from_bytes(s);
    }
    // Wider wchar_t: decode straight to whole code points.
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf32_converter;
    return utf32_converter.from_bytes(s);
 }
 // Encodes the wide string `ws` as UTF-8.
 //
 // The conversion facet is picked from the width of wchar_t:
 //   - 16-bit wchar_t: `ws` is read as UTF-16, surrogate pairs included;
 //   - wider wchar_t: every element of `ws` is read as one whole code point.
 // std::codecvt_utf8_utf16<wchar_t> is only meant for the 16-bit case; with a
 // wider wchar_t it treats each element as a UTF-16 code unit, and standard
 // library implementations disagree on what happens to values above U+FFFF.
 //
 // Throws std::range_error (from std::wstring_convert::to_bytes) when `ws`
 // cannot be converted.
 inline std::string to_utf8(const std::wstring & ws)
 {
    if (sizeof(wchar_t) == 2) {
        // 16-bit wchar_t: the input is UTF-16.
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utf16_converter;
        return utf16_converter.to_bytes(ws);
    }
    // Wider wchar_t: the input is a sequence of code points.
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf32_converter;
    return utf32_converter.to_bytes(ws);
 }
--- a/unicode.h
+++ b/unicode.h
@@ -3,6 +3,8 @@
 #include <cstdint>
 #include <string>
 #include <vector>
 #include <locale>
 #include <codecvt>
 #define CODEPOINT_TYPE_UNIDENTIFIED 0
 #define CODEPOINT_TYPE_DIGIT        1
@@ -30,5 +32,16 @@ std::vector<std::wstring> get_gpt2_regex();
 std::vector<std::wstring> get_deepseek_coder_regex();
 std::vector<std::wstring> get_deepseek_llm_regex();
-inline std::wstring from_utf8(const std::string & s);
+inline std::wstring from_utf8(const std::string & s)
-inline std::string to_utf8(const std::wstring & ws);
+{
    std::wstring_convert<std::codecvt_utf8<wchar_t>> conv;
    return conv.from_bytes(s);
 }
 // Encodes the wide string `ws` as UTF-8.
 //
 // The conversion facet is picked from the width of wchar_t:
 //   - 16-bit wchar_t: `ws` is read as UTF-16, surrogate pairs included;
 //   - wider wchar_t: every element of `ws` is read as one whole code point.
 // std::codecvt_utf8_utf16<wchar_t> is only meant for the 16-bit case; with a
 // wider wchar_t it treats each element as a UTF-16 code unit, and standard
 // library implementations disagree on what happens to values above U+FFFF.
 //
 // Throws std::range_error (from std::wstring_convert::to_bytes) when `ws`
 // cannot be converted.
 inline std::string to_utf8(const std::wstring & ws)
 {
    if (sizeof(wchar_t) == 2) {
        // 16-bit wchar_t: the input is UTF-16.
        std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> utf16_converter;
        return utf16_converter.to_bytes(ws);
    }
    // Wider wchar_t: the input is a sequence of code points.
    std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> utf32_converter;
    return utf32_converter.to_bytes(ws);
 }