llama : minor stuff

2023-12-29 19:32:30 +02:00 · 2023-12-29 19:32:30 +02:00 · 128c213ab5
commit 128c213ab5
parent d24da31d2f
3 changed files with 21 additions and 23 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -78,7 +78,6 @@
 #include <thread>
 #include <type_traits>
 #include <unordered_map>
-#include <iostream>

 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -7006,9 +7005,9 @@ struct llm_tokenizer_bpe {

    void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
        int final_prev_index = -1;
+
        std::vector<std::string> word_collection;
-        switch (vocab.type)
-        {
+        switch (vocab.type) {
            case LLAMA_VOCAB_TYPE_BPE:
                word_collection = bpe_gpt2_preprocess(text);
                break;
--- a/llama.h
+++ b/llama.h
@@ -70,8 +70,8 @@ extern "C" {
    enum llama_vocab_type {
        LLAMA_VOCAB_TYPE_SPM           = 0, // SentencePiece
        LLAMA_VOCAB_TYPE_BPE           = 1, // Byte Pair Encoding
-        LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 2, // deepseek coder
-        LLAMA_VOCAB_TYPE_DEEPSEEKLLM = 3, // deepseek coder
+        LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 2, // Deepseek Coder
+        LLAMA_VOCAB_TYPE_DEEPSEEKLLM   = 3, // Deepseek LLM
    };

    enum llama_token_type {
--- a/unicode.h
+++ b/unicode.h
@@ -7,7 +7,6 @@
 #include <locale>
 #include <codecvt>
 #include <string>
-#include <cstring>

 static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
 {0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},