llama : minor stuff
This commit is contained in:
parent
d24da31d2f
commit
128c213ab5
3 changed files with 21 additions and 23 deletions
|
@ -78,7 +78,6 @@
|
|||
#include <thread>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
-#include <iostream>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
|
@ -7006,9 +7005,9 @@ struct llm_tokenizer_bpe {
|
|||
|
||||
void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
|
||||
int final_prev_index = -1;
|
||||
|
||||
std::vector<std::string> word_collection;
|
||||
-switch (vocab.type)
|
||||
-{
|
||||
+switch (vocab.type) {
|
||||
case LLAMA_VOCAB_TYPE_BPE:
|
||||
word_collection = bpe_gpt2_preprocess(text);
|
||||
break;
|
||||
|
|
4
llama.h
4
llama.h
|
@ -70,8 +70,8 @@ extern "C" {
|
|||
enum llama_vocab_type {
|
||||
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
|
||||
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
|
||||
-LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 2, // deepseek coder
|
||||
-LLAMA_VOCAB_TYPE_DEEPSEEKLLM = 3, // deepseek coder
|
||||
+LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 2, // Deepseek Coder
|
||||
+LLAMA_VOCAB_TYPE_DEEPSEEKLLM = 3, // Deepseek LLM
|
||||
};
|
||||
|
||||
enum llama_token_type {
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#include <locale>
|
||||
#include <codecvt>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
|
||||
{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue