llama : minor stuff
This commit is contained in:
parent
d24da31d2f
commit
128c213ab5
3 changed files with 21 additions and 23 deletions
|
@ -78,7 +78,6 @@
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
|
@ -7006,9 +7005,9 @@ struct llm_tokenizer_bpe {
|
||||||
|
|
||||||
void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
|
void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
|
||||||
int final_prev_index = -1;
|
int final_prev_index = -1;
|
||||||
|
|
||||||
std::vector<std::string> word_collection;
|
std::vector<std::string> word_collection;
|
||||||
switch (vocab.type)
|
switch (vocab.type) {
|
||||||
{
|
|
||||||
case LLAMA_VOCAB_TYPE_BPE:
|
case LLAMA_VOCAB_TYPE_BPE:
|
||||||
word_collection = bpe_gpt2_preprocess(text);
|
word_collection = bpe_gpt2_preprocess(text);
|
||||||
break;
|
break;
|
||||||
|
|
4
llama.h
4
llama.h
|
@ -70,8 +70,8 @@ extern "C" {
|
||||||
enum llama_vocab_type {
|
enum llama_vocab_type {
|
||||||
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
|
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
|
||||||
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
|
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
|
||||||
LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 2, // deepseek coder
|
LLAMA_VOCAB_TYPE_DEEPSEEKCODER = 2, // Deepseek Coder
|
||||||
LLAMA_VOCAB_TYPE_DEEPSEEKLLM = 3, // deepseek coder
|
LLAMA_VOCAB_TYPE_DEEPSEEKLLM = 3, // Deepseek LLM
|
||||||
};
|
};
|
||||||
|
|
||||||
enum llama_token_type {
|
enum llama_token_type {
|
||||||
|
|
|
@ -7,7 +7,6 @@
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include <codecvt>
|
#include <codecvt>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
|
static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
|
||||||
{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
|
{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue