Merge branch 'refactor-jina-rename' of https://github.com/JoanFM/llama.cpp into feat-jina-embeddings-v2-zh

This commit is contained in:
Joan Martinez 2024-05-13 10:29:55 +02:00
commit ea0f7df2fb
3 changed files with 4 additions and 2 deletions

2
ggml.c
View file

@@ -13409,7 +13409,7 @@ static void ggml_compute_forward_soft_max_f32(
const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1) : 1.0f;
float * sp = (float *)((char *) src0->data + i1*src0->nb[1]);
-float * dp = (float *)((char *) dst->data + i1*dst->nb[1]);
+float * dp = (float *)((char *) dst->data + i1*dst->nb[1]);
// broadcast the mask across rows
ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;

View file

@@ -4423,6 +4423,8 @@ static void llm_load_vocab(
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_STARCODER;
} else if (
tokenizer_pre == "gpt-2" ||
tokenizer_pre == "jina-es" ||
tokenizer_pre == "jina-de" ||
tokenizer_pre == "jina-v2-es" ||
tokenizer_pre == "jina-v2-de") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;

View file

@@ -14,4 +14,4 @@ extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_punctuati
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_symbol;
extern const std::vector<std::pair<uint32_t, uint32_t>> unicode_ranges_control;
extern const std::multimap<uint32_t, uint32_t> unicode_map_nfd;
-extern const std::map<char32_t, char32_t> unicode_map_lowercase;
+extern const std::map<char32_t, char32_t> unicode_map_lowercase;