rerank : use [SEP] token instead of [BOS] (#9737)

* rerank : use [SEP] token instead of [BOS]

ggml-ci

* common : sanity check for non-NULL tokens

ggml-ci

* ci : adjust rank score interval

ggml-ci

* ci : add shebang to run.sh

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-10-05 15:55:04 +03:00 committed by GitHub
parent 58b16695e1
commit 8c475b97b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 45 additions and 16 deletions

View file

@ -40,17 +40,17 @@ struct llama_vocab {
id special_bos_id = 1;
id special_eos_id = 2;
id special_unk_id = 0;
id special_sep_id = -1;
id special_pad_id = -1;
id special_cls_id = -1;
id special_mask_id = -1;
id special_sep_id = LLAMA_TOKEN_NULL;
id special_pad_id = LLAMA_TOKEN_NULL;
id special_cls_id = LLAMA_TOKEN_NULL;
id special_mask_id = LLAMA_TOKEN_NULL;
id linefeed_id = 13;
id special_prefix_id = -1;
id special_suffix_id = -1;
id special_middle_id = -1;
id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
id special_eom_id = -1;
id special_prefix_id = LLAMA_TOKEN_NULL;
id special_suffix_id = LLAMA_TOKEN_NULL;
id special_middle_id = LLAMA_TOKEN_NULL;
id special_eot_id = LLAMA_TOKEN_NULL; // TODO: move above after "eos_id", and here add "file separator" token
id special_eom_id = LLAMA_TOKEN_NULL;
// set of all tokens that cause "end of generation"
std::set<id> special_eog_ids;

View file

@ -2412,7 +2412,7 @@ struct llama_hparams {
// needed by encoder-decoder models (e.g. T5, FLAN-T5)
// ref: https://github.com/ggerganov/llama.cpp/pull/8141
llama_token dec_start_token_id = -1;
llama_token dec_start_token_id = LLAMA_TOKEN_NULL;
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;