Improve handling of special tokens in GGML to GGUF converter (#2725)
* Improve UNK, BOS, EOS token handling when converting without metadata.
* Allow importing as a module.
* Remove some obsolete code and minor cleanups.
* Set the default UNK token mapping in llama.cpp to 0 instead of -1.
* Try to handle overflow due to buggy Windows Python with a better error message.
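For context, here is a minimal sketch of what the converter-side points above amount to; the helper names (`resolve_special_tokens`, `pack_token_score`) and the metadata key format are hypothetical illustrations, not the converter's actual code:

```python
# Illustrative sketch only -- function and key names are hypothetical.
import struct

# Default special-token IDs used when the GGML source was produced without
# tokenizer metadata (UNK=0 matches the new llama.cpp default below).
DEFAULT_SPECIAL_TOKENS = {"unk": 0, "bos": 1, "eos": 2}


def resolve_special_tokens(metadata):
    """Prefer IDs found in tokenizer metadata, otherwise fall back to defaults."""
    if not metadata:
        return dict(DEFAULT_SPECIAL_TOKENS)
    return {
        name: int(metadata.get(f"{name}_token_id", default))
        for name, default in DEFAULT_SPECIAL_TOKENS.items()
    }


def pack_token_score(score):
    """Pack a 32-bit float token score, turning the OverflowError raised for
    out-of-range values (seen with buggy Windows Python builds) into a
    clearer error message."""
    try:
        return struct.pack("<f", score)
    except OverflowError as exc:
        raise ValueError(
            f"token score {score!r} does not fit in a 32-bit float; "
            "this is a known problem with some Windows Python builds"
        ) from exc


if __name__ == "__main__":
    # Guarding the entry point is what lets the converter be imported as a
    # module without side effects.
    print(resolve_special_tokens(None))   # {'unk': 0, 'bos': 1, 'eos': 2}
    print(pack_token_score(0.5).hex())    # 0000003f
```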
parent 46ef5b5fcf
commit 777f42ba18
2 changed files with 31 additions and 14 deletions
llama.cpp
@@ -703,7 +703,7 @@ struct llama_vocab {
     // default LLaMA special tokens
     id special_bos_id = 1;
     id special_eos_id = 2;
-    id special_unk_id = -1;
+    id special_unk_id = 0;
     id special_sep_id = -1;
     id special_pad_id = -1;
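On the converter side these IDs are recorded as GGUF metadata, so the C++ defaults above only apply when the keys are missing. A rough sketch, assuming a gguf-py style writer object (`write_special_tokens` and the `writer` wiring are hypothetical; the `tokenizer.ggml.*` names are the GGUF keys for these fields):

```python
# Sketch: map the llama_vocab fields above to GGUF metadata keys. `writer`
# stands in for a gguf-py GGUFWriter-like object and is assumed here.
SPECIAL_TOKEN_KEYS = {
    "special_bos_id": "tokenizer.ggml.bos_token_id",
    "special_eos_id": "tokenizer.ggml.eos_token_id",
    "special_unk_id": "tokenizer.ggml.unknown_token_id",
    "special_sep_id": "tokenizer.ggml.separator_token_id",
    "special_pad_id": "tokenizer.ggml.padding_token_id",
}


def write_special_tokens(writer, ids):
    """Emit only the token IDs we actually know; anything omitted falls back
    to the C++ defaults in the hunk above (UNK now defaulting to 0)."""
    for field, key in SPECIAL_TOKEN_KEYS.items():
        token_id = ids.get(field, -1)
        if token_id >= 0:
            writer.add_uint32(key, token_id)
```

Presumably the point of moving the UNK default from -1 to 0 is that 0 is at least a valid token index, so a model converted without metadata no longer ends up with an out-of-range UNK id.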