llama : suppress conversion from 'size_t' to 'int'
This commit updates llm_tokenizer_spm.tokenize (and the equivalent loop in llm_tokenizer_bpe) to suppress/remove the following warnings that are generated on Windows when using MSVC:

```console
src\llama-vocab.cpp(211,1): warning C4267: 'argument': conversion from 'size_t' to 'int', possible loss of data
src\llama-vocab.cpp(517,1): warning C4267: 'argument': conversion from 'size_t' to 'int', possible loss of data
```

This is done by adding a cast for the size_t returned from symbols.size(). I believe this is safe as it seems unlikely that symbols, which stores an entry for each UTF-8 character, would become larger than INT_MAX.

The motivation for this change is to reduce the number of warnings that are currently generated when building on Windows.
This commit is contained in:
parent
d3ae0ee8d7
commit
efeccedaf6
1 changed file with 4 additions and 2 deletions
|
@@ -207,7 +207,8 @@ struct llm_tokenizer_spm {
|
|||
}
|
||||
|
||||
// seed the work queue with all possible 2-character tokens.
|
||||
for (size_t i = 1; i < symbols.size(); ++i) {
|
||||
int symbols_size = static_cast<int>(symbols.size());
|
||||
for (int i = 1; i < symbols_size; ++i) {
|
||||
try_add_bigram(i - 1, i);
|
||||
}
|
||||
|
||||
|
@@ -511,7 +512,8 @@ struct llm_tokenizer_bpe {
|
|||
index++;
|
||||
symbols.emplace_back(sym);
|
||||
}
|
||||
for (size_t i = 1; i < symbols.size(); ++i) {
|
||||
int symbols_size = static_cast<int>(symbols.size());
|
||||
for (int i = 1; i < symbols_size; ++i) {
|
||||
add_new_bigram(i - 1, i);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue