Fix trailing ws
This commit is contained in:
parent
0c5baa1cd1
commit
0d198bbf98
3 changed files with 5 additions and 5 deletions
|
@ -533,7 +533,7 @@ With input 'á' (utf8 hex: C3 A1) on tinyllama/stories260k
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"tokens": [
|
"tokens": [
|
||||||
{"id": 198, "piece": [195]}, // hex C3
|
{"id": 198, "piece": [195]}, // hex C3
|
||||||
{"id": 164, "piece": [161]} // hex A1
|
{"id": 164, "piece": [161]} // hex A1
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
@ -104,15 +104,15 @@ Feature: llama.cpp server
|
||||||
Then tokens begin with BOS
|
Then tokens begin with BOS
|
||||||
Given first token is removed
|
Given first token is removed
|
||||||
Then tokens can be detokenized
|
Then tokens can be detokenized
|
||||||
|
|
||||||
Scenario: Tokenize with pieces
|
Scenario: Tokenize with pieces
|
||||||
When tokenizing with pieces:
|
When tokenizing with pieces:
|
||||||
"""
|
"""
|
||||||
What is the capital of Germany?
|
What is the capital of Germany?
|
||||||
媽
|
媽
|
||||||
"""
|
"""
|
||||||
Then tokens are given with pieces
|
Then tokens are given with pieces
|
||||||
|
|
||||||
Scenario: Models available
|
Scenario: Models available
|
||||||
Given available models
|
Given available models
|
||||||
Then 1 models are supported
|
Then 1 models are supported
|
||||||
|
|
|
@ -603,7 +603,7 @@ static bool is_valid_utf8(const std::string & str) {
|
||||||
bytes += 3;
|
bytes += 3;
|
||||||
} else if ((*bytes & 0xF8) == 0xF0) {
|
} else if ((*bytes & 0xF8) == 0xF0) {
|
||||||
// 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
|
// 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
|
||||||
if (end - bytes < 4 || (bytes[1] & 0xC0) != 0x80 ||
|
if (end - bytes < 4 || (bytes[1] & 0xC0) != 0x80 ||
|
||||||
(bytes[2] & 0xC0) != 0x80 || (bytes[3] & 0xC0) != 0x80)
|
(bytes[2] & 0xC0) != 0x80 || (bytes[3] & 0xC0) != 0x80)
|
||||||
return false;
|
return false;
|
||||||
bytes += 4;
|
bytes += 4;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue