Fix coding style
This commit is contained in:
parent
3d162cc8ad
commit
5aee498d97
3 changed files with 15 additions and 15 deletions
14
llama.cpp
14
llama.cpp
|
@ -4590,7 +4590,7 @@ private:
|
|||
work_queue.push(bigram);
|
||||
}
|
||||
|
||||
std::vector<std::string> bpe_gpt2_preprocess(const std::string& text) {
|
||||
std::vector<std::string> bpe_gpt2_preprocess(const std::string & text) {
|
||||
std::vector<std::string> bpe_words;
|
||||
std::vector<std::string> bpe_encoded_words;
|
||||
|
||||
|
@ -4612,13 +4612,13 @@ private:
|
|||
text_utf.emplace_back(codepoint_to_utf8(cps[i]));
|
||||
|
||||
for (int i = 0; i < (int)text_utf.size(); i++) {
|
||||
const std::string& utf_char = text_utf[i];
|
||||
const std::string & utf_char = text_utf[i];
|
||||
bool split_condition = false;
|
||||
// const char* text_pos = raw_text_p + utf_char.seq_offset_bytes;
|
||||
int bytes_remain = text_utf.size() - i;
|
||||
// forward backward lookups
|
||||
const std::string& utf_char_next = (i + 1 < (int)text_utf.size()) ? text_utf[i + 1] : "";
|
||||
const std::string& utf_char_next_next = (i + 2 < (int)text_utf.size()) ? text_utf[i + 2] : "";
|
||||
const std::string & utf_char_next = (i + 1 < (int)text_utf.size()) ? text_utf[i + 1] : "";
|
||||
const std::string & utf_char_next_next = (i + 2 < (int)text_utf.size()) ? text_utf[i + 2] : "";
|
||||
|
||||
// handling contractions
|
||||
if (!split_condition && bytes_remain >= 2) {
|
||||
|
@ -4719,9 +4719,9 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
for (std::string& word : bpe_words) {
|
||||
for (std::string & word : bpe_words) {
|
||||
std::string encoded_token = "";
|
||||
for (char& c : word) {
|
||||
for (char & c : word) {
|
||||
encoded_token += bytes_to_unicode_bpe(c);
|
||||
}
|
||||
bpe_encoded_words.emplace_back(encoded_token);
|
||||
|
@ -7654,7 +7654,7 @@ int llama_tokenize(
|
|||
return res.size();
|
||||
}
|
||||
|
||||
static std::string llama_decode_text(const std::string& text) {
|
||||
static std::string llama_decode_text(const std::string & text) {
|
||||
std::string decoded_text;
|
||||
auto unicode_sequences = codepoints_from_utf8(text);
|
||||
for (auto& unicode_sequence : unicode_sequences) {
|
||||
|
|
|
@ -73,7 +73,7 @@ int main(int argc, char **argv) {
|
|||
return 2;
|
||||
}
|
||||
}
|
||||
catch (const std::invalid_argument&) {
|
||||
catch (const std::invalid_argument &) {
|
||||
fprintf(stderr, "%s : info: utf8 conversion %d '%s'\n", __func__, i, str.c_str());
|
||||
}
|
||||
}
|
||||
|
|
14
unicode.h
14
unicode.h
|
@ -248,7 +248,7 @@ static std::string codepoint_to_utf8(uint32_t cp) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static std::string codepoints_to_utf8(const std::vector<uint32_t>& cps) {
|
||||
static std::string codepoints_to_utf8(const std::vector<uint32_t> & cps) {
|
||||
std::string result;
|
||||
for (size_t i = 0; i < cps.size(); ++i) {
|
||||
result.append(codepoint_to_utf8(cps[i]));
|
||||
|
@ -256,7 +256,7 @@ static std::string codepoints_to_utf8(const std::vector<uint32_t>& cps) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static uint32_t codepoint_from_utf8(const std::string& utf8, size_t& offset) {
|
||||
static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
|
||||
assert(offset < utf8.size());
|
||||
if (!(utf8[offset + 0] & 0x80)) {
|
||||
auto result = utf8[offset + 0];
|
||||
|
@ -290,7 +290,7 @@ static uint32_t codepoint_from_utf8(const std::string& utf8, size_t& offset) {
|
|||
throw std::invalid_argument("invalid string");
|
||||
}
|
||||
|
||||
static std::vector<uint32_t> codepoints_from_utf8(const std::string& utf8) {
|
||||
static std::vector<uint32_t> codepoints_from_utf8(const std::string & utf8) {
|
||||
std::vector<uint32_t> result;
|
||||
size_t offset = 0;
|
||||
while (offset < utf8.size()) {
|
||||
|
@ -314,7 +314,7 @@ static std::vector<uint16_t> codepoint_to_utf16(uint32_t cp) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t>& cps) {
|
||||
static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t> & cps) {
|
||||
std::vector<uint16_t> result;
|
||||
for (size_t i = 0; i < cps.size(); ++i) {
|
||||
auto temp = codepoint_to_utf16(cps[i]);
|
||||
|
@ -323,7 +323,7 @@ static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t>& cp
|
|||
return result;
|
||||
}
|
||||
|
||||
static uint32_t codepoint_from_utf16(const std::vector<uint16_t>& utf16, size_t& offset) {
|
||||
static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t & offset) {
|
||||
assert(offset < utf16.size());
|
||||
if (((utf16[0] >> 10) << 10) != 0xd800) {
|
||||
auto result = utf16[offset + 0];
|
||||
|
@ -340,7 +340,7 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t>& utf16, size_t&
|
|||
throw std::invalid_argument("invalid string");
|
||||
}
|
||||
|
||||
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t>& utf16) {
|
||||
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
||||
std::vector<uint32_t> result;
|
||||
size_t offset = 0;
|
||||
while (offset < utf16.size())
|
||||
|
@ -395,7 +395,7 @@ static int codepoint_type(uint32_t cp) {
|
|||
return codepoint_types[cp];
|
||||
}
|
||||
|
||||
static int codepoint_type(std::string utf8) {
|
||||
static int codepoint_type(const std::string & utf8) {
|
||||
if (utf8.length() == 0)
|
||||
return CODEPOINT_TYPE_UNIDENTIFIED;
|
||||
size_t offset = 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue