From 5aee498d9705151be61f8cfa96269bdd8da15a8f Mon Sep 17 00:00:00 2001 From: goerch Date: Mon, 2 Oct 2023 13:01:46 +0200 Subject: [PATCH] Fix coding style --- llama.cpp | 14 +++++++------- tests/test-tokenizer-1-bpe.cpp | 2 +- unicode.h | 14 +++++++------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/llama.cpp b/llama.cpp index eafcfda22..b7270b905 100644 --- a/llama.cpp +++ b/llama.cpp @@ -4590,7 +4590,7 @@ private: work_queue.push(bigram); } - std::vector bpe_gpt2_preprocess(const std::string& text) { + std::vector bpe_gpt2_preprocess(const std::string & text) { std::vector bpe_words; std::vector bpe_encoded_words; @@ -4612,13 +4612,13 @@ private: text_utf.emplace_back(codepoint_to_utf8(cps[i])); for (int i = 0; i < (int)text_utf.size(); i++) { - const std::string& utf_char = text_utf[i]; + const std::string & utf_char = text_utf[i]; bool split_condition = false; // const char* text_pos = raw_text_p + utf_char.seq_offset_bytes; int bytes_remain = text_utf.size() - i; // forward backward lookups - const std::string& utf_char_next = (i + 1 < (int)text_utf.size()) ? text_utf[i + 1] : ""; - const std::string& utf_char_next_next = (i + 2 < (int)text_utf.size()) ? text_utf[i + 2] : ""; + const std::string & utf_char_next = (i + 1 < (int)text_utf.size()) ? text_utf[i + 1] : ""; + const std::string & utf_char_next_next = (i + 2 < (int)text_utf.size()) ? text_utf[i + 2] : ""; // handling contractions if (!split_condition && bytes_remain >= 2) { @@ -4719,9 +4719,9 @@ private: } } - for (std::string& word : bpe_words) { + for (std::string & word : bpe_words) { std::string encoded_token = ""; - for (char& c : word) { + for (char & c : word) { encoded_token += bytes_to_unicode_bpe(c); } bpe_encoded_words.emplace_back(encoded_token); @@ -7654,7 +7654,7 @@ int llama_tokenize( return res.size(); } -static std::string llama_decode_text(const std::string& text) { +static std::string llama_decode_text(const std::string & text) { std::string decoded_text; auto unicode_sequences = codepoints_from_utf8(text); for (auto& unicode_sequence : unicode_sequences) { diff --git a/tests/test-tokenizer-1-bpe.cpp b/tests/test-tokenizer-1-bpe.cpp index 986e67ba5..85a59a14d 100644 --- a/tests/test-tokenizer-1-bpe.cpp +++ b/tests/test-tokenizer-1-bpe.cpp @@ -73,7 +73,7 @@ int main(int argc, char **argv) { return 2; } } - catch (const std::invalid_argument&) { + catch (const std::invalid_argument &) { fprintf(stderr, "%s : info: utf8 conversion %d '%s'\n", __func__, i, str.c_str()); } } diff --git a/unicode.h b/unicode.h index 5b2247c89..aeca879ea 100644 --- a/unicode.h +++ b/unicode.h @@ -248,7 +248,7 @@ static std::string codepoint_to_utf8(uint32_t cp) { return result; } -static std::string codepoints_to_utf8(const std::vector& cps) { +static std::string codepoints_to_utf8(const std::vector & cps) { std::string result; for (size_t i = 0; i < cps.size(); ++i) { result.append(codepoint_to_utf8(cps[i])); @@ -256,7 +256,7 @@ static std::string codepoints_to_utf8(const std::vector& cps) { return result; } -static uint32_t codepoint_from_utf8(const std::string& utf8, size_t& offset) { +static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) { assert(offset < utf8.size()); if (!(utf8[offset + 0] & 0x80)) { auto result = utf8[offset + 0]; @@ -290,7 +290,7 @@ static uint32_t codepoint_from_utf8(const std::string& utf8, size_t& offset) { throw std::invalid_argument("invalid string"); } -static std::vector codepoints_from_utf8(const std::string& utf8) { +static std::vector codepoints_from_utf8(const std::string & utf8) { std::vector result; size_t offset = 0; while (offset < utf8.size()) { @@ -314,7 +314,7 @@ static std::vector codepoint_to_utf16(uint32_t cp) { return result; } -static std::vector codepoints_to_utf16(const std::vector& cps) { +static std::vector codepoints_to_utf16(const std::vector & cps) { std::vector result; for (size_t i = 0; i < cps.size(); ++i) { auto temp = codepoint_to_utf16(cps[i]); @@ -323,7 +323,7 @@ static std::vector codepoints_to_utf16(const std::vector& cp return result; } -static uint32_t codepoint_from_utf16(const std::vector& utf16, size_t& offset) { +static uint32_t codepoint_from_utf16(const std::vector & utf16, size_t & offset) { assert(offset < utf16.size()); if (((utf16[0] >> 10) << 10) != 0xd800) { auto result = utf16[offset + 0]; @@ -340,7 +340,7 @@ static uint32_t codepoint_from_utf16(const std::vector& utf16, size_t& throw std::invalid_argument("invalid string"); } -static std::vector codepoints_from_utf16(const std::vector& utf16) { +static std::vector codepoints_from_utf16(const std::vector & utf16) { std::vector result; size_t offset = 0; while (offset < utf16.size()) @@ -395,7 +395,7 @@ static int codepoint_type(uint32_t cp) { return codepoint_types[cp]; } -static int codepoint_type(std::string utf8) { +static int codepoint_type(const std::string & utf8) { if (utf8.length() == 0) return CODEPOINT_TYPE_UNIDENTIFIED; size_t offset = 0;