Fix coding style

2023-10-02 13:01:46 +02:00 · 2023-10-02 13:01:46 +02:00 · 5aee498d97
commit 5aee498d97
parent 3d162cc8ad
3 changed files with 15 additions and 15 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -4590,7 +4590,7 @@ private:
        work_queue.push(bigram);
    }

-    std::vector<std::string> bpe_gpt2_preprocess(const std::string& text) {
+    std::vector<std::string> bpe_gpt2_preprocess(const std::string & text) {
        std::vector<std::string> bpe_words;
        std::vector<std::string> bpe_encoded_words;

@@ -4612,13 +4612,13 @@ private:
            text_utf.emplace_back(codepoint_to_utf8(cps[i]));

        for (int i = 0; i < (int)text_utf.size(); i++) {
-            const std::string& utf_char = text_utf[i];
+            const std::string & utf_char = text_utf[i];
            bool split_condition = false;
            // const char* text_pos = raw_text_p + utf_char.seq_offset_bytes;
            int bytes_remain = text_utf.size() - i;
            // forward backward lookups
-            const std::string& utf_char_next = (i + 1 < (int)text_utf.size()) ? text_utf[i + 1] : "";
-            const std::string& utf_char_next_next = (i + 2 < (int)text_utf.size()) ? text_utf[i + 2] : "";
+            const std::string & utf_char_next = (i + 1 < (int)text_utf.size()) ? text_utf[i + 1] : "";
+            const std::string & utf_char_next_next = (i + 2 < (int)text_utf.size()) ? text_utf[i + 2] : "";

            // handling contractions
            if (!split_condition && bytes_remain >= 2) {
@@ -4719,9 +4719,9 @@ private:
            }
        }

-        for (std::string& word : bpe_words) {
+        for (std::string & word : bpe_words) {
            std::string encoded_token = "";
-            for (char& c : word) {
+            for (char & c : word) {
                encoded_token += bytes_to_unicode_bpe(c);
            }
            bpe_encoded_words.emplace_back(encoded_token);
@@ -7654,7 +7654,7 @@ int llama_tokenize(
    return res.size();
 }

-static std::string llama_decode_text(const std::string& text) {
+static std::string llama_decode_text(const std::string & text) {
    std::string decoded_text;
    auto unicode_sequences = codepoints_from_utf8(text);
    for (auto& unicode_sequence : unicode_sequences) {
--- a/tests/test-tokenizer-1-bpe.cpp
+++ b/tests/test-tokenizer-1-bpe.cpp
@@ -73,7 +73,7 @@ int main(int argc, char **argv) {
                return 2;
            }
        }
-        catch (const std::invalid_argument&) {
+        catch (const std::invalid_argument &) {
            fprintf(stderr, "%s : info: utf8 conversion %d '%s'\n", __func__, i, str.c_str());
        }
    }
--- a/unicode.h
+++ b/unicode.h
@@ -248,7 +248,7 @@ static std::string codepoint_to_utf8(uint32_t cp) {
    return result;
 }

-static std::string codepoints_to_utf8(const std::vector<uint32_t>& cps) {
+static std::string codepoints_to_utf8(const std::vector<uint32_t> & cps) {
    std::string result;
    for (size_t i = 0; i < cps.size(); ++i) {
        result.append(codepoint_to_utf8(cps[i]));
@@ -256,7 +256,7 @@ static std::string codepoints_to_utf8(const std::vector<uint32_t>& cps) {
    return result;
 }

-static uint32_t codepoint_from_utf8(const std::string& utf8, size_t& offset) {
+static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
    assert(offset < utf8.size());
    if (!(utf8[offset + 0] & 0x80)) {
        auto result = utf8[offset + 0];
@@ -290,7 +290,7 @@ static uint32_t codepoint_from_utf8(const std::string& utf8, size_t& offset) {
    throw std::invalid_argument("invalid string");
 }

-static std::vector<uint32_t> codepoints_from_utf8(const std::string& utf8) {
+static std::vector<uint32_t> codepoints_from_utf8(const std::string & utf8) {
    std::vector<uint32_t> result;
    size_t offset = 0;
    while (offset < utf8.size()) {
@@ -314,7 +314,7 @@ static std::vector<uint16_t> codepoint_to_utf16(uint32_t cp) {
    return result;
 }

-static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t>& cps) {
+static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t> & cps) {
    std::vector<uint16_t> result;
    for (size_t i = 0; i < cps.size(); ++i) {
        auto temp = codepoint_to_utf16(cps[i]);
@@ -323,7 +323,7 @@ static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t>& cp
    return result;
 }

-static uint32_t codepoint_from_utf16(const std::vector<uint16_t>& utf16, size_t& offset) {
+static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t & offset) {
    assert(offset < utf16.size());
    if (((utf16[0] >> 10) << 10) != 0xd800) {
        auto result = utf16[offset + 0];
@@ -340,7 +340,7 @@ static uint32_t codepoint_from_utf16(const std::vector<uint16_t>& utf16, size_t&
    throw std::invalid_argument("invalid string");
 }

-static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t>& utf16) {
+static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
    std::vector<uint32_t> result;
    size_t offset = 0;
    while (offset < utf16.size())
@@ -395,7 +395,7 @@ static int codepoint_type(uint32_t cp) {
    return codepoint_types[cp];
 }

-static int codepoint_type(std::string utf8) {
+static int codepoint_type(const std::string & utf8) {
    if (utf8.length() == 0)
        return CODEPOINT_TYPE_UNIDENTIFIED;
    size_t offset = 0;