From 37050ba90a9fe47bbfa593278eeba3fa0303cc90 Mon Sep 17 00:00:00 2001 From: pudepiedj Date: Tue, 10 Oct 2023 13:33:12 +0100 Subject: [PATCH] Updated cmap-example --- examples/cmap-example/cmap-example.cpp | 263 ++++++++----------------- examples/cmap-example/creadcommonh.cpp | 92 --------- 2 files changed, 78 insertions(+), 277 deletions(-) delete mode 100644 examples/cmap-example/creadcommonh.cpp diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp index cfac88860..1c39bd2bd 100644 --- a/examples/cmap-example/cmap-example.cpp +++ b/examples/cmap-example/cmap-example.cpp @@ -1,31 +1,61 @@ // example of a C/C++ equivalent data structure to the python dict -// there are two: std::map automatically sorts on key; std::unordered_map does not #include #include #include #include #include - -#include +#include +#include #include #include #include -#include +// there may be good reasons not to sort the parameters, but here we use map +#include #include -std::vector splitString(const std::string& str, const std::string& delimiter) { +std::vector split_string(const std::string& str, const std::string& delimiter) { std::vector tokens; std::size_t start = 0, end = 0; + bool inside_tags = false; // flag to track if we are inside "<>" + while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); + std::string token = str.substr(start, end - start); + + // if (!token.empty()) { // Add condition to exclude empty substrings + // tokens.push_back(token); + + if (!inside_tags && !token.empty()) { // Add condition to exclude empty substrings and if not inside "<>" + tokens.push_back(token); + } + // deal with cases where the split character occurs inside <> + // Update inside_tags flag based on "<>" + size_t open_tag_pos = str.find("<", start); + size_t close_tag_pos = str.find(">", start); + if (open_tag_pos != std::string::npos && close_tag_pos != std::string::npos && open_tag_pos < end) { + inside_tags = true; + } else if (close_tag_pos != std::string::npos && close_tag_pos < end) { + inside_tags = false; + } start = end + delimiter.length(); } tokens.push_back(str.substr(start)); return tokens; } -std::unordered_map> extractParameters() { +void print_parameters(const std::map>& parameters) { + for (const auto& pair : parameters) { + const std::string& key = pair.first; + const std::vector& value = pair.second; // usually has multiple elements + printf("key: %25s: values: ", key.c_str()); + for (const std::string& element : value) { + printf("%s ", element.c_str()); + } + printf("\n"); + } +} + +std::map> extract_parameters() { std::ifstream file("common/common.h"); std::string line; std::vector lines; @@ -33,199 +63,62 @@ std::unordered_map> extractParameters() { lines.push_back(line); } - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + std::map> parameters; + // fix up failure to match logit_bias; may also need to add lora_adapter; now dealt with and ready for deletion + // parameters["logit_bias"] = {"std::unordered_map" "logit_bias", "=", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; + // parameters["lora_adapter"] = {"std::vector>", "lora_adapter", "=", "", "//", "lora", "adapter", "path", "with", "user-defined", "scale"}; + // are we inside gpt_params? + // this for loop finds all the params inside struct gpt-params bool inside = false; for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - printf("nwe = \033[33m"); - for (const std::string& element : nonWhitespaceElements) { - printf("%s ", element); - } - printf("\033[0m\n"); - - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { - inside = true; - } - - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; - - // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { - parameters.erase("float>"); - } - } - - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { - inside = false; - break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - printf("key: %s; values: ", key); - for (const std::string& element : value) { - printf("%s ", element); + std::vector nws_elements = split_string(line, " "); + printf("nwe = "); + for (const std::string& element : nws_elements) { + printf("%s ", element.c_str()); } printf("\n"); - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; + if (!nws_elements.empty() && nws_elements[0] == "struct" && nws_elements[1] == "gpt_params") { + inside = true; + } + + if (nws_elements.size() > 2 && inside) { + // cannot use nwe[0] as key because types do not generate unique keys and so overwrite + // Here we deliberately add back the key so we can manually change it when it is different (remove eventually) + // parameters[nws_elements[1]] = nws_elements; + std::vector copy = nws_elements; // Create a copy of nws_elements + parameters[nws_elements[1]] = copy; // Assign the copy to parameters + + // Remove spurious entry caused by eccentric status of logit_bias + if (parameters.count("float>") && parameters["float>"][2] == "logit_bias;") { + parameters.erase("float>"); + } + // Remove spurious entry caused by eccentric status of lora_adapter + if (parameters.count("float>>") && parameters["float>>"][2] == "lora_adapter;") { + parameters.erase("float>>"); } } - printf("parameter: \033[32m key: \033[34m%s default: \033[30m%s \033[34mcomment: \033[33m%s\033[0m\n", key, value[1], concatenatedElement); - } + // Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now + if (nws_elements.size() > 2 && nws_elements[1] == "infill") { + inside = false; + break; + } + } + // now display them (unnecessary operationally; here for development) + print_parameters(parameters); + // return the results (will eventually become a void function) return parameters; } int main() { - std::map dict; - std::map> helpdict; - dict[std::string("apple")] = 5; - dict[std::string("banana")] = 2; - dict[std::string("orange")] = 7; - - - // Accessing elements in the map - printf("First kind of dictionary\n\nValue of apple: %d\n", dict[std::string("apple")]); - - for (const auto& pair : dict) { - printf("Key: %10s, Value: %4d\n", pair.first.c_str(), pair.second); - } - - // Now try the helpdict idea - - printf("Second kind of dictionary\n"); - - // Create a list of strings - std::list stringList = {"apple", "banana", "orange"}; - - // Add key-value pair to map - helpdict["fruits"] = stringList; - - // Access and modify the list of strings - std::list& fruitsList = helpdict["fruits"]; - fruitsList.push_back("grape"); - fruitsList.push_back("pineapple"); - - for (const auto& pair : helpdict) { - printf("helpdict contains a list of %s\n", pair.first.c_str()); - for (const auto& element : pair.second) { - printf(" %s", element.c_str()); - } - printf("\n"); - } - - // Create a binary key for each value consisting of a list of strings - - std::map> bitdict; - - // Example binary key - int binaryKey1 = 0b0000001; - int binaryKey2 = 0b0000010; - int binaryKey3 = 0b0000100; - int binaryKey4 = 0b0001000; - int binaryKey5 = 0b0010000; - - // Convert binary key to string - std::string keyString1 = std::bitset<8>(binaryKey1).to_string(); - std::string keyString2 = std::bitset<8>(binaryKey2).to_string(); - std::string keyString3 = std::bitset<8>(binaryKey3).to_string(); - std::string keyString4 = std::bitset<8>(binaryKey4).to_string(); - std::string keyString5 = std::bitset<8>(binaryKey5).to_string(); - - // Add key-value pair to map - bitdict[keyString1] = {"-h", "--help", "print this help list and exit"}; - bitdict[keyString2] = {"-f", "FNAME", "--file", "FNAME", "read the prompts from an external text file"}; - bitdict[keyString3] = {"-n", "N", "--n-predict", "N", "number of tokens to predict in generating a completion"}; - bitdict[keyString4] = {"-t", "N", "--threads", "N", "number of threads to use"}; - bitdict[keyString5] = {"-m", "MODELPATH", "--model", "MODELPATH", "path to llama model to use"}; - - - for (const auto& pair : bitdict) { - printf("help dictionary contains a list of arguments specific to this app %s\n", pair.first.substr(pair.first.size() - 5).c_str()); - for (const auto& element : pair.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - - printf("\nThis is the complete help file in this mock-up illustrative example:\n\n"); - for (const auto& pair: bitdict) { - printf("%s ",pair.first.c_str()); - for (const auto& element : pair.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - - // Now we try to use the appcode to select from the help available - // app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101 - - int app1code = 0b0001011; - int app2code = 0b0010111; - - printf("\nNow processing app with only -h, -t and -f implemented and appcode %3d\n", app1code); - if (app1code != 0) { - for (const auto& kvp : bitdict) { - if ((app1code & std::stoi(kvp.first)) != 0) { - printf("%s ",kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - - printf("\nNow processing app with only -h, -f, -m and -n implemented and appcode %3d\n", app2code); - if (app2code != 0) { - for (const auto& kvp : bitdict) { - if ((app2code & std::stoi(kvp.first)) != 0) { - printf("%s ",kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - - // This is more like the general way to do it - std::vector appcodes = {2, 5, 11, 17, 23, 31}; - for (size_t i = 0; i < appcodes.size(); ++i) { - int x = appcodes[i]; - if (x != 0) { - for (const auto& kvp : bitdict) { - if ((x & std::stoi(kvp.first)) != 0) { - printf("appcode %3d %s ", x, kvp.first.c_str()); - for (const auto& element : kvp.second) { - printf(" %5s", element.c_str()); - } - printf("\n"); - } - } - printf("\n"); - } - } - // now process the extra bit inserted to replicate readcommonh.py - - std::unordered_map> parameters = extractParameters(); + // process the code inserted to replicate readcommonh.py + // this does not produce output but here is forced; it just collects the output into parameters and returns 0 + std::map> parameters = extract_parameters(); + print_parameters(parameters); return 0; } diff --git a/examples/cmap-example/creadcommonh.cpp b/examples/cmap-example/creadcommonh.cpp deleted file mode 100644 index 59e1e94ab..000000000 --- a/examples/cmap-example/creadcommonh.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -std::vector splitString(const std::string& str, const std::string& delimiter) { - std::vector tokens; - std::size_t start = 0, end = 0; - while ((end = str.find(delimiter, start)) != std::string::npos) { - tokens.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); - } - tokens.push_back(str.substr(start)); - return tokens; -} - -std::unordered_map> extractParameters() { - std::ifstream file("common/common.h"); - std::string line; - std::vector lines; - while (std::getline(file, line)) { - lines.push_back(line); - } - - std::unordered_map> parameters; - parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; - - bool inside = false; - for (const std::string& line : lines) { - std::vector nonWhitespaceElements = splitString(line, " "); - std::cout << "nwe = \033[33m"; - for (const std::string& element : nonWhitespaceElements) { - std::cout << element << " "; - } - std::cout << "\033[0m" << std::endl; - - if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") { - inside = true; - } - - if (nonWhitespaceElements.size() > 2 && inside) { - // Note: cannot use nwe[0] because types do not generate unique keys and so overwrite - // Here we deliberately add back the key so we can manually change it when it is different - parameters[nonWhitespaceElements[1]] = nonWhitespaceElements; - - // Remove spurious entry caused by eccentric status of logit_bias - if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") { - parameters.erase("float>"); - } - } - - // Terminate the harvest - if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") { - inside = false; - break; - } - } - - for (const auto& pair : parameters) { - const std::string& key = pair.first; - const std::vector& value = pair.second; - std::cout << "key: " << std::left << std::setw(20) << key << "; values: "; - for (const std::string& element : value) { - std::cout << element << " "; - } - std::cout << std::endl; - - std::string concatenatedElement = ""; - for (std::size_t i = 0; i < value.size(); i++) { - if (value[i] == "//") { - concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string("")); - // break; - } - } - - std::cout << std::string(10, ' '); - std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m" - << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m" - << std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl; - } - - return parameters; -} - -// everything above is called from here as 'extractParameters()' -int main() { - std::unordered_map> parameters = extractParameters(); - return 0; -}