Updated cmap-example

This commit is contained in:
pudepiedj 2023-10-10 13:33:12 +01:00
parent 759973be79
commit 37050ba90a
2 changed files with 78 additions and 277 deletions

View file

@ -1,31 +1,61 @@
// example of a C/C++ equivalent data structure to the python dict // example of a C/C++ equivalent data structure to the python dict
// there are two: std::map automatically sorts on key; std::unordered_map does not
#include <map> #include <map>
#include <list> #include <list>
#include <string> #include <string>
#include <bitset> #include <bitset>
#include <vector> #include <vector>
#include <cstdio>
#include <iostream> #include <cmath>
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <regex> #include <regex>
#include <unordered_map> // there may be good reasons not to sort the parameters, but here we use map
#include <map>
#include <numeric> #include <numeric>
std::vector<std::string> splitString(const std::string& str, const std::string& delimiter) { std::vector<std::string> split_string(const std::string& str, const std::string& delimiter) {
std::vector<std::string> tokens; std::vector<std::string> tokens;
std::size_t start = 0, end = 0; std::size_t start = 0, end = 0;
bool inside_tags = false; // flag to track if we are inside "<>"
while ((end = str.find(delimiter, start)) != std::string::npos) { while ((end = str.find(delimiter, start)) != std::string::npos) {
tokens.push_back(str.substr(start, end - start)); std::string token = str.substr(start, end - start);
// if (!token.empty()) { // Add condition to exclude empty substrings
// tokens.push_back(token);
if (!inside_tags && !token.empty()) { // Add condition to exclude empty substrings and if not inside "<>"
tokens.push_back(token);
}
// deal with cases where the split character occurs inside <>
// Update inside_tags flag based on "<>"
size_t open_tag_pos = str.find("<", start);
size_t close_tag_pos = str.find(">", start);
if (open_tag_pos != std::string::npos && close_tag_pos != std::string::npos && open_tag_pos < end) {
inside_tags = true;
} else if (close_tag_pos != std::string::npos && close_tag_pos < end) {
inside_tags = false;
}
start = end + delimiter.length(); start = end + delimiter.length();
} }
tokens.push_back(str.substr(start)); tokens.push_back(str.substr(start));
return tokens; return tokens;
} }
std::unordered_map<std::string, std::vector<std::string>> extractParameters() { void print_parameters(const std::map<std::string, std::vector<std::string>>& parameters) {
for (const auto& pair : parameters) {
const std::string& key = pair.first;
const std::vector<std::string>& value = pair.second; // usually has multiple elements
printf("key: %25s: values: ", key.c_str());
for (const std::string& element : value) {
printf("%s ", element.c_str());
}
printf("\n");
}
}
std::map<std::string, std::vector<std::string>> extract_parameters() {
std::ifstream file("common/common.h"); std::ifstream file("common/common.h");
std::string line; std::string line;
std::vector<std::string> lines; std::vector<std::string> lines;
@ -33,199 +63,62 @@ std::unordered_map<std::string, std::vector<std::string>> extractParameters() {
lines.push_back(line); lines.push_back(line);
} }
std::unordered_map<std::string, std::vector<std::string>> parameters; std::map<std::string, std::vector<std::string>> parameters;
parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"}; // fix up failure to match logit_bias; may also need to add lora_adapter; now dealt with and ready for deletion
// parameters["logit_bias"] = {"std::unordered_map<llama_token, float>" "logit_bias", "=", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"};
// parameters["lora_adapter"] = {"std::vector<std::tuple<std::string, float>>", "lora_adapter", "=", "", "//", "lora", "adapter", "path", "with", "user-defined", "scale"};
// are we inside gpt_params?
// this for loop finds all the params inside struct gpt-params
bool inside = false; bool inside = false;
for (const std::string& line : lines) { for (const std::string& line : lines) {
std::vector<std::string> nonWhitespaceElements = splitString(line, " "); std::vector<std::string> nws_elements = split_string(line, " ");
printf("nwe = \033[33m"); printf("nwe = ");
for (const std::string& element : nonWhitespaceElements) { for (const std::string& element : nws_elements) {
printf("%s ", element); printf("%s ", element.c_str());
}
printf("\033[0m\n");
if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") {
inside = true;
}
if (nonWhitespaceElements.size() > 2 && inside) {
// Note: cannot use nwe[0] because types do not generate unique keys and so overwrite
// Here we deliberately add back the key so we can manually change it when it is different
parameters[nonWhitespaceElements[1]] = nonWhitespaceElements;
// Remove spurious entry caused by eccentric status of logit_bias
if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") {
parameters.erase("float>");
}
}
// Terminate the harvest
if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") {
inside = false;
break;
}
}
for (const auto& pair : parameters) {
const std::string& key = pair.first;
const std::vector<std::string>& value = pair.second;
printf("key: %s; values: ", key);
for (const std::string& element : value) {
printf("%s ", element);
} }
printf("\n"); printf("\n");
std::string concatenatedElement = ""; if (!nws_elements.empty() && nws_elements[0] == "struct" && nws_elements[1] == "gpt_params") {
for (std::size_t i = 0; i < value.size(); i++) { inside = true;
if (value[i] == "//") { }
concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string(""));
// break; if (nws_elements.size() > 2 && inside) {
// cannot use nwe[0] as key because types do not generate unique keys and so overwrite
// Here we deliberately add back the key so we can manually change it when it is different (remove eventually)
// parameters[nws_elements[1]] = nws_elements;
std::vector<std::string> copy = nws_elements; // Create a copy of nws_elements
parameters[nws_elements[1]] = copy; // Assign the copy to parameters
// Remove spurious entry caused by eccentric status of logit_bias
if (parameters.count("float>") && parameters["float>"][2] == "logit_bias;") {
parameters.erase("float>");
}
// Remove spurious entry caused by eccentric status of lora_adapter
if (parameters.count("float>>") && parameters["float>>"][2] == "lora_adapter;") {
parameters.erase("float>>");
} }
} }
printf("parameter: \033[32m key: \033[34m%s default: \033[30m%s \033[34mcomment: \033[33m%s\033[0m\n", key, value[1], concatenatedElement); // Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now
} if (nws_elements.size() > 2 && nws_elements[1] == "infill") {
inside = false;
break;
}
}
// now display them (unnecessary operationally; here for development)
print_parameters(parameters);
// return the results (will eventually become a void function)
return parameters; return parameters;
} }
int main() { int main() {
std::map<std::string, int> dict;
std::map<std::string, std::list<std::string>> helpdict;
dict[std::string("apple")] = 5; // process the code inserted to replicate readcommonh.py
dict[std::string("banana")] = 2; // this does not produce output but here is forced; it just collects the output into parameters and returns 0
dict[std::string("orange")] = 7; std::map<std::string, std::vector<std::string>> parameters = extract_parameters();
print_parameters(parameters);
// Accessing elements in the map
printf("First kind of dictionary\n\nValue of apple: %d\n", dict[std::string("apple")]);
for (const auto& pair : dict) {
printf("Key: %10s, Value: %4d\n", pair.first.c_str(), pair.second);
}
// Now try the helpdict idea
printf("Second kind of dictionary\n");
// Create a list of strings
std::list<std::string> stringList = {"apple", "banana", "orange"};
// Add key-value pair to map
helpdict["fruits"] = stringList;
// Access and modify the list of strings
std::list<std::string>& fruitsList = helpdict["fruits"];
fruitsList.push_back("grape");
fruitsList.push_back("pineapple");
for (const auto& pair : helpdict) {
printf("helpdict contains a list of %s\n", pair.first.c_str());
for (const auto& element : pair.second) {
printf(" %s", element.c_str());
}
printf("\n");
}
// Create a binary key for each value consisting of a list of strings
std::map<std::string, std::list<std::string>> bitdict;
// Example binary key
int binaryKey1 = 0b0000001;
int binaryKey2 = 0b0000010;
int binaryKey3 = 0b0000100;
int binaryKey4 = 0b0001000;
int binaryKey5 = 0b0010000;
// Convert binary key to string
std::string keyString1 = std::bitset<8>(binaryKey1).to_string();
std::string keyString2 = std::bitset<8>(binaryKey2).to_string();
std::string keyString3 = std::bitset<8>(binaryKey3).to_string();
std::string keyString4 = std::bitset<8>(binaryKey4).to_string();
std::string keyString5 = std::bitset<8>(binaryKey5).to_string();
// Add key-value pair to map
bitdict[keyString1] = {"-h", "--help", "print this help list and exit"};
bitdict[keyString2] = {"-f", "FNAME", "--file", "FNAME", "read the prompts from an external text file"};
bitdict[keyString3] = {"-n", "N", "--n-predict", "N", "number of tokens to predict in generating a completion"};
bitdict[keyString4] = {"-t", "N", "--threads", "N", "number of threads to use"};
bitdict[keyString5] = {"-m", "MODELPATH", "--model", "MODELPATH", "path to llama model to use"};
for (const auto& pair : bitdict) {
printf("help dictionary contains a list of arguments specific to this app %s\n", pair.first.substr(pair.first.size() - 5).c_str());
for (const auto& element : pair.second) {
printf(" %5s", element.c_str());
}
printf("\n");
}
printf("\nThis is the complete help file in this mock-up illustrative example:\n\n");
for (const auto& pair: bitdict) {
printf("%s ",pair.first.c_str());
for (const auto& element : pair.second) {
printf(" %5s", element.c_str());
}
printf("\n");
}
// Now we try to use the appcode to select from the help available
// app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101
int app1code = 0b0001011;
int app2code = 0b0010111;
printf("\nNow processing app with only -h, -t and -f implemented and appcode %3d\n", app1code);
if (app1code != 0) {
for (const auto& kvp : bitdict) {
if ((app1code & std::stoi(kvp.first)) != 0) {
printf("%s ",kvp.first.c_str());
for (const auto& element : kvp.second) {
printf(" %5s", element.c_str());
}
printf("\n");
}
}
printf("\n");
}
printf("\nNow processing app with only -h, -f, -m and -n implemented and appcode %3d\n", app2code);
if (app2code != 0) {
for (const auto& kvp : bitdict) {
if ((app2code & std::stoi(kvp.first)) != 0) {
printf("%s ",kvp.first.c_str());
for (const auto& element : kvp.second) {
printf(" %5s", element.c_str());
}
printf("\n");
}
}
printf("\n");
}
// This is more like the general way to do it
std::vector<int> appcodes = {2, 5, 11, 17, 23, 31};
for (size_t i = 0; i < appcodes.size(); ++i) {
int x = appcodes[i];
if (x != 0) {
for (const auto& kvp : bitdict) {
if ((x & std::stoi(kvp.first)) != 0) {
printf("appcode %3d %s ", x, kvp.first.c_str());
for (const auto& element : kvp.second) {
printf(" %5s", element.c_str());
}
printf("\n");
}
}
printf("\n");
}
}
// now process the extra bit inserted to replicate readcommonh.py
std::unordered_map<std::string, std::vector<std::string>> parameters = extractParameters();
return 0; return 0;
} }

View file

@ -1,92 +0,0 @@
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
/// Split `str` into the substrings that lie between occurrences of `delimiter`.
///
/// Empty substrings are preserved: consecutive delimiters, or a delimiter at the
/// very start or end of `str`, produce empty tokens. The result therefore always
/// holds at least one element (an empty `str` yields a single empty token).
///
/// @param str        text to split
/// @param delimiter  separator, matched as a whole string (not as a set of characters)
/// @return the tokens, in order of appearance
std::vector<std::string> splitString(const std::string& str, const std::string& delimiter) {
    // find() with an empty needle succeeds at `start` without ever advancing,
    // which would spin forever appending empty tokens. There is nothing to
    // split on, so return the input as the only token.
    if (delimiter.empty()) {
        return {str};
    }

    std::vector<std::string> tokens;
    std::size_t start = 0;
    for (std::size_t end = str.find(delimiter, start); end != std::string::npos;
         end = str.find(delimiter, start)) {
        tokens.push_back(str.substr(start, end - start));
        start = end + delimiter.length();  // resume just past the matched delimiter
    }
    // Whatever follows the last delimiter (possibly nothing) is the final token.
    tokens.push_back(str.substr(start));
    return tokens;
}
/// Scan common/common.h and collect one entry per declaration-like line.
///
/// Harvesting starts at the first line whose first token is `struct` and stops
/// at the line whose second token is `infill`. Every line in between that has
/// more than two whitespace-separated tokens is stored under its SECOND token
/// (the first token is a type name and would not be unique).
/// NOTE(review): presumably the struct being harvested is gpt_params -- confirm
/// against the header, since only the `struct` keyword is checked here.
///
/// Side effects: prints every tokenised line and a per-entry report to stdout,
/// and a warning to stderr when the header cannot be opened.
///
/// @return map from key token to the full token list of its source line; it
///         always contains the hand-written "logit_bias" entry.
std::unordered_map<std::string, std::vector<std::string>> extractParameters() {
    std::unordered_map<std::string, std::vector<std::string>> parameters;
    // Hand-written entry: the logit_bias declaration has a templated type
    // containing a space, so it cannot be keyed correctly by the generic rule.
    parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"};

    std::ifstream file("common/common.h");
    if (!file) {
        // Keep going: the caller still gets the hand-written entry, but make
        // the reason for the otherwise empty harvest visible.
        std::cerr << "extractParameters: cannot open common/common.h\n";
    }

    bool inside = false;
    std::string line;
    while (std::getline(file, line)) {
        // splitString keeps the empty tokens produced by indentation and by
        // runs of alignment spaces; drop them so that token positions refer to
        // real words (otherwise every indented line would be keyed on "").
        std::vector<std::string> nonWhitespaceElements;
        for (const std::string& token : splitString(line, " ")) {
            if (!token.empty()) {
                nonWhitespaceElements.push_back(token);
            }
        }

        std::cout << "nwe = \033[33m";
        for (const std::string& element : nonWhitespaceElements) {
            std::cout << element << " ";
        }
        std::cout << "\033[0m" << '\n';

        if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") {
            inside = true;
        }

        if (inside && nonWhitespaceElements.size() > 2) {
            // Note: cannot use element [0] as the key because types are not
            // unique and later lines would overwrite earlier ones.
            parameters[nonWhitespaceElements[1]] = nonWhitespaceElements;

            // Remove the spurious entry created by the logit_bias line, whose
            // second token is the tail of its type ("float>"). The declared
            // name is element [2]; element [1] is the key itself, so testing
            // it against "logit_bias" could never match.
            const auto spurious = parameters.find("float>");
            if (spurious != parameters.end() && spurious->second.size() > 2 &&
                spurious->second[2].rfind("logit_bias", 0) == 0) {  // starts with "logit_bias"
                parameters.erase(spurious);
            }
        }

        // Terminate the harvest at the `infill` line.
        if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") {
            break;
        }
    }

    for (const auto& pair : parameters) {
        const std::string& key = pair.first;
        const std::vector<std::string>& value = pair.second;

        std::cout << "key: " << std::left << std::setw(20) << key << "; values: ";
        for (const std::string& element : value) {
            std::cout << element << " ";
        }
        std::cout << '\n';

        // Rebuild the trailing comment, starting at the last "//" token, with
        // the words separated by single spaces so the text stays readable.
        std::string concatenatedElement;
        for (std::size_t i = 0; i < value.size(); i++) {
            if (value[i] == "//") {
                concatenatedElement = std::accumulate(
                    value.begin() + i + 1, value.end(), value[i],
                    [](std::string joined, const std::string& word) {
                        joined += ' ';
                        joined += word;
                        return joined;
                    });
            }
        }

        // Every stored entry has at least three elements (the hand-written one
        // has eleven; harvested ones are only stored when size() > 2), so
        // value[1] is always valid.
        // NOTE(review): for harvested lines value[1] is the key token itself,
        // not a default value -- confirm which element should be shown here.
        std::cout << std::string(10, ' ');
        std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m"
                  << std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m"
                  << std::left << std::setw(80) << concatenatedElement << "\033[0m" << '\n';
    }
    return parameters;
}
// Program entry point: the whole job is done by extractParameters(), which
// prints its findings as it goes; the map it returns is not used any further.
int main() {
    const auto parameters = extractParameters();
    (void)parameters;  // kept only so the harvest runs; output already printed
    return 0;
}