Updated cmap-example
This commit is contained in:
parent
759973be79
commit
37050ba90a
2 changed files with 78 additions and 277 deletions
|
@ -1,31 +1,61 @@
|
|||
// example of a C/C++ equivalent data structure to the python dict
|
||||
// there are two: std::map automatically sorts on key; std::unordered_map does not
|
||||
|
||||
#include <map>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <bitset>
|
||||
#include <vector>
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
#include <cmath>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <regex>
|
||||
#include <unordered_map>
|
||||
// there may be good reasons not to sort the parameters, but here we use map
|
||||
#include <map>
|
||||
#include <numeric>
|
||||
|
||||
// Split `str` on every occurrence of `delimiter`.
//
// Pieces found before a delimiter are kept only when they are non-empty and
// the scanner is not currently inside a "<...>" section, so that a separator
// occurring inside template arguments (e.g. "std::map<int, float>") does not
// produce a stray token. The text after the last delimiter is always appended
// as the final element, even when it is empty, so the result is never empty.
//
// str        text to split
// delimiter  separator string; when empty, `str` is returned as a single token
// returns    the kept pieces in order of appearance
std::vector<std::string> split_string(const std::string& str, const std::string& delimiter) {
    std::vector<std::string> tokens;

    // An empty delimiter matches at `start` without ever advancing, which
    // would make the loop below spin forever; treat the input as one token.
    if (delimiter.empty()) {
        tokens.push_back(str);
        return tokens;
    }

    std::size_t start = 0;
    std::size_t end = 0;
    bool inside_tags = false; // true while the pieces being scanned lie inside "<>"

    while ((end = str.find(delimiter, start)) != std::string::npos) {
        // end == start means an empty piece (leading or repeated delimiter)
        if (!inside_tags && end > start) {
            tokens.push_back(str.substr(start, end - start));
        }

        // Update the flag for the NEXT piece, based on the piece just scanned:
        //  - set it when the first '<' at or after `start` lies before this
        //    delimiter and some '>' still follows `start`;
        //  - otherwise clear it when the first '>' at or after `start` lies
        //    before this delimiter.
        const std::size_t open_tag_pos = str.find('<', start);
        const std::size_t close_tag_pos = str.find('>', start);
        if (open_tag_pos != std::string::npos && close_tag_pos != std::string::npos && open_tag_pos < end) {
            inside_tags = true;
        } else if (close_tag_pos != std::string::npos && close_tag_pos < end) {
            inside_tags = false;
        }

        start = end + delimiter.length();
    }

    // the remainder after the last delimiter is kept unconditionally
    tokens.push_back(str.substr(start));
    return tokens;
}
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::string>> extractParameters() {
|
||||
// Dump every entry of `parameters` to stdout, one line per key: the key
// right-aligned in a 25-character column, followed by all of its values
// separated by single spaces.
void print_parameters(const std::map<std::string, std::vector<std::string>>& parameters) {
    for (auto entry = parameters.cbegin(); entry != parameters.cend(); ++entry) {
        printf("key: %25s: values: ", entry->first.c_str());

        const std::vector<std::string>& fields = entry->second; // usually has multiple elements
        for (std::size_t idx = 0; idx < fields.size(); ++idx) {
            printf("%s ", fields[idx].c_str());
        }

        printf("\n");
    }
}
|
||||
|
||||
std::map<std::string, std::vector<std::string>> extract_parameters() {
|
||||
std::ifstream file("common/common.h");
|
||||
std::string line;
|
||||
std::vector<std::string> lines;
|
||||
|
@ -33,199 +63,62 @@ std::unordered_map<std::string, std::vector<std::string>> extractParameters() {
|
|||
lines.push_back(line);
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::string>> parameters;
|
||||
parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"};
|
||||
std::map<std::string, std::vector<std::string>> parameters;
|
||||
// fix up failure to match logit_bias; may also need to add lora_adapter; now dealt with and ready for deletion
|
||||
// parameters["logit_bias"] = {"std::unordered_map<llama_token, float>" "logit_bias", "=", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"};
|
||||
// parameters["lora_adapter"] = {"std::vector<std::tuple<std::string, float>>", "lora_adapter", "=", "", "//", "lora", "adapter", "path", "with", "user-defined", "scale"};
|
||||
|
||||
// are we inside gpt_params?
|
||||
// this for loop finds all the params inside struct gpt-params
|
||||
bool inside = false;
|
||||
for (const std::string& line : lines) {
|
||||
std::vector<std::string> nonWhitespaceElements = splitString(line, " ");
|
||||
printf("nwe = \033[33m");
|
||||
for (const std::string& element : nonWhitespaceElements) {
|
||||
printf("%s ", element);
|
||||
}
|
||||
printf("\033[0m\n");
|
||||
|
||||
if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") {
|
||||
inside = true;
|
||||
}
|
||||
|
||||
if (nonWhitespaceElements.size() > 2 && inside) {
|
||||
// Note: cannot use nwe[0] because types do not generate unique keys and so overwrite
|
||||
// Here we deliberately add back the key so we can manually change it when it is different
|
||||
parameters[nonWhitespaceElements[1]] = nonWhitespaceElements;
|
||||
|
||||
// Remove spurious entry caused by eccentric status of logit_bias
|
||||
if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") {
|
||||
parameters.erase("float>");
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate the harvest
|
||||
if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") {
|
||||
inside = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& pair : parameters) {
|
||||
const std::string& key = pair.first;
|
||||
const std::vector<std::string>& value = pair.second;
|
||||
printf("key: %s; values: ", key);
|
||||
for (const std::string& element : value) {
|
||||
printf("%s ", element);
|
||||
std::vector<std::string> nws_elements = split_string(line, " ");
|
||||
printf("nwe = ");
|
||||
for (const std::string& element : nws_elements) {
|
||||
printf("%s ", element.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
std::string concatenatedElement = "";
|
||||
for (std::size_t i = 0; i < value.size(); i++) {
|
||||
if (value[i] == "//") {
|
||||
concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string(""));
|
||||
// break;
|
||||
if (!nws_elements.empty() && nws_elements[0] == "struct" && nws_elements[1] == "gpt_params") {
|
||||
inside = true;
|
||||
}
|
||||
|
||||
if (nws_elements.size() > 2 && inside) {
|
||||
// cannot use nwe[0] as key because types do not generate unique keys and so overwrite
|
||||
// Here we deliberately add back the key so we can manually change it when it is different (remove eventually)
|
||||
// parameters[nws_elements[1]] = nws_elements;
|
||||
std::vector<std::string> copy = nws_elements; // Create a copy of nws_elements
|
||||
parameters[nws_elements[1]] = copy; // Assign the copy to parameters
|
||||
|
||||
// Remove spurious entry caused by eccentric status of logit_bias
|
||||
if (parameters.count("float>") && parameters["float>"][2] == "logit_bias;") {
|
||||
parameters.erase("float>");
|
||||
}
|
||||
// Remove spurious entry caused by eccentric status of lora_adapter
|
||||
if (parameters.count("float>>") && parameters["float>>"][2] == "lora_adapter;") {
|
||||
parameters.erase("float>>");
|
||||
}
|
||||
}
|
||||
|
||||
printf("parameter: \033[32m key: \033[34m%s default: \033[30m%s \033[34mcomment: \033[33m%s\033[0m\n", key, value[1], concatenatedElement);
|
||||
}
|
||||
// Terminate the harvest; TODO: not robust; need better terminator; this just a crude hack for now
|
||||
if (nws_elements.size() > 2 && nws_elements[1] == "infill") {
|
||||
inside = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// now display them (unnecessary operationally; here for development)
|
||||
print_parameters(parameters);
|
||||
|
||||
// return the results (will eventually become a void function)
|
||||
return parameters;
|
||||
}
|
||||
|
||||
int main() {
|
||||
std::map<std::string, int> dict;
|
||||
std::map<std::string, std::list<std::string>> helpdict;
|
||||
|
||||
dict[std::string("apple")] = 5;
|
||||
dict[std::string("banana")] = 2;
|
||||
dict[std::string("orange")] = 7;
|
||||
|
||||
|
||||
// Accessing elements in the map
|
||||
printf("First kind of dictionary\n\nValue of apple: %d\n", dict[std::string("apple")]);
|
||||
|
||||
for (const auto& pair : dict) {
|
||||
printf("Key: %10s, Value: %4d\n", pair.first.c_str(), pair.second);
|
||||
}
|
||||
|
||||
// Now try the helpdict idea
|
||||
|
||||
printf("Second kind of dictionary\n");
|
||||
|
||||
// Create a list of strings
|
||||
std::list<std::string> stringList = {"apple", "banana", "orange"};
|
||||
|
||||
// Add key-value pair to map
|
||||
helpdict["fruits"] = stringList;
|
||||
|
||||
// Access and modify the list of strings
|
||||
std::list<std::string>& fruitsList = helpdict["fruits"];
|
||||
fruitsList.push_back("grape");
|
||||
fruitsList.push_back("pineapple");
|
||||
|
||||
for (const auto& pair : helpdict) {
|
||||
printf("helpdict contains a list of %s\n", pair.first.c_str());
|
||||
for (const auto& element : pair.second) {
|
||||
printf(" %s", element.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Create a binary key for each value consisting of a list of strings
|
||||
|
||||
std::map<std::string, std::list<std::string>> bitdict;
|
||||
|
||||
// Example binary key
|
||||
int binaryKey1 = 0b0000001;
|
||||
int binaryKey2 = 0b0000010;
|
||||
int binaryKey3 = 0b0000100;
|
||||
int binaryKey4 = 0b0001000;
|
||||
int binaryKey5 = 0b0010000;
|
||||
|
||||
// Convert binary key to string
|
||||
std::string keyString1 = std::bitset<8>(binaryKey1).to_string();
|
||||
std::string keyString2 = std::bitset<8>(binaryKey2).to_string();
|
||||
std::string keyString3 = std::bitset<8>(binaryKey3).to_string();
|
||||
std::string keyString4 = std::bitset<8>(binaryKey4).to_string();
|
||||
std::string keyString5 = std::bitset<8>(binaryKey5).to_string();
|
||||
|
||||
// Add key-value pair to map
|
||||
bitdict[keyString1] = {"-h", "--help", "print this help list and exit"};
|
||||
bitdict[keyString2] = {"-f", "FNAME", "--file", "FNAME", "read the prompts from an external text file"};
|
||||
bitdict[keyString3] = {"-n", "N", "--n-predict", "N", "number of tokens to predict in generating a completion"};
|
||||
bitdict[keyString4] = {"-t", "N", "--threads", "N", "number of threads to use"};
|
||||
bitdict[keyString5] = {"-m", "MODELPATH", "--model", "MODELPATH", "path to llama model to use"};
|
||||
|
||||
|
||||
for (const auto& pair : bitdict) {
|
||||
printf("help dictionary contains a list of arguments specific to this app %s\n", pair.first.substr(pair.first.size() - 5).c_str());
|
||||
for (const auto& element : pair.second) {
|
||||
printf(" %5s", element.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("\nThis is the complete help file in this mock-up illustrative example:\n\n");
|
||||
for (const auto& pair: bitdict) {
|
||||
printf("%s ",pair.first.c_str());
|
||||
for (const auto& element : pair.second) {
|
||||
printf(" %5s", element.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Now we try to use the appcode to select from the help available
|
||||
// app1 has only -h and -f so 0b00011; app2 has only -h and -n so 0b00101
|
||||
|
||||
int app1code = 0b0001011;
|
||||
int app2code = 0b0010111;
|
||||
|
||||
printf("\nNow processing app with only -h, -t and -f implemented and appcode %3d\n", app1code);
|
||||
if (app1code != 0) {
|
||||
for (const auto& kvp : bitdict) {
|
||||
if ((app1code & std::stoi(kvp.first)) != 0) {
|
||||
printf("%s ",kvp.first.c_str());
|
||||
for (const auto& element : kvp.second) {
|
||||
printf(" %5s", element.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("\nNow processing app with only -h, -f, -m and -n implemented and appcode %3d\n", app2code);
|
||||
if (app2code != 0) {
|
||||
for (const auto& kvp : bitdict) {
|
||||
if ((app2code & std::stoi(kvp.first)) != 0) {
|
||||
printf("%s ",kvp.first.c_str());
|
||||
for (const auto& element : kvp.second) {
|
||||
printf(" %5s", element.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// This is more like the general way to do it
|
||||
std::vector<int> appcodes = {2, 5, 11, 17, 23, 31};
|
||||
for (size_t i = 0; i < appcodes.size(); ++i) {
|
||||
int x = appcodes[i];
|
||||
if (x != 0) {
|
||||
for (const auto& kvp : bitdict) {
|
||||
if ((x & std::stoi(kvp.first)) != 0) {
|
||||
printf("appcode %3d %s ", x, kvp.first.c_str());
|
||||
for (const auto& element : kvp.second) {
|
||||
printf(" %5s", element.c_str());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
// now process the extra bit inserted to replicate readcommonh.py
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::string>> parameters = extractParameters();
|
||||
// process the code inserted to replicate readcommonh.py
|
||||
// this does not produce output but here is forced; it just collects the output into parameters and returns 0
|
||||
std::map<std::string, std::vector<std::string>> parameters = extract_parameters();
|
||||
print_parameters(parameters);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -1,92 +0,0 @@
|
|||
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
|
||||
|
||||
// Split `str` on every occurrence of `delimiter`.
//
// Every piece is kept, including empty ones produced by leading, trailing or
// repeated delimiters, so the result always has at least one element.
//
// str        text to split
// delimiter  separator string; when empty, `str` is returned as a single token
// returns    the pieces in order of appearance
std::vector<std::string> splitString(const std::string& str, const std::string& delimiter) {
    std::vector<std::string> tokens;

    // An empty delimiter matches at `start` without ever advancing, which
    // would make the loop below spin forever; treat the input as one token.
    if (delimiter.empty()) {
        tokens.push_back(str);
        return tokens;
    }

    std::size_t start = 0;
    std::size_t end = 0;
    while ((end = str.find(delimiter, start)) != std::string::npos) {
        tokens.push_back(str.substr(start, end - start));
        start = end + delimiter.length();
    }

    // remainder after the last delimiter (the whole string if none was found)
    tokens.push_back(str.substr(start));
    return tokens;
}
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::string>> extractParameters() {
|
||||
std::ifstream file("common/common.h");
|
||||
std::string line;
|
||||
std::vector<std::string> lines;
|
||||
while (std::getline(file, line)) {
|
||||
lines.push_back(line);
|
||||
}
|
||||
|
||||
std::unordered_map<std::string, std::vector<std::string>> parameters;
|
||||
parameters["logit_bias"] = {"logit_bias", "0", "//", "way", "to", "alter", "prob", "of", "word", "being", "chosen"};
|
||||
|
||||
bool inside = false;
|
||||
for (const std::string& line : lines) {
|
||||
std::vector<std::string> nonWhitespaceElements = splitString(line, " ");
|
||||
std::cout << "nwe = \033[33m";
|
||||
for (const std::string& element : nonWhitespaceElements) {
|
||||
std::cout << element << " ";
|
||||
}
|
||||
std::cout << "\033[0m" << std::endl;
|
||||
|
||||
if (!nonWhitespaceElements.empty() && nonWhitespaceElements[0] == "struct") {
|
||||
inside = true;
|
||||
}
|
||||
|
||||
if (nonWhitespaceElements.size() > 2 && inside) {
|
||||
// Note: cannot use nwe[0] because types do not generate unique keys and so overwrite
|
||||
// Here we deliberately add back the key so we can manually change it when it is different
|
||||
parameters[nonWhitespaceElements[1]] = nonWhitespaceElements;
|
||||
|
||||
// Remove spurious entry caused by eccentric status of logit_bias
|
||||
if (parameters.count("float>") && parameters["float>"][1] == "logit_bias") {
|
||||
parameters.erase("float>");
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate the harvest
|
||||
if (nonWhitespaceElements.size() > 2 && nonWhitespaceElements[1] == "infill") {
|
||||
inside = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& pair : parameters) {
|
||||
const std::string& key = pair.first;
|
||||
const std::vector<std::string>& value = pair.second;
|
||||
std::cout << "key: " << std::left << std::setw(20) << key << "; values: ";
|
||||
for (const std::string& element : value) {
|
||||
std::cout << element << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
std::string concatenatedElement = "";
|
||||
for (std::size_t i = 0; i < value.size(); i++) {
|
||||
if (value[i] == "//") {
|
||||
concatenatedElement = std::accumulate(value.begin() + i, value.end(), std::string(""));
|
||||
// break;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << std::string(10, ' ');
|
||||
std::cout << "parameter: \033[32m" << std::right << std::setw(40) << key << " \033[34mdefault: \033[30m"
|
||||
<< std::right << std::setw(5) << value[1] << " \033[34mcomment: \033[33m"
|
||||
<< std::left << std::setw(80) << concatenatedElement << "\033[0m" << std::endl;
|
||||
}
|
||||
|
||||
return parameters;
|
||||
}
|
||||
|
||||
// everything above is called from here as 'extractParameters()'
|
||||
int main() {
|
||||
std::unordered_map<std::string, std::vector<std::string>> parameters = extractParameters();
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue