From 739d6d30227b15114e40881ac29eda04521e2a2c Mon Sep 17 00:00:00 2001
From: pudepiedj
Date: Fri, 6 Oct 2023 09:52:33 +0100
Subject: [PATCH] Automatic helper dev

---
 common/common.h                           |   1 +
 examples/cmap-example/cmap-example.cpp    |   2 +-
 .../cmap-example/find_implemented_args.py |  55 ++++++---
 help_list.txt                             | 104 ++++++++++++++++++
 4 files changed, 147 insertions(+), 15 deletions(-)
 create mode 100644 help_list.txt

diff --git a/common/common.h b/common/common.h
index e095c56e3..30260486f 100644
--- a/common/common.h
+++ b/common/common.h
@@ -79,6 +79,7 @@ struct gpt_params {
     std::string model_draft = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
     std::string prompt = "";
+    std::string prompt_file = ""; // store the external prompt file
     std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
     std::string input_prefix = ""; // string to prefix user inputs with
     std::string input_suffix = ""; // string to suffix user inputs with
diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp
index b2a7b2735..ece30702a 100644
--- a/examples/cmap-example/cmap-example.cpp
+++ b/examples/cmap-example/cmap-example.cpp
@@ -130,7 +130,7 @@ int main() {
     if (x != 0) {
         for (const auto& kvp : bitdict) {
             if ((x & std::stoi(kvp.first)) != 0) {
-                printf("Appcode %3d %s ", x, kvp.first.c_str());
+                printf("appcode %3d %s ", x, kvp.first.c_str());
                 for (const auto& element : kvp.second) {
                     printf(" %5s", element.c_str());
                 }
diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py
index 8d745c822..e46689436 100644
--- a/examples/cmap-example/find_implemented_args.py
+++ b/examples/cmap-example/find_implemented_args.py
@@ -1,22 +1,27 @@
+# search the specified directory tree for .cpp files and collect the gpt_params attributes (params.xxx) they use
+
 import os
 import re
 
 def find_arguments(directory):
     arguments = {}
 
-    # Get a list of all .cpp files in the specified directory
-    cpp_files = [filename for filename in os.listdir(directory) if filename.endswith('.cpp')]
+    # Use os.walk() to traverse through files in directory and subdirectories
+    for root, dirs, files in os.walk(directory):
+        for file in files:
+            if file.endswith('.cpp'):
+                filepath = os.path.join(root, file)
+                with open(filepath, 'r') as f:
+                    content = f.read()
 
-    # Read each .cpp file and search for the specified expressions
-    for filename in cpp_files:
-        with open(os.path.join(directory, filename), 'r') as file:
-            content = file.read()
+                # Search for the expression "params." and read the attribute without trailing detritus
+                matches = re.findall(r'params\.(.*?)(?=[\). \n<,;}])', content)
 
-            # Search for the expressions using regular expressions
-            matches = re.findall(r'argv\s*\[\s*i\s*\]\s*==\s*([\'"])(?P<arg>-[a-zA-Z]+|\-\-[a-zA-Z]+[a-zA-Z0-9-]*)\1', content)
+                # Remove duplicates from matches list
+                arguments_list = list(set([match.strip() for match in matches]))
 
-            # Add the found arguments to the dictionary
-            arguments[filename] = [match[1] for match in matches]
+                # Add the matches to the dictionary
+                arguments[filepath] = arguments_list
 
     return arguments
 
@@ -24,7 +29,29 @@ def find_arguments(directory):
 # Specify the directory you want to search for cpp files
 directory = '/Users/edsilm2/llama.cpp/examples'
 
-# Call the function and print the result
-result = find_arguments(directory)
-for filename, arguments in result.items():
-    print(filename, arguments)
\ No newline at end of file
+if __name__ == '__main__':
+    # Call the find function and print the result
+    result = find_arguments(directory)
+    all_of_them = set()
+    for filename, arguments in result.items():
+        print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n")
+        for argument in arguments:
+            if argument not in all_of_them:
+                all_of_them.add(argument)
+    print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.")
+
+    with open("help_list.txt", "r") as helpfile:
+        lines = helpfile.read().split("\n")
+    for filename, arguments in result.items():
+        parameters = []
+        for line in lines:
+            for argument in arguments:
+                if argument in line:
+                    parameters.append(line)
+        all_parameters = set(parameters)
+        print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n   command-line arguments available and gpt-params functions implemented:\n")
+        if not all_parameters:
+            print(f"   \033[32mNone\033[0m\n")
+        else:
+            for parameter in all_parameters:
+                print(f"       help: \033[33m{parameter:<30}\033[0m")
\ No newline at end of file
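
Note: a quick way to sanity-check the "params." pattern introduced above. This is a minimal standalone sketch: the pattern is copied from the hunk, but the sample C++ lines are invented for illustration and are not from the repository.

import re

# Capture the attribute name that follows "params." up to (but not including)
# a terminator character: ')', '.', ' ', '\n', '<', ',', ';' or '}'.
pattern = re.compile(r'params\.(.*?)(?=[\). \n<,;}])')

# Invented lines in the style of the llama.cpp examples.
sample = '''
    params.n_threads = std::stoi(argv[i]);
    printf("%d\\n", params.n_predict);
    if (params.prompt_file.empty()) { return 1; }
'''

print(sorted({m.strip() for m in pattern.findall(sample)}))
# prints: ['n_predict', 'n_threads', 'prompt_file']

The new gpt_params field added in common/common.h above (prompt_file) is exactly the kind of attribute this pattern picks up, with the trailing ".empty()" stripped by the lookahead.
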
diff --git a/help_list.txt b/help_list.txt
new file mode 100644
index 000000000..08dcf67ec
--- /dev/null
+++ b/help_list.txt
@@ -0,0 +1,104 @@
+-h, --help show this help message and exit
+-i, --interactive run in interactive mode
+--interactive-first run in interactive mode and wait for input right away
+-ins, --instruct run in instruction mode (use with Alpaca models)
+--multiline-input allows you to write or paste multiple lines without ending each in '\\'
+-r PROMPT, --reverse-prompt PROMPT
+ halt generation at PROMPT, return control in interactive mode
+ (can be specified more than once for multiple prompts).
+--color colorise output to distinguish prompt and user input from generations
+-s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)
+-t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads);
+-tb N, --threads-batch N
+ number of threads to use during batch and prompt processing (default: same as --threads)
+-p PROMPT, --prompt PROMPT
+ prompt to start generation with (default: empty)
+-e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)
+--prompt-cache FNAME file to cache prompt state for faster startup (default: none)
+--prompt-cache-all if specified, saves user input and generations to cache as well.
+ not supported with --interactive or other interactive options
+--prompt-cache-ro if specified, uses the prompt cache but does not update it.
+--random-prompt start with a randomized prompt.
+--in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string
+--in-prefix STRING string to prefix user inputs with (default: empty)
+--in-suffix STRING string to suffix after user inputs with (default: empty)
+-f FNAME, --file FNAME
+ prompt file to start generation.
+-n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
+-c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
+-b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
+--top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
+--top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
+--tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
+--typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
+--repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
+--repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
+--presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
+--frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
+--mirostat N use Mirostat sampling.
+ Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.
+ (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
+--mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
+--mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
+-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS
+ modifies the likelihood of token appearing in the completion,
+ i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
+ or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'
+--grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir)
+--grammar-file FNAME file to read grammar from
+--cfg-negative-prompt PROMPT
+ negative prompt to use for guidance. (default: empty)
+--cfg-negative-prompt-file FNAME
+ negative prompt file to use for guidance. (default: empty)
+--cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
+--rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale
+--rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)
+--rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model)
+--ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)
+--no-penalize-nl do not penalize newline token
+--memory-f32 use f32 instead of f16 for memory key+value (default: disabled)
+ not recommended: doubles context memory required and no measurable increase in quality
+--temp N temperature (default: %.1f)\n", (double)params.temp);
+--logits-all return logits for all tokens in the batch (default: disabled)
+--hellaswag compute HellaSwag score over random tasks from datafile supplied with -f
+--hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
+--keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
+--draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft);
+--chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
+-np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel);
+-ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences);
+-cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)
+ if (llama_mlock_supported()) {
+ --mlock force system to keep model in RAM rather than swapping or compressing
+ }
+ if (llama_mmap_supported()) {
+ --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)
+ }
+--numa attempt optimizations that help on some NUMA systems
+ if run without this previously, it is recommended to drop the system page cache before using this
+ see https://github.com/ggerganov/llama.cpp/issues/1437
+#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
+-ngl N, --n-gpu-layers N
+ number of layers to store in VRAM
+-ngld N, --n-gpu-layers-draft N
+ number of layers to store in VRAM for the draft model
+-ts SPLIT --tensor-split SPLIT
+ how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1
+-mg i, --main-gpu i the GPU to use for scratch and small tensors
+#ifdef GGML_USE_CUBLAS
+-nommq, --no-mul-mat-q
+ use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.
+ Not recommended since this is both slower and uses more VRAM.
+#endif // GGML_USE_CUBLAS
+#endif
+--verbose-prompt print prompt before generation
+ fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles
+--lora FNAME apply LoRA adapter (implies --no-mmap)
+--lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)
+--lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter
+-m FNAME, --model FNAME
+ model path (default: %s)\n", params.model.c_str());
+-md FNAME, --model-draft FNAME
+ draft model for speculative decoding (default: %s)\n", params.model.c_str());
+-ld LOGDIR, --logdir LOGDIR
+ path under which to save YAML logs (no logging if unset)
\ No newline at end of file
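
Note: because the rewritten script keeps all reporting under the __main__ guard, find_arguments() can also be imported and driven from other tooling. A minimal sketch, assuming it is run from a llama.cpp checkout with help_list.txt present and examples/cmap-example on the import path; the command-line handling here is hypothetical and not part of the patch:

import sys
from find_implemented_args import find_arguments

# Let the caller override the hard-coded '/Users/edsilm2/llama.cpp/examples'
# constant instead of editing the script (hypothetical convenience wrapper).
root = sys.argv[1] if len(sys.argv) > 1 else 'examples'

for path, attrs in sorted(find_arguments(root).items()):
    print(f"{path}: {', '.join(sorted(attrs))}")
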