From 739d6d30227b15114e40881ac29eda04521e2a2c Mon Sep 17 00:00:00 2001
From: pudepiedj
Date: Fri, 6 Oct 2023 09:52:33 +0100
Subject: [PATCH] Automatic helper dev

---
 common/common.h                           |   1 +
 examples/cmap-example/cmap-example.cpp    |   2 +-
 .../cmap-example/find_implemented_args.py |  55 ++++++---
 help_list.txt                             | 104 ++++++++++++++++++
 4 files changed, 147 insertions(+), 15 deletions(-)
 create mode 100644 help_list.txt

diff --git a/common/common.h b/common/common.h
index e095c56e3..30260486f 100644
--- a/common/common.h
+++ b/common/common.h
@@ -79,6 +79,7 @@ struct gpt_params {
     std::string model_draft = ""; // draft model for speculative decoding
     std::string model_alias = "unknown"; // model alias
     std::string prompt = "";
+    std::string prompt_file = ""; // store the external prompt file
     std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
     std::string input_prefix = ""; // string to prefix user inputs with
     std::string input_suffix = ""; // string to suffix user inputs with
diff --git a/examples/cmap-example/cmap-example.cpp b/examples/cmap-example/cmap-example.cpp
index b2a7b2735..ece30702a 100644
--- a/examples/cmap-example/cmap-example.cpp
+++ b/examples/cmap-example/cmap-example.cpp
@@ -130,7 +130,7 @@ int main() {
     if (x != 0) {
         for (const auto& kvp : bitdict) {
             if ((x & std::stoi(kvp.first)) != 0) {
-                printf("Appcode %3d %s ", x, kvp.first.c_str());
+                printf("appcode %3d %s ", x, kvp.first.c_str());
                 for (const auto& element : kvp.second) {
                     printf(" %5s", element.c_str());
                 }
diff --git a/examples/cmap-example/find_implemented_args.py b/examples/cmap-example/find_implemented_args.py
index 8d745c822..e46689436 100644
--- a/examples/cmap-example/find_implemented_args.py
+++ b/examples/cmap-example/find_implemented_args.py
@@ -1,22 +1,27 @@
+# search the specified directory tree for .cpp files and collect the gpt_params attributes (params.xxx) they use
+
 import os
 import re
 
 def find_arguments(directory):
     arguments = {}
 
-    # Get a list of all .cpp files in the specified directory
-    cpp_files = [filename for filename in os.listdir(directory) if filename.endswith('.cpp')]
+    # Use os.walk() to traverse through files in directory and subdirectories
+    for root, dirs, files in os.walk(directory):
+        for file in files:
+            if file.endswith('.cpp'):
+                filepath = os.path.join(root, file)
+                with open(filepath, 'r') as f:
+                    content = f.read()
 
-    # Read each .cpp file and search for the specified expressions
-    for filename in cpp_files:
-        with open(os.path.join(directory, filename), 'r') as file:
-            content = file.read()
+                # Search for the expression "params." and read the attribute without trailing detritus
+                matches = re.findall(r'params\.(.*?)(?=[\). \n<,;}])', content)
 
-            # Search for the expressions using regular expressions
-            matches = re.findall(r'argv\s*\[\s*i\s*\]\s*==\s*([\'"])(?P<arg>-[a-zA-Z]+|\-\-[a-zA-Z]+[a-zA-Z0-9-]*)\1', content)
+                # Remove duplicates from matches list
+                arguments_list = list(set([match.strip() for match in matches]))
 
-            # Add the found arguments to the dictionary
-            arguments[filename] = [match[1] for match in matches]
+                # Add the matches to the dictionary
+                arguments[filepath] = arguments_list
 
     return arguments
 
@@ -24,7 +29,29 @@ def find_arguments(directory):
 # Specify the directory you want to search for cpp files
 directory = '/Users/edsilm2/llama.cpp/examples'
 
-# Call the function and print the result
-result = find_arguments(directory)
-for filename, arguments in result.items():
-    print(filename, arguments)
\ No newline at end of file
+if __name__ == '__main__':
+    # Call the find function and print the result
+    result = find_arguments(directory)
+    all_of_them = set()
+    for filename, arguments in result.items():
+        print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n")
+        for argument in arguments:
+            if argument not in all_of_them:
+                all_of_them.add(argument)
+    print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.")
+
+    with open("help_list.txt", "r") as helpfile:
+        lines = helpfile.read().split("\n")
+    for filename, arguments in result.items():
+        parameters = []
+        for line in lines:
+            for argument in arguments:
+                if argument in line:
+                    parameters.append(line)
+        all_parameters = set(parameters)
+        print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n   command-line arguments available and gpt-params functions implemented:\n")
+        if not all_parameters:
+            print(f"   \033[32mNone\033[0m\n")
+        else:
+            for parameter in all_parameters:
+                print(f"       help: \033[33m{parameter:<30}\033[0m")
\ No newline at end of file
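
Note: a quick way to sanity-check the "params." pattern introduced above. This is a minimal standalone sketch: the pattern is copied from the hunk, but the sample C++ lines are invented for illustration and are not from the repository.

import re

# Capture the attribute name that follows "params." up to (but not including)
# a terminator character: ')', '.', ' ', '\n', '<', ',', ';' or '}'.
pattern = re.compile(r'params\.(.*?)(?=[\). \n<,;}])')

# Invented lines in the style of the llama.cpp examples.
sample = '''
    params.n_threads = std::stoi(argv[i]);
    printf("%d\\n", params.n_predict);
    if (params.prompt_file.empty()) { return 1; }
'''

print(sorted({m.strip() for m in pattern.findall(sample)}))
# prints: ['n_predict', 'n_threads', 'prompt_file']

The new gpt_params field added in common/common.h above (prompt_file) is exactly the kind of attribute this pattern picks up, with the trailing ".empty()" stripped by the lookahead.
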
diff --git a/help_list.txt b/help_list.txt
new file mode 100644
index 000000000..08dcf67ec
--- /dev/null
+++ b/help_list.txt
@@ -0,0 +1,104 @@
+-h, --help show this help message and exit
+-i, --interactive run in interactive mode
+--interactive-first run in interactive mode and wait for input right away
+-ins, --instruct run in instruction mode (use with Alpaca models)
+--multiline-input allows you to write or paste multiple lines without ending each in '\\'
+-r PROMPT, --reverse-prompt PROMPT
+ halt generation at PROMPT, return control in interactive mode
+ (can be specified more than once for multiple prompts).
+--color colorise output to distinguish prompt and user input from generations
+-s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)
+-t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads);
+-tb N, --threads-batch N
+ number of threads to use during batch and prompt processing (default: same as --threads)
+-p PROMPT, --prompt PROMPT
+ prompt to start generation with (default: empty)
+-e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)
+--prompt-cache FNAME file to cache prompt state for faster startup (default: none)
+--prompt-cache-all if specified, saves user input and generations to cache as well.
+ not supported with --interactive or other interactive options
+--prompt-cache-ro if specified, uses the prompt cache but does not update it.
+--random-prompt start with a randomized prompt.
+--in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string
+--in-prefix STRING string to prefix user inputs with (default: empty)
+--in-suffix STRING string to suffix after user inputs with (default: empty)
+-f FNAME, --file FNAME
+ prompt file to start generation.
+-n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
+-c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
+-b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
+--top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
+--top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
+--tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
+--typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
+--repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
+--repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
+--presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
+--frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
+--mirostat N use Mirostat sampling.
+ Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.
+ (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
+--mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
+--mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
+-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS
+ modifies the likelihood of token appearing in the completion,
+ i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
+ or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'
+--grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir)
+--grammar-file FNAME file to read grammar from
+--cfg-negative-prompt PROMPT
+ negative prompt to use for guidance. (default: empty)
+--cfg-negative-prompt-file FNAME
+ negative prompt file to use for guidance. (default: empty)
+--cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
+--rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale
+--rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)
+--rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model)
+--ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)
+--no-penalize-nl do not penalize newline token
+--memory-f32 use f32 instead of f16 for memory key+value (default: disabled)
+ not recommended: doubles context memory required and no measurable increase in quality
+--temp N temperature (default: %.1f)\n", (double)params.temp);
+--logits-all return logits for all tokens in the batch (default: disabled)
+--hellaswag compute HellaSwag score over random tasks from datafile supplied with -f
+--hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
+--keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
+--draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft);
+--chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
+-np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel);
+-ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences);
+-cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)
+ if (llama_mlock_supported()) {
+ --mlock force system to keep model in RAM rather than swapping or compressing
+ }
+ if (llama_mmap_supported()) {
+ --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)
+ }
+--numa attempt optimizations that help on some NUMA systems
+ if run without this previously, it is recommended to drop the system page cache before using this
+ see https://github.com/ggerganov/llama.cpp/issues/1437
+#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
+-ngl N, --n-gpu-layers N
+ number of layers to store in VRAM
+-ngld N, --n-gpu-layers-draft N
+ number of layers to store in VRAM for the draft model
+-ts SPLIT --tensor-split SPLIT
+ how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1
+-mg i, --main-gpu i the GPU to use for scratch and small tensors
+#ifdef GGML_USE_CUBLAS
+-nommq, --no-mul-mat-q
+ use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.
+ Not recommended since this is both slower and uses more VRAM.
+#endif // GGML_USE_CUBLAS
+#endif
+--verbose-prompt print prompt before generation
+ fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles
+--lora FNAME apply LoRA adapter (implies --no-mmap)
+--lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)
+--lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter
+-m FNAME, --model FNAME
+ model path (default: %s)\n", params.model.c_str());
+-md FNAME, --model-draft FNAME
+ draft model for speculative decoding (default: %s)\n", params.model.c_str());
+-ld LOGDIR, --logdir LOGDIR
+ path under which to save YAML logs (no logging if unset)
\ No newline at end of file
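
Note: because the rewritten script keeps all reporting under the __main__ guard, find_arguments() can also be imported and driven from other tooling. A minimal sketch, assuming it is run from a llama.cpp checkout with help_list.txt present and examples/cmap-example on the import path; the command-line handling here is hypothetical and not part of the patch:

import sys
from find_implemented_args import find_arguments

# Let the caller override the hard-coded '/Users/edsilm2/llama.cpp/examples'
# constant instead of editing the script (hypothetical convenience wrapper).
root = sys.argv[1] if len(sys.argv) > 1 else 'examples'

for path, attrs in sorted(find_arguments(root).items()):
    print(f"{path}: {', '.join(sorted(attrs))}")
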