Update contextual help dev

pudepiedj 2023-10-06 14:50:17 +01:00
parent 739d6d3022
commit 7a4dcff667
2 changed files with 98 additions and 68 deletions


@@ -2,6 +2,18 @@
 import os
 import re
+import collections
+import re
+
+def replace_dashes_with_underscores(filename):
+    with open(filename, 'r') as file:
+        content = file.read()
+    # Match '-' surrounded by word characters on both sides and replace with '_'
+    replaced_content = re.sub(r'(\w)-(\w)', r'\1_\2', content)
+    with open(filename, 'w') as file:
+        file.write(replaced_content)

 def find_arguments(directory):
     arguments = {}
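
Note that the substitution fires on any dash with word characters on both sides, not just flag names, so hyphenated prose and signed numbers in the help text get rewritten too; the help_list.txt diff below records exactly this (`BNF_like`, `NTK_aware`, `15043_1`). A minimal standalone sketch of the behaviour, separate from the commit:

import re

sample = "--interactive-first --logit-bias 15043-1 NTK-aware"
# The leading '--' survives (no word character between the two dashes),
# but every in-word dash is rewritten, including the one in '15043-1':
print(re.sub(r'(\w)-(\w)', r'\1_\2', sample))
# prints: --interactive_first --logit_bias 15043_1 NTK_aware
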
@@ -14,9 +26,8 @@ def find_arguments(directory):
         with open(filepath, 'r') as file:
             content = file.read()
-            # Search for the expression "params." and read the attribute without trailing detritus
-            matches = re.findall(r'params\.(.*?)(?=[\). <,;}])', content)
+            # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus
+            matches = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). <,;}]|\Z)', content)
             # Remove duplicates from matches list
             arguments_list = list(set([match.strip() for match in matches]))
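
The effect of the tightened pattern: requiring start-of-text or whitespace before `params.` stops matches inside longer identifiers such as `gpt_params.`, and `|\Z` lets a match that runs to the end of the file still terminate. A standalone sketch with a made-up C++ fragment:

import re

content = 'n = params.n_threads; x = gpt_params.foo;'
old = re.findall(r'params\.(.*?)(?=[\). <,;}])', content)
new = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). <,;}]|\Z)', content)
print(old)  # ['n_threads', 'foo'] - the old pattern also hits gpt_params.foo
print(new)  # ['n_threads']        - the prefixed identifier is excluded
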
@@ -25,28 +36,34 @@ def find_arguments(directory):
     return arguments

-# Specify the directory you want to search for cpp files
-directory = '/Users/edsilm2/llama.cpp/examples'
-
-if __name__ == '__main__':
-    # Call the find function and print the result
-    result = find_arguments(directory)
+def output_results(result):
+    sorted_result = collections.OrderedDict(sorted(result.items()))
     all_of_them = set()
-    for filename, arguments in result.items():
-        print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n")
+    for filename, arguments in sorted_result.items():
+        print(f"Filename: \033[32m{filename.split('/')[-1]}\033[0m, arguments: {arguments}\n")
         for argument in arguments:
             if argument not in all_of_them:
                 all_of_them.add("".join(argument))
     print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.")
+    return sorted_result

-with open("help_list.txt", "r") as helpfile:
-    lines = helpfile.read().split("\n")
+def find_parameters(file, sorted_result):
+    with open(file, "r") as helpfile:
+        lines = helpfile.read().split("\n")
-    for filename, arguments in result.items():
+    for filename, arguments in sorted_result.items():
         parameters = []
         for line in lines:
             for argument in arguments:
-                if argument in line:
-                    parameters.append(line)
+                # need to try to avoid spurious matches
+                argument1 = "--" + argument + " "
+                if argument1 in line:
+                    parameters.append(line)
+                # need to try to avoid spurious matches
+                argument2 = "params." + argument.split('n_')[-1]
+                if argument2 in line:
+                    parameters.append(line)
+                argument3 = "params." + argument
+                if argument3 in line:
+                    parameters.append(line)
         all_parameters = set(parameters)
         print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n   command-line arguments available and gpt-params functions implemented:\n")
@@ -55,3 +72,16 @@ if __name__ == '__main__':
         else:
             for parameter in all_parameters:
                 print(f"    help: \033[33m{parameter:<30}\033[0m")
+
+# Specify the directory you want to search for cpp files
+directory = '/Users/edsilm2/llama.cpp/examples'
+
+if __name__ == '__main__':
+    # First we alter all the hyphenated help words in help-file.txt to underscores
+    replace_dashes_with_underscores('help_list.txt')
+    # Call the find function and output the result
+    result = find_arguments(directory)
+    sorted = output_results(result)
+    # analyse the files and what they contain
+    find_parameters("help_list.txt", sorted)

help_list.txt

@@ -1,104 +1,104 @@
 -h, --help            show this help message and exit
 -i, --interactive     run in interactive mode
---interactive-first   run in interactive mode and wait for input right away
+--interactive_first   run in interactive mode and wait for input right away
 -ins, --instruct      run in instruction mode (use with Alpaca models)
---multiline-input     allows you to write or paste multiple lines without ending each in '\\'
+--multiline_input     allows you to write or paste multiple lines without ending each in '\\'
--r PROMPT, --reverse-prompt PROMPT
+-r PROMPT, --reverse_prompt PROMPT
                       halt generation at PROMPT, return control in interactive mode
                       (can be specified more than once for multiple prompts).
 --color               colorise output to distinguish prompt and user input from generations
 -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)
 -t N, --threads N     number of threads to use during generation (default: %d)\n", params.n_threads);
--tb N, --threads-batch N
+-tb N, --threads_batch N
                       number of threads to use during batch and prompt processing (default: same as --threads)
 -p PROMPT, --prompt PROMPT
                       prompt to start generation with (default: empty)
 -e, --escape          process prompt escape sequences (\\n, \\r, \\t, \\', \\\", \\\\)
---prompt-cache FNAME  file to cache prompt state for faster startup (default: none)
+--prompt_cache FNAME  file to cache prompt state for faster startup (default: none)
---prompt-cache-all    if specified, saves user input and generations to cache as well.
+--prompt_cache_all    if specified, saves user input and generations to cache as well.
                       not supported with --interactive or other interactive options
---prompt-cache-ro     if specified, uses the prompt cache but does not update it.
+--prompt_cache_ro     if specified, uses the prompt cache but does not update it.
---random-prompt       start with a randomized prompt.
+--random_prompt       start with a randomized prompt.
---in-prefix-bos       prefix BOS to user inputs, preceding the `--in-prefix` string
+--in_prefix_bos       prefix BOS to user inputs, preceding the `--in_prefix` string
---in-prefix STRING    string to prefix user inputs with (default: empty)
+--in_prefix STRING    string to prefix user inputs with (default: empty)
---in-suffix STRING    string to suffix after user inputs with (default: empty)
+--in_suffix STRING    string to suffix after user inputs with (default: empty)
 -f FNAME, --file FNAME
                       prompt file to start generation.
--n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
+-n N, --n_predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
--c N, --ctx-size N    size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
+-c N, --ctx_size N    size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
--b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
+-b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
---top-k N             top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
+--top_k N             top_k sampling (default: %d, 0 = disabled)\n", params.top_k);
---top-p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
+--top_p N             top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
 --tfs N               tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
 --typical N           locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
---repeat-last-n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
+--repeat_last_n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
---repeat-penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
+--repeat_penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
---presence-penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
+--presence_penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
---frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
+--frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
 --mirostat N          use Mirostat sampling.
                       Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.
                       (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
---mirostat-lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
+--mirostat_lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
---mirostat-ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
+--mirostat_ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
--l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS
+-l TOKEN_ID(+/-)BIAS, --logit_bias TOKEN_ID(+/-)BIAS
                       modifies the likelihood of token appearing in the completion,
-                      i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
+                      i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello',
-                      or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'
+                      or `--logit_bias 15043_1` to decrease likelihood of token ' Hello'
---grammar GRAMMAR     BNF-like grammar to constrain generations (see samples in grammars/ dir)
+--grammar GRAMMAR     BNF_like grammar to constrain generations (see samples in grammars/ dir)
---grammar-file FNAME  file to read grammar from
+--grammar_file FNAME  file to read grammar from
---cfg-negative-prompt PROMPT
+--cfg_negative_prompt PROMPT
                       negative prompt to use for guidance. (default: empty)
---cfg-negative-prompt-file FNAME
+--cfg_negative_prompt_file FNAME
                       negative prompt file to use for guidance. (default: empty)
---cfg-scale N         strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
+--cfg_scale N         strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
---rope-scale N        RoPE context linear scaling factor, inverse of --rope-freq-scale
+--rope_scale N        RoPE context linear scaling factor, inverse of --rope_freq_scale
---rope-freq-base N    RoPE base frequency, used by NTK-aware scaling (default: loaded from model)
+--rope_freq_base N    RoPE base frequency, used by NTK_aware scaling (default: loaded from model)
---rope-freq-scale N   RoPE frequency linear scaling factor (default: loaded from model)
+--rope_freq_scale N   RoPE frequency linear scaling factor (default: loaded from model)
---ignore-eos          ignore end of stream token and continue generating (implies --logit-bias 2-inf)
+--ignore_eos          ignore end of stream token and continue generating (implies --logit_bias 2_inf)
---no-penalize-nl      do not penalize newline token
+--no_penalize_nl      do not penalize newline token
---memory-f32          use f32 instead of f16 for memory key+value (default: disabled)
+--memory_f32          use f32 instead of f16 for memory key+value (default: disabled)
                       not recommended: doubles context memory required and no measurable increase in quality
 --temp N              temperature (default: %.1f)\n", (double)params.temp);
---logits-all          return logits for all tokens in the batch (default: disabled)
+--logits_all          return logits for all tokens in the batch (default: disabled)
 --hellaswag           compute HellaSwag score over random tasks from datafile supplied with -f
---hellaswag-tasks N   number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
+--hellaswag_tasks N   number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
 --keep N              number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
 --draft N             number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft);
 --chunks N            max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
 -np N, --parallel N   number of parallel sequences to decode (default: %d)\n", params.n_parallel);
 -ns N, --sequences N  number of sequences to decode (default: %d)\n", params.n_sequences);
--cb, --cont-batching  enable continuous batching (a.k.a dynamic batching) (default: disabled)
+-cb, --cont_batching  enable continuous batching (a.k.a dynamic batching) (default: disabled)
 if (llama_mlock_supported()) {
 --mlock               force system to keep model in RAM rather than swapping or compressing
 }
 if (llama_mmap_supported()) {
---no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)
+--no_mmap             do not memory_map model (slower load but may reduce pageouts if not using mlock)
 }
 --numa                attempt optimizations that help on some NUMA systems
                       if run without this previously, it is recommended to drop the system page cache before using this
                       see https://github.com/ggerganov/llama.cpp/issues/1437
 #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
--ngl N, --n-gpu-layers N
+-ngl N, --n_gpu_layers N
                       number of layers to store in VRAM
--ngld N, --n-gpu-layers-draft N
+-ngld N, --n_gpu_layers_draft N
                       number of layers to store in VRAM for the draft model
--ts SPLIT --tensor-split SPLIT
+-ts SPLIT --tensor_split SPLIT
-                      how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1
+                      how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 3,1
--mg i, --main-gpu i   the GPU to use for scratch and small tensors
+-mg i, --main_gpu i   the GPU to use for scratch and small tensors
 #ifdef GGML_USE_CUBLAS
--nommq, --no-mul-mat-q
+-nommq, --no_mul_mat_q
                       use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.
                       Not recommended since this is both slower and uses more VRAM.
 #endif // GGML_USE_CUBLAS
 #endif
---verbose-prompt      print prompt before generation
+--verbose_prompt      print prompt before generation
-fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles
+fprintf(stderr, " --simple_io use basic IO for better compatibility in subprocesses and limited consoles
---lora FNAME          apply LoRA adapter (implies --no-mmap)
+--lora FNAME          apply LoRA adapter (implies --no_mmap)
---lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)
+--lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap)
---lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter
+--lora_base FNAME     optional model to use as a base for the layers modified by the LoRA adapter
 -m FNAME, --model FNAME
                       model path (default: %s)\n", params.model.c_str());
--md FNAME, --model-draft FNAME
+-md FNAME, --model_draft FNAME
                       draft model for speculative decoding (default: %s)\n", params.model.c_str());
 -ld LOGDIR, --logdir LOGDIR
                       path under which to save YAML logs (no logging if unset)