Update contextual help dev

pudepiedj 2023-10-06 14:50:17 +01:00
parent 739d6d3022
commit 7a4dcff667
2 changed files with 98 additions and 68 deletions


@@ -2,6 +2,18 @@
 import os
 import re
+import collections
+import re
+
+def replace_dashes_with_underscores(filename):
+    with open(filename, 'r') as file:
+        content = file.read()
+    # Match '-' surrounded by word characters on both sides and replace with '_'
+    replaced_content = re.sub(r'(\w)-(\w)', r'\1_\2', content)
+    with open(filename, 'w') as file:
+        file.write(replaced_content)

 def find_arguments(directory):
     arguments = {}
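
Note that the substitution fires on any dash with word characters on both sides, not just flag names, so hyphenated prose and signed numbers in the help text get rewritten too; the help_list.txt diff below records exactly this (`BNF_like`, `NTK_aware`, `15043_1`). A minimal standalone sketch of the behaviour, separate from the commit:

import re

sample = "--interactive-first --logit-bias 15043-1 NTK-aware"
# The leading '--' survives (no word character between the two dashes),
# but every in-word dash is rewritten, including the one in '15043-1':
print(re.sub(r'(\w)-(\w)', r'\1_\2', sample))
# prints: --interactive_first --logit_bias 15043_1 NTK_aware
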
@@ -14,9 +26,8 @@ def find_arguments(directory):
         with open(filepath, 'r') as file:
             content = file.read()
-            # Search for the expression "params." and read the attribute without trailing detritus
-            matches = re.findall(r'params\.(.*?)(?=[\). <,;}])', content)
+            # Search for the expression "params." excluding prefixes and read the attribute without trailing detritus
+            matches = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). <,;}]|\Z)', content)
             # Remove duplicates from matches list
             arguments_list = list(set([match.strip() for match in matches]))
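
The effect of the tightened pattern: requiring start-of-text or whitespace before `params.` stops matches inside longer identifiers such as `gpt_params.`, and `|\Z` lets a match that runs to the end of the file still terminate. A standalone sketch with a made-up C++ fragment:

import re

content = 'n = params.n_threads; x = gpt_params.foo;'
old = re.findall(r'params\.(.*?)(?=[\). <,;}])', content)
new = re.findall(r'(?:^|\s)params\.(.*?)(?=[\). <,;}]|\Z)', content)
print(old)  # ['n_threads', 'foo'] - the old pattern also hits gpt_params.foo
print(new)  # ['n_threads']        - the prefixed identifier is excluded
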
@@ -25,28 +36,34 @@ def find_arguments(directory):
     return arguments

-# Specify the directory you want to search for cpp files
-directory = '/Users/edsilm2/llama.cpp/examples'
-
-if __name__ == '__main__':
-    # Call the find function and print the result
-    result = find_arguments(directory)
+def output_results(result):
+    sorted_result = collections.OrderedDict(sorted(result.items()))
     all_of_them = set()
-    for filename, arguments in result.items():
-        print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n")
+    for filename, arguments in sorted_result.items():
+        print(f"Filename: \033[32m{filename.split('/')[-1]}\033[0m, arguments: {arguments}\n")
         for argument in arguments:
             if argument not in all_of_them:
                 all_of_them.add("".join(argument))
     print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.")
+    return sorted_result

-with open("help_list.txt", "r") as helpfile:
-    lines = helpfile.read().split("\n")
+def find_parameters(file, sorted_result):
+    with open(file, "r") as helpfile:
+        lines = helpfile.read().split("\n")
-    for filename, arguments in result.items():
+    for filename, arguments in sorted_result.items():
         parameters = []
         for line in lines:
             for argument in arguments:
-                if argument in line:
-                    parameters.append(line)
+                # need to try to avoid spurious matches
+                argument1 = "--" + argument + " "
+                if argument1 in line:
+                    parameters.append(line)
+                # need to try to avoid spurious matches
+                argument2 = "params." + argument.split('n_')[-1]
+                if argument2 in line:
+                    parameters.append(line)
+                argument3 = "params." + argument
+                if argument3 in line:
+                    parameters.append(line)
         all_parameters = set(parameters)
         print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n   command-line arguments available and gpt-params functions implemented:\n")
@@ -55,3 +72,16 @@ if __name__ == '__main__':
         else:
             for parameter in all_parameters:
                 print(f"    help: \033[33m{parameter:<30}\033[0m")
+
+# Specify the directory you want to search for cpp files
+directory = '/Users/edsilm2/llama.cpp/examples'
+
+if __name__ == '__main__':
+    # First we alter all the hyphenated help words in help-file.txt to underscores
+    replace_dashes_with_underscores('help_list.txt')
+    # Call the find function and output the result
+    result = find_arguments(directory)
+    sorted = output_results(result)
+    # analyse the files and what they contain
+    find_parameters("help_list.txt", sorted)

help_list.txt

@@ -1,104 +1,104 @@
 -h, --help            show this help message and exit
 -i, --interactive     run in interactive mode
---interactive-first   run in interactive mode and wait for input right away
+--interactive_first   run in interactive mode and wait for input right away
 -ins, --instruct      run in instruction mode (use with Alpaca models)
---multiline-input     allows you to write or paste multiple lines without ending each in '\\'
+--multiline_input     allows you to write or paste multiple lines without ending each in '\\'
--r PROMPT, --reverse-prompt PROMPT
+-r PROMPT, --reverse_prompt PROMPT
                       halt generation at PROMPT, return control in interactive mode
                       (can be specified more than once for multiple prompts).
 --color               colorise output to distinguish prompt and user input from generations
 -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)
 -t N, --threads N     number of threads to use during generation (default: %d)\n", params.n_threads);
--tb N, --threads-batch N
+-tb N, --threads_batch N
                       number of threads to use during batch and prompt processing (default: same as --threads)
 -p PROMPT, --prompt PROMPT
                       prompt to start generation with (default: empty)
 -e, --escape          process prompt escape sequences (\\n, \\r, \\t, \\', \\\", \\\\)
---prompt-cache FNAME  file to cache prompt state for faster startup (default: none)
+--prompt_cache FNAME  file to cache prompt state for faster startup (default: none)
---prompt-cache-all    if specified, saves user input and generations to cache as well.
+--prompt_cache_all    if specified, saves user input and generations to cache as well.
                       not supported with --interactive or other interactive options
---prompt-cache-ro     if specified, uses the prompt cache but does not update it.
+--prompt_cache_ro     if specified, uses the prompt cache but does not update it.
---random-prompt       start with a randomized prompt.
+--random_prompt       start with a randomized prompt.
---in-prefix-bos       prefix BOS to user inputs, preceding the `--in-prefix` string
+--in_prefix_bos       prefix BOS to user inputs, preceding the `--in_prefix` string
---in-prefix STRING    string to prefix user inputs with (default: empty)
+--in_prefix STRING    string to prefix user inputs with (default: empty)
---in-suffix STRING    string to suffix after user inputs with (default: empty)
+--in_suffix STRING    string to suffix after user inputs with (default: empty)
 -f FNAME, --file FNAME
                       prompt file to start generation.
--n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
+-n N, --n_predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
--c N, --ctx-size N    size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
+-c N, --ctx_size N    size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
--b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
+-b N, --batch_size N  batch size for prompt processing (default: %d)\n", params.n_batch);
---top-k N             top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
+--top_k N             top_k sampling (default: %d, 0 = disabled)\n", params.top_k);
---top-p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
+--top_p N             top_p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
 --tfs N               tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
 --typical N           locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
---repeat-last-n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
+--repeat_last_n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
---repeat-penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
+--repeat_penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
---presence-penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
+--presence_penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
---frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
+--frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
 --mirostat N          use Mirostat sampling.
                       Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.
                       (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
---mirostat-lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
+--mirostat_lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
---mirostat-ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
+--mirostat_ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
--l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS
+-l TOKEN_ID(+/-)BIAS, --logit_bias TOKEN_ID(+/-)BIAS
                       modifies the likelihood of token appearing in the completion,
-                      i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
+                      i.e. `--logit_bias 15043+1` to increase likelihood of token ' Hello',
-                      or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'
+                      or `--logit_bias 15043_1` to decrease likelihood of token ' Hello'
---grammar GRAMMAR     BNF-like grammar to constrain generations (see samples in grammars/ dir)
+--grammar GRAMMAR     BNF_like grammar to constrain generations (see samples in grammars/ dir)
---grammar-file FNAME  file to read grammar from
+--grammar_file FNAME  file to read grammar from
---cfg-negative-prompt PROMPT
+--cfg_negative_prompt PROMPT
                       negative prompt to use for guidance. (default: empty)
---cfg-negative-prompt-file FNAME
+--cfg_negative_prompt_file FNAME
                       negative prompt file to use for guidance. (default: empty)
---cfg-scale N         strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
+--cfg_scale N         strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
---rope-scale N        RoPE context linear scaling factor, inverse of --rope-freq-scale
+--rope_scale N        RoPE context linear scaling factor, inverse of --rope_freq_scale
---rope-freq-base N    RoPE base frequency, used by NTK-aware scaling (default: loaded from model)
+--rope_freq_base N    RoPE base frequency, used by NTK_aware scaling (default: loaded from model)
---rope-freq-scale N   RoPE frequency linear scaling factor (default: loaded from model)
+--rope_freq_scale N   RoPE frequency linear scaling factor (default: loaded from model)
---ignore-eos          ignore end of stream token and continue generating (implies --logit-bias 2-inf)
+--ignore_eos          ignore end of stream token and continue generating (implies --logit_bias 2_inf)
---no-penalize-nl      do not penalize newline token
+--no_penalize_nl      do not penalize newline token
---memory-f32          use f32 instead of f16 for memory key+value (default: disabled)
+--memory_f32          use f32 instead of f16 for memory key+value (default: disabled)
                       not recommended: doubles context memory required and no measurable increase in quality
 --temp N              temperature (default: %.1f)\n", (double)params.temp);
---logits-all          return logits for all tokens in the batch (default: disabled)
+--logits_all          return logits for all tokens in the batch (default: disabled)
 --hellaswag           compute HellaSwag score over random tasks from datafile supplied with -f
---hellaswag-tasks N   number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
+--hellaswag_tasks N   number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
 --keep N              number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
 --draft N             number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft);
 --chunks N            max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
 -np N, --parallel N   number of parallel sequences to decode (default: %d)\n", params.n_parallel);
 -ns N, --sequences N  number of sequences to decode (default: %d)\n", params.n_sequences);
--cb, --cont-batching  enable continuous batching (a.k.a dynamic batching) (default: disabled)
+-cb, --cont_batching  enable continuous batching (a.k.a dynamic batching) (default: disabled)
 if (llama_mlock_supported()) {
 --mlock               force system to keep model in RAM rather than swapping or compressing
 }
 if (llama_mmap_supported()) {
---no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)
+--no_mmap             do not memory_map model (slower load but may reduce pageouts if not using mlock)
 }
 --numa                attempt optimizations that help on some NUMA systems
                       if run without this previously, it is recommended to drop the system page cache before using this
                       see https://github.com/ggerganov/llama.cpp/issues/1437
 #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
--ngl N, --n-gpu-layers N
+-ngl N, --n_gpu_layers N
                       number of layers to store in VRAM
--ngld N, --n-gpu-layers-draft N
+-ngld N, --n_gpu_layers_draft N
                       number of layers to store in VRAM for the draft model
--ts SPLIT --tensor-split SPLIT
+-ts SPLIT --tensor_split SPLIT
-                      how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1
+                      how to split tensors across multiple GPUs, comma_separated list of proportions, e.g. 3,1
--mg i, --main-gpu i   the GPU to use for scratch and small tensors
+-mg i, --main_gpu i   the GPU to use for scratch and small tensors
 #ifdef GGML_USE_CUBLAS
--nommq, --no-mul-mat-q
+-nommq, --no_mul_mat_q
                       use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.
                       Not recommended since this is both slower and uses more VRAM.
 #endif // GGML_USE_CUBLAS
 #endif
---verbose-prompt      print prompt before generation
+--verbose_prompt      print prompt before generation
-fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles
+fprintf(stderr, " --simple_io use basic IO for better compatibility in subprocesses and limited consoles
---lora FNAME          apply LoRA adapter (implies --no-mmap)
+--lora FNAME          apply LoRA adapter (implies --no_mmap)
---lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)
+--lora_scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no_mmap)
---lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter
+--lora_base FNAME     optional model to use as a base for the layers modified by the LoRA adapter
 -m FNAME, --model FNAME
                       model path (default: %s)\n", params.model.c_str());
--md FNAME, --model-draft FNAME
+-md FNAME, --model_draft FNAME
                       draft model for speculative decoding (default: %s)\n", params.model.c_str());
 -ld LOGDIR, --logdir LOGDIR
                       path under which to save YAML logs (no logging if unset)