Automatic helper dev
This commit is contained in:
parent
297b7b6301
commit
739d6d3022
4 changed files with 147 additions and 15 deletions
|
@ -79,6 +79,7 @@ struct gpt_params {
|
||||||
std::string model_draft = ""; // draft model for speculative decoding
|
std::string model_draft = ""; // draft model for speculative decoding
|
||||||
std::string model_alias = "unknown"; // model alias
|
std::string model_alias = "unknown"; // model alias
|
||||||
std::string prompt = "";
|
std::string prompt = "";
|
||||||
|
std::string prompt_file = ""; // store the external prompt file
|
||||||
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
|
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
|
||||||
std::string input_prefix = ""; // string to prefix user inputs with
|
std::string input_prefix = ""; // string to prefix user inputs with
|
||||||
std::string input_suffix = ""; // string to suffix user inputs with
|
std::string input_suffix = ""; // string to suffix user inputs with
|
||||||
|
|
|
@ -130,7 +130,7 @@ int main() {
|
||||||
if (x != 0) {
|
if (x != 0) {
|
||||||
for (const auto& kvp : bitdict) {
|
for (const auto& kvp : bitdict) {
|
||||||
if ((x & std::stoi(kvp.first)) != 0) {
|
if ((x & std::stoi(kvp.first)) != 0) {
|
||||||
printf("Appcode %3d %s ", x, kvp.first.c_str());
|
printf("appcode %3d %s ", x, kvp.first.c_str());
|
||||||
for (const auto& element : kvp.second) {
|
for (const auto& element : kvp.second) {
|
||||||
printf(" %5s", element.c_str());
|
printf(" %5s", element.c_str());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,22 +1,27 @@
|
||||||
|
# search the specified directory tree for .cpp files and collect the `params.` attributes each file references
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
|
||||||
def find_arguments(directory):
|
def find_arguments(directory):
|
||||||
arguments = {}
|
arguments = {}
|
||||||
|
|
||||||
# Get a list of all .cpp files in the specified directory
|
# Use os.walk() to traverse through files in directory and subdirectories
|
||||||
cpp_files = [filename for filename in os.listdir(directory) if filename.endswith('.cpp')]
|
for root, dirs, files in os.walk(directory):
|
||||||
|
for file in files:
|
||||||
# Read each .cpp file and search for the specified expressions
|
if file.endswith('.cpp'):
|
||||||
for filename in cpp_files:
|
filepath = os.path.join(root, file)
|
||||||
with open(os.path.join(directory, filename), 'r') as file:
|
with open(filepath, 'r') as file:
|
||||||
content = file.read()
|
content = file.read()
|
||||||
|
|
||||||
# Search for the expressions using regular expressions
|
# Search for the expression "params." and read the attribute without trailing detritus
|
||||||
matches = re.findall(r'argv\s*\[\s*i\s*\]\s*==\s*([\'"])(?P<arg>-[a-zA-Z]+|\-\-[a-zA-Z]+[a-zA-Z0-9-]*)\1', content)
|
matches = re.findall(r'params\.(.*?)(?=[\). <,;}])', content)
|
||||||
|
|
||||||
# Add the found arguments to the dictionary
|
# Remove duplicates from matches list
|
||||||
arguments[filename] = [match[1] for match in matches]
|
arguments_list = list(set([match.strip() for match in matches]))
|
||||||
|
|
||||||
|
# Add the matches to the dictionary
|
||||||
|
arguments[filepath] = arguments_list
|
||||||
|
|
||||||
return arguments
|
return arguments
|
||||||
|
|
||||||
|
@ -24,7 +29,29 @@ def find_arguments(directory):
|
||||||
# Specify the directory you want to search for cpp files
|
# Specify the directory you want to search for cpp files
|
||||||
directory = '/Users/edsilm2/llama.cpp/examples'
|
directory = '/Users/edsilm2/llama.cpp/examples'
|
||||||
|
|
||||||
# Call the function and print the result
|
if __name__ == '__main__':
|
||||||
|
# Call the find function and print the result
|
||||||
result = find_arguments(directory)
|
result = find_arguments(directory)
|
||||||
|
all_of_them = set()
|
||||||
for filename, arguments in result.items():
|
for filename, arguments in result.items():
|
||||||
print(filename, arguments)
|
print(f"Filename: \033[32m{filename}\033[0m, arguments: {arguments}\n")
|
||||||
|
for argument in arguments:
|
||||||
|
if argument not in all_of_them:
|
||||||
|
all_of_them.add("".join(argument))
|
||||||
|
print(f"\033[32mAll of them: \033[0m{sorted(all_of_them)}.")
|
||||||
|
|
||||||
|
with open("help_list.txt", "r") as helpfile:
|
||||||
|
lines = helpfile.read().split("\n")
|
||||||
|
for filename, arguments in result.items():
|
||||||
|
parameters = []
|
||||||
|
for line in lines:
|
||||||
|
for argument in arguments:
|
||||||
|
if argument in line:
|
||||||
|
parameters.append(line)
|
||||||
|
all_parameters = set(parameters)
|
||||||
|
print(f"\n\nFilename: \033[32m{filename.split('/')[-1]}\033[0m\n\n command-line arguments available and gpt-params functions implemented:\n")
|
||||||
|
if not all_parameters:
|
||||||
|
print(f" \033[032mNone\033[0m\n")
|
||||||
|
else:
|
||||||
|
for parameter in all_parameters:
|
||||||
|
print(f" help: \033[33m{parameter:<30}\033[0m")
|
104
help_list.txt
Normal file
104
help_list.txt
Normal file
|
@ -0,0 +1,104 @@
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-i, --interactive run in interactive mode
|
||||||
|
--interactive-first run in interactive mode and wait for input right away
|
||||||
|
-ins, --instruct run in instruction mode (use with Alpaca models)
|
||||||
|
--multiline-input allows you to write or paste multiple lines without ending each in '\\'
|
||||||
|
-r PROMPT, --reverse-prompt PROMPT
|
||||||
|
halt generation at PROMPT, return control in interactive mode
|
||||||
|
(can be specified more than once for multiple prompts).
|
||||||
|
--color colorise output to distinguish prompt and user input from generations
|
||||||
|
-s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)
|
||||||
|
-t N, --threads N number of threads to use during generation (default: %d)\n", params.n_threads);
|
||||||
|
-tb N, --threads-batch N
|
||||||
|
number of threads to use during batch and prompt processing (default: same as --threads)
|
||||||
|
-p PROMPT, --prompt PROMPT
|
||||||
|
prompt to start generation with (default: empty)
|
||||||
|
-e, --escape process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)
|
||||||
|
--prompt-cache FNAME file to cache prompt state for faster startup (default: none)
|
||||||
|
--prompt-cache-all if specified, saves user input and generations to cache as well.
|
||||||
|
not supported with --interactive or other interactive options
|
||||||
|
--prompt-cache-ro if specified, uses the prompt cache but does not update it.
|
||||||
|
--random-prompt start with a randomized prompt.
|
||||||
|
--in-prefix-bos prefix BOS to user inputs, preceding the `--in-prefix` string
|
||||||
|
--in-prefix STRING string to prefix user inputs with (default: empty)
|
||||||
|
--in-suffix STRING string to suffix after user inputs with (default: empty)
|
||||||
|
-f FNAME, --file FNAME
|
||||||
|
prompt file to start generation.
|
||||||
|
-n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
|
||||||
|
-c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
|
||||||
|
-b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
|
||||||
|
--top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
|
||||||
|
--top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
|
||||||
|
--tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
|
||||||
|
--typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p);
|
||||||
|
--repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n);
|
||||||
|
--repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty);
|
||||||
|
--presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty);
|
||||||
|
--frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty);
|
||||||
|
--mirostat N use Mirostat sampling.
|
||||||
|
Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.
|
||||||
|
(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat);
|
||||||
|
--mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta);
|
||||||
|
--mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau);
|
||||||
|
-l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS
|
||||||
|
modifies the likelihood of token appearing in the completion,
|
||||||
|
i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',
|
||||||
|
or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'
|
||||||
|
--grammar GRAMMAR BNF-like grammar to constrain generations (see samples in grammars/ dir)
|
||||||
|
--grammar-file FNAME file to read grammar from
|
||||||
|
--cfg-negative-prompt PROMPT
|
||||||
|
negative prompt to use for guidance. (default: empty)
|
||||||
|
--cfg-negative-prompt-file FNAME
|
||||||
|
negative prompt file to use for guidance. (default: empty)
|
||||||
|
--cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
|
||||||
|
--rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale
|
||||||
|
--rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: loaded from model)
|
||||||
|
--rope-freq-scale N RoPE frequency linear scaling factor (default: loaded from model)
|
||||||
|
--ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)
|
||||||
|
--no-penalize-nl do not penalize newline token
|
||||||
|
--memory-f32 use f32 instead of f16 for memory key+value (default: disabled)
|
||||||
|
not recommended: doubles context memory required and no measurable increase in quality
|
||||||
|
--temp N temperature (default: %.1f)\n", (double)params.temp);
|
||||||
|
--logits-all return logits for all tokens in the batch (default: disabled)
|
||||||
|
--hellaswag compute HellaSwag score over random tasks from datafile supplied with -f
|
||||||
|
--hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
|
||||||
|
--keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
|
||||||
|
--draft N number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft);
|
||||||
|
--chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
|
||||||
|
-np N, --parallel N number of parallel sequences to decode (default: %d)\n", params.n_parallel);
|
||||||
|
-ns N, --sequences N number of sequences to decode (default: %d)\n", params.n_sequences);
|
||||||
|
-cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)
|
||||||
|
if (llama_mlock_supported()) {
|
||||||
|
--mlock force system to keep model in RAM rather than swapping or compressing
|
||||||
|
}
|
||||||
|
if (llama_mmap_supported()) {
|
||||||
|
--no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)
|
||||||
|
}
|
||||||
|
--numa attempt optimizations that help on some NUMA systems
|
||||||
|
if run without this previously, it is recommended to drop the system page cache before using this
|
||||||
|
see https://github.com/ggerganov/llama.cpp/issues/1437
|
||||||
|
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
|
||||||
|
-ngl N, --n-gpu-layers N
|
||||||
|
number of layers to store in VRAM
|
||||||
|
-ngld N, --n-gpu-layers-draft N
|
||||||
|
number of layers to store in VRAM for the draft model
|
||||||
|
-ts SPLIT --tensor-split SPLIT
|
||||||
|
how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1
|
||||||
|
-mg i, --main-gpu i the GPU to use for scratch and small tensors
|
||||||
|
#ifdef GGML_USE_CUBLAS
|
||||||
|
-nommq, --no-mul-mat-q
|
||||||
|
use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.
|
||||||
|
Not recommended since this is both slower and uses more VRAM.
|
||||||
|
#endif // GGML_USE_CUBLAS
|
||||||
|
#endif
|
||||||
|
--verbose-prompt print prompt before generation
|
||||||
|
fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles
|
||||||
|
--lora FNAME apply LoRA adapter (implies --no-mmap)
|
||||||
|
--lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)
|
||||||
|
--lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter
|
||||||
|
-m FNAME, --model FNAME
|
||||||
|
model path (default: %s)\n", params.model.c_str());
|
||||||
|
-md FNAME, --model-draft FNAME
|
||||||
|
draft model for speculative decoding (default: %s)\n", params.model.c_str());
|
||||||
|
-ld LOGDIR, --logdir LOGDIR
|
||||||
|
path under which to save YAML logs (no logging if unset)
|
Loading…
Add table
Add a link
Reference in a new issue