From 5432cc18dadaa5039da0468532e3d470d7d04527 Mon Sep 17 00:00:00 2001
From: mike dupont
Date: Tue, 21 Nov 2023 15:11:18 -0500
Subject: [PATCH] adding debug notes

---
 .gitignore |  1 +
 README.org | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/.gitignore b/.gitignore
index 41259a12f..2fd33d08d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,3 +99,4 @@ tests/test-tokenizer-0-llama
 tests/test-tokenizer-0-falcon
 tests/test-tokenizer-1-llama
 tests/test-tokenizer-1-bpe
+/#llama.cpp#

diff --git a/README.org b/README.org
index 4f7092f93..dba2d6dd8 100644
--- a/README.org
+++ b/README.org
@@ -1037,3 +1037,52 @@
 nm /mnt/data1/2023/11/09/llama.cpp/build/bin/main >main.nm
 grep libcuda report7.gron -C10 > cudareport.txt
 grep -C1000 libcuda report7.jq > cuda.txt
+
+
+
+(gpt_params &) @0x7fffffffc960: {seed = 1700596789, n_threads = 12,
+  n_threads_batch = -1, n_predict = -1, n_ctx = 512, n_batch = 512, n_keep = 0,
+  n_draft = 16, n_chunks = -1, n_parallel = 1, n_sequences = 1, p_accept = 0.5,
+  p_split = 0.100000001, n_gpu_layers = -1, n_gpu_layers_draft = -1, main_gpu = 0,
+  tensor_split = {0 }, n_beams = 0, rope_freq_base = 0,
+--Type <RET> for more, q to quit, c to continue without paging--
+  rope_freq_scale = 0, yarn_ext_factor = -1, yarn_attn_factor = 1,
+  yarn_beta_fast = 32, yarn_beta_slow = 1, yarn_orig_ctx = 0,
+  rope_scaling_type = -1 '\377', sparams = {n_prev = 64, n_probs = 0, top_k = 40,
+    top_p = 0.949999988, min_p = 0.0500000007, tfs_z = 1, typical_p = 1,
+    temp = 0.800000012, penalty_last_n = 64, penalty_repeat = 1.10000002,
+--Type <RET> for more, q to quit, c to continue without paging--
+    penalty_freq = 0, penalty_present = 0, mirostat = 0, mirostat_tau = 5,
+    mirostat_eta = 0.100000001, penalize_nl = true, grammar = "",
+    cfg_negative_prompt = "", cfg_scale = 1,
+    logit_bias = std::unordered_map with 0 elements},
+  model = "/home/mdupont/.ollama/models/mistral", model_draft = "",
+--Type <RET> for more, q to quit, c to continue without paging--
+  model_alias = "unknown", prompt = "", prompt_file = "", path_prompt_cache = "",
+  input_prefix = "", input_suffix = "",
+  antiprompt = std::vector of length 0, capacity 0, logdir = "",
+  lora_adapter = std::vector of length 0, capacity 0, lora_base = "", ppl_stride = 0,
+  ppl_output_type = 0, hellaswag = false, hellaswag_tasks = 400, mul_mat_q = true,
+--Type <RET> for more, q to quit, c to continue without paging--
+  memory_f16 = true, random_prompt = false, use_color = false, interactive = false,
+  chatml = false, prompt_cache_all = false, prompt_cache_ro = false,
+  embedding = false, escape = false, interactive_first = false,
+  multiline_input = false, simple_io = false, cont_batching = false,
+  input_prefix_bos = false, ignore_eos = false, instruct = false, logits_all = false,
+--Type <RET> for more, q to quit, c to continue without paging--
+  use_mmap = true, use_mlock = false, numa = false, verbose_prompt = false,
+  infill = false, mmproj = "", image = ""}
+(gdb)
+
+ llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
+
+ at /home/mdupont/experiments/llama.cpp/ggml.cpp:18561
+18561 ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
+
+
+p *ctx
+$14 = {header = {magic = "GGUF", version = 2, n_tensors = 291, n_kv = 20},
+  kv = 0x555556ffc2f0, infos = 0x55555716d5f0, alignment = 0, offset = 0, size = 0,
+  data = 0x0}
+(gdb)
+l
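
Context for the notes above: the `p *ctx` dump shows the gguf context while the tensor
infos are still being read. The header fields are already filled in (magic "GGUF",
version 2, 291 tensors, 20 key/value pairs) and kv/infos are allocated, while
alignment, offset and size are still 0. The breakpoint at ggml.cpp:18561 is inside the
loop that pulls each tensor's dimension sizes (info->ne[j]) off the file with
gguf_fread_el. Below is a minimal standalone sketch of reading just the GGUF v2 header
fields that appear in the dump. The names gguf_header_sketch and read_header are made
up for illustration, a little-endian file is assumed, and this is not the actual
gguf_init_from_file / gguf_fread_el code in ggml, only a rough picture of what those
reads are doing at that point in the session.

// gguf_header_read_sketch.cpp
// Sketch: read the GGUF v2 header fields seen in the gdb dump above.
#include <cstdint>
#include <cstdio>

struct gguf_header_sketch {
    char     magic[4];   // "GGUF"
    uint32_t version;    // 2 in the session above
    uint64_t n_tensors;  // 291 in the session above
    uint64_t n_kv;       // 20 in the session above
};

// Read the header field by field (avoids struct padding concerns).
// The real ggml code reads elements one at a time and tracks an offset;
// plain fread calls are used here for brevity.
static bool read_header(FILE * f, gguf_header_sketch & h) {
    bool ok = true;
    ok = ok && fread(h.magic,      1, sizeof(h.magic),     f) == sizeof(h.magic);
    ok = ok && fread(&h.version,   1, sizeof(h.version),   f) == sizeof(h.version);
    ok = ok && fread(&h.n_tensors, 1, sizeof(h.n_tensors), f) == sizeof(h.n_tensors);
    ok = ok && fread(&h.n_kv,      1, sizeof(h.n_kv),      f) == sizeof(h.n_kv);
    return ok;
}

int main(int argc, char ** argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s model.gguf\n", argv[0]); return 1; }
    FILE * f = fopen(argv[1], "rb");
    if (!f) { perror("fopen"); return 1; }
    gguf_header_sketch h{};
    if (read_header(f, h)) {
        printf("magic=%.4s version=%u n_tensors=%llu n_kv=%llu\n",
               h.magic, h.version,
               (unsigned long long) h.n_tensors, (unsigned long long) h.n_kv);
    } else {
        fprintf(stderr, "failed to read header\n");
    }
    fclose(f);
    return 0;
}

Running this against the same .gguf file should print the same n_tensors/n_kv values
that gdb showed in the ctx header, which is a quick way to confirm the file is being
parsed from the expected offset.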