examples : replace fprintf to stdout with printf (#3017)
This commit is contained in:
		
							parent
							
								
									c9c3220c48
								
							
						
					
					
						commit
						de2fe892af
					
				
					 7 changed files with 245 additions and 245 deletions
				
			
		|  | @ -584,109 +584,109 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { | void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { | ||||||
|     fprintf(stdout, "usage: %s [options]\n", argv[0]); |     printf("usage: %s [options]\n", argv[0]); | ||||||
|     fprintf(stdout, "\n"); |     printf("\n"); | ||||||
|     fprintf(stdout, "options:\n"); |     printf("options:\n"); | ||||||
|     fprintf(stdout, "  -h, --help            show this help message and exit\n"); |     printf("  -h, --help            show this help message and exit\n"); | ||||||
|     fprintf(stdout, "  -i, --interactive     run in interactive mode\n"); |     printf("  -i, --interactive     run in interactive mode\n"); | ||||||
|     fprintf(stdout, "  --interactive-first   run in interactive mode and wait for input right away\n"); |     printf("  --interactive-first   run in interactive mode and wait for input right away\n"); | ||||||
|     fprintf(stdout, "  -ins, --instruct      run in instruction mode (use with Alpaca models)\n"); |     printf("  -ins, --instruct      run in instruction mode (use with Alpaca models)\n"); | ||||||
|     fprintf(stdout, "  --multiline-input     allows you to write or paste multiple lines without ending each in '\\'\n"); |     printf("  --multiline-input     allows you to write or paste multiple lines without ending each in '\\'\n"); | ||||||
|     fprintf(stdout, "  -r PROMPT, --reverse-prompt PROMPT\n"); |     printf("  -r PROMPT, --reverse-prompt PROMPT\n"); | ||||||
|     fprintf(stdout, "                        halt generation at PROMPT, return control in interactive mode\n"); |     printf("                        halt generation at PROMPT, return control in interactive mode\n"); | ||||||
|     fprintf(stdout, "                        (can be specified more than once for multiple prompts).\n"); |     printf("                        (can be specified more than once for multiple prompts).\n"); | ||||||
|     fprintf(stdout, "  --color               colorise output to distinguish prompt and user input from generations\n"); |     printf("  --color               colorise output to distinguish prompt and user input from generations\n"); | ||||||
|     fprintf(stdout, "  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n"); |     printf("  -s SEED, --seed SEED  RNG seed (default: -1, use random seed for < 0)\n"); | ||||||
|     fprintf(stdout, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads); |     printf("  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads); | ||||||
|     fprintf(stdout, "  -p PROMPT, --prompt PROMPT\n"); |     printf("  -p PROMPT, --prompt PROMPT\n"); | ||||||
|     fprintf(stdout, "                        prompt to start generation with (default: empty)\n"); |     printf("                        prompt to start generation with (default: empty)\n"); | ||||||
|     fprintf(stdout, "  -e, --escape          process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n"); |     printf("  -e, --escape          process prompt escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n"); | ||||||
|     fprintf(stdout, "  --prompt-cache FNAME  file to cache prompt state for faster startup (default: none)\n"); |     printf("  --prompt-cache FNAME  file to cache prompt state for faster startup (default: none)\n"); | ||||||
|     fprintf(stdout, "  --prompt-cache-all    if specified, saves user input and generations to cache as well.\n"); |     printf("  --prompt-cache-all    if specified, saves user input and generations to cache as well.\n"); | ||||||
|     fprintf(stdout, "                        not supported with --interactive or other interactive options\n"); |     printf("                        not supported with --interactive or other interactive options\n"); | ||||||
|     fprintf(stdout, "  --prompt-cache-ro     if specified, uses the prompt cache but does not update it.\n"); |     printf("  --prompt-cache-ro     if specified, uses the prompt cache but does not update it.\n"); | ||||||
|     fprintf(stdout, "  --random-prompt       start with a randomized prompt.\n"); |     printf("  --random-prompt       start with a randomized prompt.\n"); | ||||||
|     fprintf(stdout, "  --in-prefix-bos       prefix BOS to user inputs, preceding the `--in-prefix` string\n"); |     printf("  --in-prefix-bos       prefix BOS to user inputs, preceding the `--in-prefix` string\n"); | ||||||
|     fprintf(stdout, "  --in-prefix STRING    string to prefix user inputs with (default: empty)\n"); |     printf("  --in-prefix STRING    string to prefix user inputs with (default: empty)\n"); | ||||||
|     fprintf(stdout, "  --in-suffix STRING    string to suffix after user inputs with (default: empty)\n"); |     printf("  --in-suffix STRING    string to suffix after user inputs with (default: empty)\n"); | ||||||
|     fprintf(stdout, "  -f FNAME, --file FNAME\n"); |     printf("  -f FNAME, --file FNAME\n"); | ||||||
|     fprintf(stdout, "                        prompt file to start generation.\n"); |     printf("                        prompt file to start generation.\n"); | ||||||
|     fprintf(stdout, "  -n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); |     printf("  -n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); | ||||||
|     fprintf(stdout, "  -c N, --ctx-size N    size of the prompt context (default: %d)\n", params.n_ctx); |     printf("  -c N, --ctx-size N    size of the prompt context (default: %d)\n", params.n_ctx); | ||||||
|     fprintf(stdout, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch); |     printf("  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch); | ||||||
|     fprintf(stdout, "  --top-k N             top-k sampling (default: %d, 0 = disabled)\n", params.top_k); |     printf("  --top-k N             top-k sampling (default: %d, 0 = disabled)\n", params.top_k); | ||||||
|     fprintf(stdout, "  --top-p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); |     printf("  --top-p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); | ||||||
|     fprintf(stdout, "  --tfs N               tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); |     printf("  --tfs N               tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); | ||||||
|     fprintf(stdout, "  --typical N           locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); |     printf("  --typical N           locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); | ||||||
|     fprintf(stdout, "  --repeat-last-n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); |     printf("  --repeat-last-n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); | ||||||
|     fprintf(stdout, "  --repeat-penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); |     printf("  --repeat-penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); | ||||||
|     fprintf(stdout, "  --presence-penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); |     printf("  --presence-penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); | ||||||
|     fprintf(stdout, "  --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); |     printf("  --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); | ||||||
|     fprintf(stdout, "  --mirostat N          use Mirostat sampling.\n"); |     printf("  --mirostat N          use Mirostat sampling.\n"); | ||||||
|     fprintf(stdout, "                        Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); |     printf("                        Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); | ||||||
|     fprintf(stdout, "                        (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); |     printf("                        (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); | ||||||
|     fprintf(stdout, "  --mirostat-lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); |     printf("  --mirostat-lr N       Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); | ||||||
|     fprintf(stdout, "  --mirostat-ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); |     printf("  --mirostat-ent N      Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); | ||||||
|     fprintf(stdout, "  -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); |     printf("  -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); | ||||||
|     fprintf(stdout, "                        modifies the likelihood of token appearing in the completion,\n"); |     printf("                        modifies the likelihood of token appearing in the completion,\n"); | ||||||
|     fprintf(stdout, "                        i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); |     printf("                        i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); | ||||||
|     fprintf(stdout, "                        or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); |     printf("                        or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); | ||||||
|     fprintf(stdout, "  --grammar GRAMMAR     BNF-like grammar to constrain generations (see samples in grammars/ dir)\n"); |     printf("  --grammar GRAMMAR     BNF-like grammar to constrain generations (see samples in grammars/ dir)\n"); | ||||||
|     fprintf(stdout, "  --grammar-file FNAME  file to read grammar from\n"); |     printf("  --grammar-file FNAME  file to read grammar from\n"); | ||||||
|     fprintf(stdout, "  --cfg-negative-prompt PROMPT\n"); |     printf("  --cfg-negative-prompt PROMPT\n"); | ||||||
|     fprintf(stdout, "                        negative prompt to use for guidance. (default: empty)\n"); |     printf("                        negative prompt to use for guidance. (default: empty)\n"); | ||||||
|     fprintf(stdout, "  --cfg-negative-prompt-file FNAME\n"); |     printf("  --cfg-negative-prompt-file FNAME\n"); | ||||||
|     fprintf(stdout, "                        negative prompt file to use for guidance. (default: empty)\n"); |     printf("                        negative prompt file to use for guidance. (default: empty)\n"); | ||||||
|     fprintf(stdout, "  --cfg-scale N         strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); |     printf("  --cfg-scale N         strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); | ||||||
|     fprintf(stdout, "  --rope-scale N        RoPE context linear scaling factor, inverse of --rope-freq-scale (default: %g)\n", 1.0f/params.rope_freq_scale); |     printf("  --rope-scale N        RoPE context linear scaling factor, inverse of --rope-freq-scale (default: %g)\n", 1.0f/params.rope_freq_scale); | ||||||
|     fprintf(stdout, "  --rope-freq-base N    RoPE base frequency, used by NTK-aware scaling (default: %.1f)\n", params.rope_freq_base); |     printf("  --rope-freq-base N    RoPE base frequency, used by NTK-aware scaling (default: %.1f)\n", params.rope_freq_base); | ||||||
|     fprintf(stdout, "  --rope-freq-scale N   RoPE frequency linear scaling factor, inverse of --rope-scale (default: %g)\n", params.rope_freq_scale); |     printf("  --rope-freq-scale N   RoPE frequency linear scaling factor, inverse of --rope-scale (default: %g)\n", params.rope_freq_scale); | ||||||
|     fprintf(stdout, "  --ignore-eos          ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); |     printf("  --ignore-eos          ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); | ||||||
|     fprintf(stdout, "  --no-penalize-nl      do not penalize newline token\n"); |     printf("  --no-penalize-nl      do not penalize newline token\n"); | ||||||
|     fprintf(stdout, "  --memory-f32          use f32 instead of f16 for memory key+value (default: disabled)\n"); |     printf("  --memory-f32          use f32 instead of f16 for memory key+value (default: disabled)\n"); | ||||||
|     fprintf(stdout, "                        not recommended: doubles context memory required and no measurable increase in quality\n"); |     printf("                        not recommended: doubles context memory required and no measurable increase in quality\n"); | ||||||
|     fprintf(stdout, "  --temp N              temperature (default: %.1f)\n", (double)params.temp); |     printf("  --temp N              temperature (default: %.1f)\n", (double)params.temp); | ||||||
|     fprintf(stdout, "  --perplexity          compute perplexity over each ctx window of the prompt\n"); |     printf("  --perplexity          compute perplexity over each ctx window of the prompt\n"); | ||||||
|     fprintf(stdout, "  --hellaswag           compute HellaSwag score over random tasks from datafile supplied with -f\n"); |     printf("  --hellaswag           compute HellaSwag score over random tasks from datafile supplied with -f\n"); | ||||||
|     fprintf(stdout, "  --hellaswag-tasks N   number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); |     printf("  --hellaswag-tasks N   number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); | ||||||
|     fprintf(stdout, "  --keep N              number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); |     printf("  --keep N              number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); | ||||||
|     fprintf(stdout, "  --draft N             number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); |     printf("  --draft N             number of tokens to draft for speculative decoding (default: %d)\n", params.n_draft); | ||||||
|     fprintf(stdout, "  --chunks N            max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); |     printf("  --chunks N            max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); | ||||||
|     if (llama_mlock_supported()) { |     if (llama_mlock_supported()) { | ||||||
|         fprintf(stdout, "  --mlock               force system to keep model in RAM rather than swapping or compressing\n"); |         printf("  --mlock               force system to keep model in RAM rather than swapping or compressing\n"); | ||||||
|     } |     } | ||||||
|     if (llama_mmap_supported()) { |     if (llama_mmap_supported()) { | ||||||
|         fprintf(stdout, "  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); |         printf("  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); | ||||||
|     } |     } | ||||||
|     fprintf(stdout, "  --numa                attempt optimizations that help on some NUMA systems\n"); |     printf("  --numa                attempt optimizations that help on some NUMA systems\n"); | ||||||
|     fprintf(stdout, "                        if run without this previously, it is recommended to drop the system page cache before using this\n"); |     printf("                        if run without this previously, it is recommended to drop the system page cache before using this\n"); | ||||||
|     fprintf(stdout, "                        see https://github.com/ggerganov/llama.cpp/issues/1437\n"); |     printf("                        see https://github.com/ggerganov/llama.cpp/issues/1437\n"); | ||||||
| #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD | #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD | ||||||
|     fprintf(stdout, "  -ngl N, --n-gpu-layers N\n"); |     printf("  -ngl N, --n-gpu-layers N\n"); | ||||||
|     fprintf(stdout, "                        number of layers to store in VRAM\n"); |     printf("                        number of layers to store in VRAM\n"); | ||||||
|     fprintf(stdout, "  -ts SPLIT --tensor-split SPLIT\n"); |     printf("  -ts SPLIT --tensor-split SPLIT\n"); | ||||||
|     fprintf(stdout, "                        how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); |     printf("                        how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); | ||||||
|     fprintf(stdout, "  -mg i, --main-gpu i   the GPU to use for scratch and small tensors\n"); |     printf("  -mg i, --main-gpu i   the GPU to use for scratch and small tensors\n"); | ||||||
|     fprintf(stdout, "  -lv, --low-vram       don't allocate VRAM scratch buffer\n"); |     printf("  -lv, --low-vram       don't allocate VRAM scratch buffer\n"); | ||||||
| #ifdef GGML_USE_CUBLAS | #ifdef GGML_USE_CUBLAS | ||||||
|     fprintf(stdout, "  -nommq, --no-mul-mat-q\n"); |     printf("  -nommq, --no-mul-mat-q\n"); | ||||||
|     fprintf(stdout, "                        use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.\n"); |     printf("                        use " GGML_CUBLAS_NAME " instead of custom mul_mat_q " GGML_CUDA_NAME " kernels.\n"); | ||||||
|     fprintf(stdout, "                        Not recommended since this is both slower and uses more VRAM.\n"); |     printf("                        Not recommended since this is both slower and uses more VRAM.\n"); | ||||||
| #endif // GGML_USE_CUBLAS
 | #endif // GGML_USE_CUBLAS
 | ||||||
| #endif | #endif | ||||||
|     fprintf(stdout, "  --mtest               compute maximum memory usage\n"); |     printf("  --mtest               compute maximum memory usage\n"); | ||||||
|     fprintf(stdout, "  --export              export the computation graph to 'llama.ggml'\n"); |     printf("  --export              export the computation graph to 'llama.ggml'\n"); | ||||||
|     fprintf(stdout, "  --verbose-prompt      print prompt before generation\n"); |     printf("  --verbose-prompt      print prompt before generation\n"); | ||||||
|     fprintf(stderr, "  --simple-io           use basic IO for better compatibility in subprocesses and limited consoles\n"); |     fprintf(stderr, "  --simple-io           use basic IO for better compatibility in subprocesses and limited consoles\n"); | ||||||
|     fprintf(stdout, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n"); |     printf("  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n"); | ||||||
|     fprintf(stdout, "  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n"); |     printf("  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n"); | ||||||
|     fprintf(stdout, "  -m FNAME, --model FNAME\n"); |     printf("  -m FNAME, --model FNAME\n"); | ||||||
|     fprintf(stdout, "                        model path (default: %s)\n", params.model.c_str()); |     printf("                        model path (default: %s)\n", params.model.c_str()); | ||||||
|     fprintf(stdout, "  -md FNAME, --model-draft FNAME\n"); |     printf("  -md FNAME, --model-draft FNAME\n"); | ||||||
|     fprintf(stdout, "                        draft model for speculative decoding (default: %s)\n", params.model.c_str()); |     printf("                        draft model for speculative decoding (default: %s)\n", params.model.c_str()); | ||||||
|     fprintf(stdout, "  -ld LOGDIR, --logdir LOGDIR\n"); |     printf("  -ld LOGDIR, --logdir LOGDIR\n"); | ||||||
|     fprintf(stdout, "                        path under which to save YAML logs (no logging if unset)\n"); |     printf("                        path under which to save YAML logs (no logging if unset)\n"); | ||||||
|     fprintf(stdout, "\n"); |     printf("\n"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::string gpt_random_prompt(std::mt19937 & rng) { | std::string gpt_random_prompt(std::mt19937 & rng) { | ||||||
|  |  | ||||||
							
								
								
									
										16
									
								
								common/log.h
									
										
									
									
									
								
							
							
						
						
									
										16
									
								
								common/log.h
									
										
									
									
									
								
							|  | @ -513,16 +513,16 @@ inline bool log_param_pair_parse(bool check_but_dont_parse, const std::string & | ||||||
| 
 | 
 | ||||||
| inline void log_print_usage() | inline void log_print_usage() | ||||||
| { | { | ||||||
|     fprintf(stdout, "log options:\n"); |     printf("log options:\n"); | ||||||
|     /* format
 |     /* format
 | ||||||
|     fprintf(stdout, "  -h, --help            show this help message and exit\n");*/ |     printf("  -h, --help            show this help message and exit\n");*/ | ||||||
|     /* spacing
 |     /* spacing
 | ||||||
|     fprintf(stdout, "__-param----------------Description\n");*/ |     printf("__-param----------------Description\n");*/ | ||||||
|     fprintf(stdout, "  --log-test            Run simple logging test\n"); |     printf("  --log-test            Run simple logging test\n"); | ||||||
|     fprintf(stdout, "  --log-disable         Disable trace logs\n"); |     printf("  --log-disable         Disable trace logs\n"); | ||||||
|     fprintf(stdout, "  --log-enable          Enable trace logs\n"); |     printf("  --log-enable          Enable trace logs\n"); | ||||||
|     fprintf(stdout, "  --log-file            Specify a log filename (without extension)\n"); |     printf("  --log-file            Specify a log filename (without extension)\n"); | ||||||
|     fprintf(stdout, "                        Log file will be tagged with unique ID and written as \"<name>.<ID>.log\"\n"); /*  */ |     printf("                        Log file will be tagged with unique ID and written as \"<name>.<ID>.log\"\n"); /*  */ | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #define log_dump_cmdline(argc, argv) log_dump_cmdline_impl(argc, argv) | #define log_dump_cmdline(argc, argv) log_dump_cmdline_impl(argc, argv) | ||||||
|  |  | ||||||
|  | @ -76,7 +76,7 @@ bool gguf_ex_write(const std::string & fname) { | ||||||
| 
 | 
 | ||||||
|     gguf_write_to_file(ctx, fname.c_str(), false); |     gguf_write_to_file(ctx, fname.c_str(), false); | ||||||
| 
 | 
 | ||||||
|     fprintf(stdout, "%s: wrote file '%s;\n", __func__, fname.c_str()); |     printf("%s: wrote file '%s;\n", __func__, fname.c_str()); | ||||||
| 
 | 
 | ||||||
|     ggml_free(ctx_data); |     ggml_free(ctx_data); | ||||||
|     gguf_free(ctx); |     gguf_free(ctx); | ||||||
|  | @ -93,20 +93,20 @@ bool gguf_ex_read_0(const std::string & fname) { | ||||||
| 
 | 
 | ||||||
|     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); |     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); | ||||||
| 
 | 
 | ||||||
|     fprintf(stdout, "%s: version:      %d\n", __func__, gguf_get_version(ctx)); |     printf("%s: version:      %d\n", __func__, gguf_get_version(ctx)); | ||||||
|     fprintf(stdout, "%s: alignment:   %zu\n", __func__, gguf_get_alignment(ctx)); |     printf("%s: alignment:   %zu\n", __func__, gguf_get_alignment(ctx)); | ||||||
|     fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx)); |     printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx)); | ||||||
| 
 | 
 | ||||||
|     // kv
 |     // kv
 | ||||||
|     { |     { | ||||||
|         const int n_kv = gguf_get_n_kv(ctx); |         const int n_kv = gguf_get_n_kv(ctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv); |         printf("%s: n_kv: %d\n", __func__, n_kv); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_kv; ++i) { |         for (int i = 0; i < n_kv; ++i) { | ||||||
|             const char * key = gguf_get_key(ctx, i); |             const char * key = gguf_get_key(ctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key); |             printf("%s: kv[%d]: key = %s\n", __func__, i, key); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -116,10 +116,10 @@ bool gguf_ex_read_0(const std::string & fname) { | ||||||
| 
 | 
 | ||||||
|         const int keyidx = gguf_find_key(ctx, findkey); |         const int keyidx = gguf_find_key(ctx, findkey); | ||||||
|         if (keyidx == -1) { |         if (keyidx == -1) { | ||||||
|             fprintf(stdout, "%s: find key: %s not found.\n", __func__, findkey); |             printf("%s: find key: %s not found.\n", __func__, findkey); | ||||||
|         } else { |         } else { | ||||||
|             const char * key_value = gguf_get_val_str(ctx, keyidx); |             const char * key_value = gguf_get_val_str(ctx, keyidx); | ||||||
|             fprintf(stdout, "%s: find key: %s found, kv[%d] value = %s\n", __func__, findkey, keyidx, key_value); |             printf("%s: find key: %s found, kv[%d] value = %s\n", __func__, findkey, keyidx, key_value); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -127,13 +127,13 @@ bool gguf_ex_read_0(const std::string & fname) { | ||||||
|     { |     { | ||||||
|         const int n_tensors = gguf_get_n_tensors(ctx); |         const int n_tensors = gguf_get_n_tensors(ctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); |         printf("%s: n_tensors: %d\n", __func__, n_tensors); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_tensors; ++i) { |         for (int i = 0; i < n_tensors; ++i) { | ||||||
|             const char * name   = gguf_get_tensor_name  (ctx, i); |             const char * name   = gguf_get_tensor_name  (ctx, i); | ||||||
|             const size_t offset = gguf_get_tensor_offset(ctx, i); |             const size_t offset = gguf_get_tensor_offset(ctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); |             printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -153,20 +153,20 @@ bool gguf_ex_read_1(const std::string & fname) { | ||||||
| 
 | 
 | ||||||
|     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); |     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params); | ||||||
| 
 | 
 | ||||||
|     fprintf(stdout, "%s: version:      %d\n", __func__, gguf_get_version(ctx)); |     printf("%s: version:      %d\n", __func__, gguf_get_version(ctx)); | ||||||
|     fprintf(stdout, "%s: alignment:   %zu\n", __func__, gguf_get_alignment(ctx)); |     printf("%s: alignment:   %zu\n", __func__, gguf_get_alignment(ctx)); | ||||||
|     fprintf(stdout, "%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx)); |     printf("%s: data offset: %zu\n", __func__, gguf_get_data_offset(ctx)); | ||||||
| 
 | 
 | ||||||
|     // kv
 |     // kv
 | ||||||
|     { |     { | ||||||
|         const int n_kv = gguf_get_n_kv(ctx); |         const int n_kv = gguf_get_n_kv(ctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv); |         printf("%s: n_kv: %d\n", __func__, n_kv); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_kv; ++i) { |         for (int i = 0; i < n_kv; ++i) { | ||||||
|             const char * key = gguf_get_key(ctx, i); |             const char * key = gguf_get_key(ctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key); |             printf("%s: kv[%d]: key = %s\n", __func__, i, key); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -174,13 +174,13 @@ bool gguf_ex_read_1(const std::string & fname) { | ||||||
|     { |     { | ||||||
|         const int n_tensors = gguf_get_n_tensors(ctx); |         const int n_tensors = gguf_get_n_tensors(ctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); |         printf("%s: n_tensors: %d\n", __func__, n_tensors); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_tensors; ++i) { |         for (int i = 0; i < n_tensors; ++i) { | ||||||
|             const char * name   = gguf_get_tensor_name  (ctx, i); |             const char * name   = gguf_get_tensor_name  (ctx, i); | ||||||
|             const size_t offset = gguf_get_tensor_offset(ctx, i); |             const size_t offset = gguf_get_tensor_offset(ctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); |             printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -189,13 +189,13 @@ bool gguf_ex_read_1(const std::string & fname) { | ||||||
|         const int n_tensors = gguf_get_n_tensors(ctx); |         const int n_tensors = gguf_get_n_tensors(ctx); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_tensors; ++i) { |         for (int i = 0; i < n_tensors; ++i) { | ||||||
|             fprintf(stdout, "%s: reading tensor %d data\n", __func__, i); |             printf("%s: reading tensor %d data\n", __func__, i); | ||||||
| 
 | 
 | ||||||
|             const char * name = gguf_get_tensor_name(ctx, i); |             const char * name = gguf_get_tensor_name(ctx, i); | ||||||
| 
 | 
 | ||||||
|             struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name); |             struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data); |             printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data); | ||||||
| 
 | 
 | ||||||
|             // print first 10 elements
 |             // print first 10 elements
 | ||||||
|             const float * data = (const float *) cur->data; |             const float * data = (const float *) cur->data; | ||||||
|  | @ -219,7 +219,7 @@ bool gguf_ex_read_1(const std::string & fname) { | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data)); |     printf("%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data)); | ||||||
| 
 | 
 | ||||||
|     ggml_free(ctx_data); |     ggml_free(ctx_data); | ||||||
|     gguf_free(ctx); |     gguf_free(ctx); | ||||||
|  | @ -229,7 +229,7 @@ bool gguf_ex_read_1(const std::string & fname) { | ||||||
| 
 | 
 | ||||||
| int main(int argc, char ** argv) { | int main(int argc, char ** argv) { | ||||||
|     if (argc < 3) { |     if (argc < 3) { | ||||||
|         fprintf(stdout, "usage: %s data.gguf r|w\n", argv[0]); |         printf("usage: %s data.gguf r|w\n", argv[0]); | ||||||
|         return -1; |         return -1; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -305,9 +305,9 @@ struct ggml_tensor * get_tensor_ex( struct ggml_context * ctx, std::string name) | ||||||
| 
 | 
 | ||||||
|     struct ggml_tensor * cur = ggml_get_tensor(ctx, name.c_str()); |     struct ggml_tensor * cur = ggml_get_tensor(ctx, name.c_str()); | ||||||
|     if( cur == NULL ) { |     if( cur == NULL ) { | ||||||
|         fprintf(stdout, "%s: tensor '%s' not found!\n", __func__, name.c_str()); |         printf("%s: tensor '%s' not found!\n", __func__, name.c_str()); | ||||||
|     } else { |     } else { | ||||||
| //        fprintf(stdout, "%s: n_dims = %d, name = '%s'\n", __func__, cur->n_dims, cur->name);
 | //        printf("%s: n_dims = %d, name = '%s'\n", __func__, cur->n_dims, cur->name);
 | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return cur; |     return cur; | ||||||
|  | @ -333,21 +333,21 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fprintf(stdout, "%s: gguf version     = %d\n", __func__, gguf_get_version(ggufctx)); |     printf("%s: gguf version     = %d\n", __func__, gguf_get_version(ggufctx)); | ||||||
|     fprintf(stdout, "%s: gguf alignment   = %zu\n", __func__, gguf_get_alignment(ggufctx)); |     printf("%s: gguf alignment   = %zu\n", __func__, gguf_get_alignment(ggufctx)); | ||||||
|     fprintf(stdout, "%s: gguf data offset = %zu\n", __func__, gguf_get_data_offset(ggufctx)); |     printf("%s: gguf data offset = %zu\n", __func__, gguf_get_data_offset(ggufctx)); | ||||||
| 
 | 
 | ||||||
|     // print all kv
 |     // print all kv
 | ||||||
|     #if 0 |     #if 0 | ||||||
|     { |     { | ||||||
|         const int n_kv = gguf_get_n_kv(ggufctx); |         const int n_kv = gguf_get_n_kv(ggufctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv); |         printf("%s: n_kv: %d\n", __func__, n_kv); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_kv; ++i) { |         for (int i = 0; i < n_kv; ++i) { | ||||||
|             const char * key = gguf_get_key(ggufctx, i); |             const char * key = gguf_get_key(ggufctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key); |             printf("%s: kv[%d]: key = %s\n", __func__, i, key); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     #endif |     #endif | ||||||
|  | @ -357,21 +357,21 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
|         int keyidx; |         int keyidx; | ||||||
| 
 | 
 | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.name"); |         keyidx = gguf_find_key(ggufctx, "general.name"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model name           = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model name           = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.description"); |         keyidx = gguf_find_key(ggufctx, "general.description"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model description    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model description    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.author"); |         keyidx = gguf_find_key(ggufctx, "general.author"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model author         = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model author         = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.license"); |         keyidx = gguf_find_key(ggufctx, "general.license"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model license        = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model license        = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.architecture"); |         keyidx = gguf_find_key(ggufctx, "general.architecture"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model architecture   = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model architecture   = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.file_type"); |         keyidx = gguf_find_key(ggufctx, "general.file_type"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model file type      = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model file type      = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "gptneox.tensor_data_layout"); |         keyidx = gguf_find_key(ggufctx, "gptneox.tensor_data_layout"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model data layout    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model data layout    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.source.hugginface.repository"); |         keyidx = gguf_find_key(ggufctx, "general.source.hugginface.repository"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model source HF repo = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model source HF repo = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // check required metadata
 |     // check required metadata
 | ||||||
|  | @ -382,11 +382,11 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.architecture"); |         keyidx = gguf_find_key(ggufctx, "general.architecture"); | ||||||
|         if (keyidx != -1) { |         if (keyidx != -1) { | ||||||
|             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "falcon") != 0) { |             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "falcon") != 0) { | ||||||
|                 fprintf(stdout, "%s: model architecture not supported!\n", __func__); |                 printf("%s: model architecture not supported!\n", __func__); | ||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             fprintf(stdout, "%s: gguf model architecture not found!\n", __func__); |             printf("%s: gguf model architecture not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -394,11 +394,11 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
|         keyidx = gguf_find_key(ggufctx, "falcon.tensor_data_layout"); |         keyidx = gguf_find_key(ggufctx, "falcon.tensor_data_layout"); | ||||||
|         if (keyidx != -1) { |         if (keyidx != -1) { | ||||||
|             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "jploski") != 0) { |             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "jploski") != 0) { | ||||||
|                 fprintf(stdout, "%s: model tensor data layout not supported!\n", __func__); |                 printf("%s: model tensor data layout not supported!\n", __func__); | ||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             fprintf(stdout, "%s: gguf model tensor data layout not found!\n", __func__); |             printf("%s: gguf model tensor data layout not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -455,11 +455,11 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
| 
 | 
 | ||||||
|         if (keyidx != -1) { |         if (keyidx != -1) { | ||||||
|             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gpt2") != 0) { |             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gpt2") != 0) { | ||||||
|                 fprintf(stdout, "%s: tokenizer model not supported!\n", __func__); |                 printf("%s: tokenizer model not supported!\n", __func__); | ||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             fprintf(stdout, "%s: tokenizer model not found!\n", __func__); |             printf("%s: tokenizer model not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -467,22 +467,22 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
|         int tokens_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.tokens"); |         int tokens_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.tokens"); | ||||||
| 
 | 
 | ||||||
|         if (tokens_keyidx == -1) { |         if (tokens_keyidx == -1) { | ||||||
|             fprintf(stdout, "%s: gpt2 tokenizer vocab not found!\n", __func__); |             printf("%s: gpt2 tokenizer vocab not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         int merges_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.merges"); |         int merges_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.merges"); | ||||||
| 
 | 
 | ||||||
|         if (merges_keyidx == -1) { |         if (merges_keyidx == -1) { | ||||||
|             fprintf(stdout, "%s: gpt2 tokenizer merges not found!\n", __func__); |             printf("%s: gpt2 tokenizer merges not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         hparams.n_vocab = gguf_get_arr_n(ggufctx,tokens_keyidx); |         hparams.n_vocab = gguf_get_arr_n(ggufctx,tokens_keyidx); | ||||||
|         hparams.n_merges = gguf_get_arr_n(ggufctx,merges_keyidx); |         hparams.n_merges = gguf_get_arr_n(ggufctx,merges_keyidx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: gpt2 tokenizer vocab  = %zu\n", __func__, hparams.n_vocab); |         printf("%s: gpt2 tokenizer vocab  = %zu\n", __func__, hparams.n_vocab); | ||||||
|         fprintf(stdout, "%s: gpt2 tokenizer merges = %zu\n", __func__, hparams.n_merges); |         printf("%s: gpt2 tokenizer merges = %zu\n", __func__, hparams.n_merges); | ||||||
| 
 | 
 | ||||||
|         for (size_t i = 0; i < hparams.n_vocab; i++) { |         for (size_t i = 0; i < hparams.n_vocab; i++) { | ||||||
|             std::string word = gguf_get_arr_str(ggufctx, tokens_keyidx, i); |             std::string word = gguf_get_arr_str(ggufctx, tokens_keyidx, i); | ||||||
|  | @ -523,12 +523,12 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
|         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.separator_token_id"); if( keyidx != -1 ) { vocab.special_sep_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } |         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.separator_token_id"); if( keyidx != -1 ) { vocab.special_sep_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.padding_token_id"); if( keyidx != -1 ) {   vocab.special_pad_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } |         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.padding_token_id"); if( keyidx != -1 ) {   vocab.special_pad_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } | ||||||
| 
 | 
 | ||||||
|         if( vocab.special_bos_id != -1 ) { fprintf(stdout, "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].c_str() ); } |         if( vocab.special_bos_id != -1 ) { printf("%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].c_str() ); } | ||||||
|         if( vocab.special_eos_id != -1 ) { fprintf(stdout, "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].c_str() ); } |         if( vocab.special_eos_id != -1 ) { printf("%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].c_str() ); } | ||||||
|         if( vocab.special_unk_id != -1 ) { fprintf(stdout, "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].c_str() ); } |         if( vocab.special_unk_id != -1 ) { printf("%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].c_str() ); } | ||||||
|         if( vocab.special_sep_id != -1 ) { fprintf(stdout, "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].c_str() ); } |         if( vocab.special_sep_id != -1 ) { printf("%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].c_str() ); } | ||||||
|         if( vocab.special_pad_id != -1 ) { fprintf(stdout, "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].c_str() ); } |         if( vocab.special_pad_id != -1 ) { printf("%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].c_str() ); } | ||||||
|         if( vocab.linefeed_id    != -1 ) { fprintf(stdout, "%s: LF token  = %d\n",      __func__, vocab.linefeed_id ); } |         if( vocab.linefeed_id    != -1 ) { printf("%s: LF token  = %d\n",      __func__, vocab.linefeed_id ); } | ||||||
| 
 | 
 | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -543,13 +543,13 @@ bool falcon_model_load(const std::string & fname, falcon_model & model, gpt2bpe_ | ||||||
|     { |     { | ||||||
|         const int n_tensors = gguf_get_n_tensors(ggufctx); |         const int n_tensors = gguf_get_n_tensors(ggufctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); |         printf("%s: n_tensors: %d\n", __func__, n_tensors); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_tensors; ++i) { |         for (int i = 0; i < n_tensors; ++i) { | ||||||
|             const char * name   = gguf_get_tensor_name  (ggufctx, i); |             const char * name   = gguf_get_tensor_name  (ggufctx, i); | ||||||
|             const size_t offset = gguf_get_tensor_offset(ggufctx, i); |             const size_t offset = gguf_get_tensor_offset(ggufctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); |             printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     #endif |     #endif | ||||||
|  |  | ||||||
|  | @ -318,9 +318,9 @@ struct ggml_tensor * get_tensor_ex( struct ggml_context * ctx, std::string name) | ||||||
| 
 | 
 | ||||||
|     struct ggml_tensor * cur = ggml_get_tensor(ctx, name.c_str()); |     struct ggml_tensor * cur = ggml_get_tensor(ctx, name.c_str()); | ||||||
|     if( cur == NULL ) { |     if( cur == NULL ) { | ||||||
|         fprintf(stdout, "%s: tensor '%s' not found!\n", __func__, name.c_str()); |         printf("%s: tensor '%s' not found!\n", __func__, name.c_str()); | ||||||
|     } else { |     } else { | ||||||
| //        fprintf(stdout, "%s: n_dims = %d, name = '%s'\n", __func__, cur->n_dims, cur->name);
 | //        printf("%s: n_dims = %d, name = '%s'\n", __func__, cur->n_dims, cur->name);
 | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     return cur; |     return cur; | ||||||
|  | @ -346,21 +346,21 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2 | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fprintf(stdout, "%s: gguf version     = %d\n", __func__, gguf_get_version(ggufctx)); |     printf("%s: gguf version     = %d\n", __func__, gguf_get_version(ggufctx)); | ||||||
|     fprintf(stdout, "%s: gguf alignment   = %zu\n", __func__, gguf_get_alignment(ggufctx)); |     printf("%s: gguf alignment   = %zu\n", __func__, gguf_get_alignment(ggufctx)); | ||||||
|     fprintf(stdout, "%s: gguf data offset = %zu\n", __func__, gguf_get_data_offset(ggufctx)); |     printf("%s: gguf data offset = %zu\n", __func__, gguf_get_data_offset(ggufctx)); | ||||||
| 
 | 
 | ||||||
|     // print all kv
 |     // print all kv
 | ||||||
|     #if 0 |     #if 0 | ||||||
|     { |     { | ||||||
|         const int n_kv = gguf_get_n_kv(ggufctx); |         const int n_kv = gguf_get_n_kv(ggufctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_kv: %d\n", __func__, n_kv); |         printf("%s: n_kv: %d\n", __func__, n_kv); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_kv; ++i) { |         for (int i = 0; i < n_kv; ++i) { | ||||||
|             const char * key = gguf_get_key(ggufctx, i); |             const char * key = gguf_get_key(ggufctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: kv[%d]: key = %s\n", __func__, i, key); |             printf("%s: kv[%d]: key = %s\n", __func__, i, key); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     #endif |     #endif | ||||||
|  | @ -370,21 +370,21 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2 | ||||||
|         int keyidx; |         int keyidx; | ||||||
| 
 | 
 | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.name"); |         keyidx = gguf_find_key(ggufctx, "general.name"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model name           = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model name           = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.description"); |         keyidx = gguf_find_key(ggufctx, "general.description"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model description    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model description    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.author"); |         keyidx = gguf_find_key(ggufctx, "general.author"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model author         = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model author         = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.license"); |         keyidx = gguf_find_key(ggufctx, "general.license"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model license        = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model license        = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.architecture"); |         keyidx = gguf_find_key(ggufctx, "general.architecture"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model architecture   = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model architecture   = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.file_type"); |         keyidx = gguf_find_key(ggufctx, "general.file_type"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model file type      = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model file type      = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "gptneox.tensor_data_layout"); |         keyidx = gguf_find_key(ggufctx, "gptneox.tensor_data_layout"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model data layout    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model data layout    = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.source.hugginface.repository"); |         keyidx = gguf_find_key(ggufctx, "general.source.hugginface.repository"); | ||||||
|         if (keyidx != -1) { fprintf(stdout, "%s: model source HF repo = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } |         if (keyidx != -1) { printf("%s: model source HF repo = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // check required metadata
 |     // check required metadata
 | ||||||
|  | @ -395,11 +395,11 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2 | ||||||
|         keyidx = gguf_find_key(ggufctx, "general.architecture"); |         keyidx = gguf_find_key(ggufctx, "general.architecture"); | ||||||
|         if (keyidx != -1) { |         if (keyidx != -1) { | ||||||
|             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gptneox") != 0) { |             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gptneox") != 0) { | ||||||
|                 fprintf(stdout, "%s: model architecture not supported!\n", __func__); |                 printf("%s: model architecture not supported!\n", __func__); | ||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             fprintf(stdout, "%s: gguf model architecture not found!\n", __func__); |             printf("%s: gguf model architecture not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -456,11 +456,11 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2 | ||||||
| 
 | 
 | ||||||
|         if (keyidx != -1) { |         if (keyidx != -1) { | ||||||
|             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gpt2") != 0) { |             if ( strcmp(gguf_get_val_str(ggufctx, keyidx), "gpt2") != 0) { | ||||||
|                 fprintf(stdout, "%s: tokenizer model not supported!\n", __func__); |                 printf("%s: tokenizer model not supported!\n", __func__); | ||||||
|                 return false; |                 return false; | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             fprintf(stdout, "%s: tokenizer model not found!\n", __func__); |             printf("%s: tokenizer model not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  | @ -468,22 +468,22 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2 | ||||||
|         int tokens_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.tokens"); |         int tokens_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.tokens"); | ||||||
| 
 | 
 | ||||||
|         if (tokens_keyidx == -1) { |         if (tokens_keyidx == -1) { | ||||||
|             fprintf(stdout, "%s: gpt2 tokenizer vocab not found!\n", __func__); |             printf("%s: gpt2 tokenizer vocab not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         int merges_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.merges"); |         int merges_keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.merges"); | ||||||
| 
 | 
 | ||||||
|         if (merges_keyidx == -1) { |         if (merges_keyidx == -1) { | ||||||
|             fprintf(stdout, "%s: gpt2 tokenizer merges not found!\n", __func__); |             printf("%s: gpt2 tokenizer merges not found!\n", __func__); | ||||||
|             return false; |             return false; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         hparams.n_vocab = gguf_get_arr_n(ggufctx,tokens_keyidx); |         hparams.n_vocab = gguf_get_arr_n(ggufctx,tokens_keyidx); | ||||||
|         hparams.n_merges = gguf_get_arr_n(ggufctx,merges_keyidx); |         hparams.n_merges = gguf_get_arr_n(ggufctx,merges_keyidx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: gpt2 tokenizer vocab  = %zu\n", __func__, hparams.n_vocab); |         printf("%s: gpt2 tokenizer vocab  = %zu\n", __func__, hparams.n_vocab); | ||||||
|         fprintf(stdout, "%s: gpt2 tokenizer merges = %zu\n", __func__, hparams.n_merges); |         printf("%s: gpt2 tokenizer merges = %zu\n", __func__, hparams.n_merges); | ||||||
| 
 | 
 | ||||||
|         for (size_t i = 0; i < hparams.n_vocab; i++) { |         for (size_t i = 0; i < hparams.n_vocab; i++) { | ||||||
|             std::string word = gguf_get_arr_str(ggufctx, tokens_keyidx, i); |             std::string word = gguf_get_arr_str(ggufctx, tokens_keyidx, i); | ||||||
|  | @ -524,12 +524,12 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2 | ||||||
|         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.separator_token_id"); if( keyidx != -1 ) { vocab.special_sep_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } |         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.separator_token_id"); if( keyidx != -1 ) { vocab.special_sep_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } | ||||||
|         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.padding_token_id"); if( keyidx != -1 ) {   vocab.special_pad_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } |         keyidx = gguf_find_key(ggufctx, "tokenizer.ggml.padding_token_id"); if( keyidx != -1 ) {   vocab.special_pad_id = (int32_t)gguf_get_val_u32(ggufctx, keyidx); } | ||||||
| 
 | 
 | ||||||
|         if( vocab.special_bos_id != -1 ) { fprintf(stdout, "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].c_str() ); } |         if( vocab.special_bos_id != -1 ) { printf("%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].c_str() ); } | ||||||
|         if( vocab.special_eos_id != -1 ) { fprintf(stdout, "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].c_str() ); } |         if( vocab.special_eos_id != -1 ) { printf("%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].c_str() ); } | ||||||
|         if( vocab.special_unk_id != -1 ) { fprintf(stdout, "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].c_str() ); } |         if( vocab.special_unk_id != -1 ) { printf("%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].c_str() ); } | ||||||
|         if( vocab.special_sep_id != -1 ) { fprintf(stdout, "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].c_str() ); } |         if( vocab.special_sep_id != -1 ) { printf("%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].c_str() ); } | ||||||
|         if( vocab.special_pad_id != -1 ) { fprintf(stdout, "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].c_str() ); } |         if( vocab.special_pad_id != -1 ) { printf("%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].c_str() ); } | ||||||
|         if( vocab.linefeed_id    != -1 ) { fprintf(stdout, "%s: LF token  = %d\n",      __func__, vocab.linefeed_id ); } |         if( vocab.linefeed_id    != -1 ) { printf("%s: LF token  = %d\n",      __func__, vocab.linefeed_id ); } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -543,13 +543,13 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2 | ||||||
|     { |     { | ||||||
|         const int n_tensors = gguf_get_n_tensors(ggufctx); |         const int n_tensors = gguf_get_n_tensors(ggufctx); | ||||||
| 
 | 
 | ||||||
|         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors); |         printf("%s: n_tensors: %d\n", __func__, n_tensors); | ||||||
| 
 | 
 | ||||||
|         for (int i = 0; i < n_tensors; ++i) { |         for (int i = 0; i < n_tensors; ++i) { | ||||||
|             const char * name   = gguf_get_tensor_name  (ggufctx, i); |             const char * name   = gguf_get_tensor_name  (ggufctx, i); | ||||||
|             const size_t offset = gguf_get_tensor_offset(ggufctx, i); |             const size_t offset = gguf_get_tensor_offset(ggufctx, i); | ||||||
| 
 | 
 | ||||||
|             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); |             printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     #endif |     #endif | ||||||
|  |  | ||||||
|  | @ -165,26 +165,26 @@ static const cmd_params cmd_params_defaults = { | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| static void print_usage(int /* argc */, char ** argv) { | static void print_usage(int /* argc */, char ** argv) { | ||||||
|     fprintf(stdout, "usage: %s [options]\n", argv[0]); |     printf("usage: %s [options]\n", argv[0]); | ||||||
|     fprintf(stdout, "\n"); |     printf("\n"); | ||||||
|     fprintf(stdout, "options:\n"); |     printf("options:\n"); | ||||||
|     fprintf(stdout, "  -h, --help\n"); |     printf("  -h, --help\n"); | ||||||
|     fprintf(stdout, "  -m, --model <filename>            (default: %s)\n", join(cmd_params_defaults.model, ",").c_str()); |     printf("  -m, --model <filename>            (default: %s)\n", join(cmd_params_defaults.model, ",").c_str()); | ||||||
|     fprintf(stdout, "  -p, --n-prompt <n>                (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str()); |     printf("  -p, --n-prompt <n>                (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str()); | ||||||
|     fprintf(stdout, "  -n, --n-gen <n>                   (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str()); |     printf("  -n, --n-gen <n>                   (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str()); | ||||||
|     fprintf(stdout, "  -b, --batch-size <n>              (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str()); |     printf("  -b, --batch-size <n>              (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str()); | ||||||
|     fprintf(stdout, "  --memory-f32 <0|1>                (default: %s)\n", join(cmd_params_defaults.f32_kv, ",").c_str()); |     printf("  --memory-f32 <0|1>                (default: %s)\n", join(cmd_params_defaults.f32_kv, ",").c_str()); | ||||||
|     fprintf(stdout, "  -t, --threads <n>                 (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str()); |     printf("  -t, --threads <n>                 (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str()); | ||||||
|     fprintf(stdout, "  -ngl N, --n-gpu-layers <n>        (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str()); |     printf("  -ngl N, --n-gpu-layers <n>        (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str()); | ||||||
|     fprintf(stdout, "  -mg i, --main-gpu <n>             (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str()); |     printf("  -mg i, --main-gpu <n>             (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str()); | ||||||
|     fprintf(stdout, "  -lv, --low-vram <0|1>             (default: %s)\n", join(cmd_params_defaults.low_vram, ",").c_str()); |     printf("  -lv, --low-vram <0|1>             (default: %s)\n", join(cmd_params_defaults.low_vram, ",").c_str()); | ||||||
|     fprintf(stdout, "  -mmq, --mul-mat-q <0|1>           (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str()); |     printf("  -mmq, --mul-mat-q <0|1>           (default: %s)\n", join(cmd_params_defaults.mul_mat_q, ",").c_str()); | ||||||
|     fprintf(stdout, "  -ts, --tensor_split <ts0/ts1/..>               \n"); |     printf("  -ts, --tensor_split <ts0/ts1/..>               \n"); | ||||||
|     fprintf(stdout, "  -r, --repetitions <n>             (default: %d)\n", cmd_params_defaults.reps); |     printf("  -r, --repetitions <n>             (default: %d)\n", cmd_params_defaults.reps); | ||||||
|     fprintf(stdout, "  -o, --output <csv|json|md|sql>    (default: %s)\n", cmd_params_defaults.output_format == CSV ? "csv" : cmd_params_defaults.output_format == JSON ? "json" : cmd_params_defaults.output_format == MARKDOWN ? "md" : "sql"); |     printf("  -o, --output <csv|json|md|sql>    (default: %s)\n", cmd_params_defaults.output_format == CSV ? "csv" : cmd_params_defaults.output_format == JSON ? "json" : cmd_params_defaults.output_format == MARKDOWN ? "md" : "sql"); | ||||||
|     fprintf(stdout, "  -v, --verbose                     (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0"); |     printf("  -v, --verbose                     (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0"); | ||||||
|     fprintf(stdout, "\n"); |     printf("\n"); | ||||||
|     fprintf(stdout, "Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n"); |     printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n"); | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -118,7 +118,7 @@ static void server_log(const char *level, const char *function, int line, | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace); |     const std::string str = log.dump(-1, ' ', false, json::error_handler_t::replace); | ||||||
|     fprintf(stdout, "%.*s\n", (int)str.size(), str.data()); |     printf("%.*s\n", (int)str.size(), str.data()); | ||||||
|     fflush(stdout); |     fflush(stdout); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -694,50 +694,50 @@ struct llama_server_context | ||||||
| static void server_print_usage(const char *argv0, const gpt_params &params, | static void server_print_usage(const char *argv0, const gpt_params &params, | ||||||
|                                const server_params &sparams) |                                const server_params &sparams) | ||||||
| { | { | ||||||
|     fprintf(stdout, "usage: %s [options]\n", argv0); |     printf("usage: %s [options]\n", argv0); | ||||||
|     fprintf(stdout, "\n"); |     printf("\n"); | ||||||
|     fprintf(stdout, "options:\n"); |     printf("options:\n"); | ||||||
|     fprintf(stdout, "  -h, --help            show this help message and exit\n"); |     printf("  -h, --help            show this help message and exit\n"); | ||||||
|     fprintf(stdout, "  -v, --verbose         verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled"); |     printf("  -v, --verbose         verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled"); | ||||||
|     fprintf(stdout, "  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads); |     printf("  -t N, --threads N     number of threads to use during computation (default: %d)\n", params.n_threads); | ||||||
|     fprintf(stdout, "  -c N, --ctx-size N    size of the prompt context (default: %d)\n", params.n_ctx); |     printf("  -c N, --ctx-size N    size of the prompt context (default: %d)\n", params.n_ctx); | ||||||
|     fprintf(stdout, "  --rope-freq-base N    RoPE base frequency (default: %.1f)\n", params.rope_freq_base); |     printf("  --rope-freq-base N    RoPE base frequency (default: %.1f)\n", params.rope_freq_base); | ||||||
|     fprintf(stdout, "  --rope-freq-scale N   RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale); |     printf("  --rope-freq-scale N   RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale); | ||||||
|     fprintf(stdout, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch); |     printf("  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch); | ||||||
|     fprintf(stdout, "  --memory-f32          use f32 instead of f16 for memory key+value (default: disabled)\n"); |     printf("  --memory-f32          use f32 instead of f16 for memory key+value (default: disabled)\n"); | ||||||
|     fprintf(stdout, "                        not recommended: doubles context memory required and no measurable increase in quality\n"); |     printf("                        not recommended: doubles context memory required and no measurable increase in quality\n"); | ||||||
|     if (llama_mlock_supported()) |     if (llama_mlock_supported()) | ||||||
|     { |     { | ||||||
|         fprintf(stdout, "  --mlock               force system to keep model in RAM rather than swapping or compressing\n"); |         printf("  --mlock               force system to keep model in RAM rather than swapping or compressing\n"); | ||||||
|     } |     } | ||||||
|     if (llama_mmap_supported()) |     if (llama_mmap_supported()) | ||||||
|     { |     { | ||||||
|         fprintf(stdout, "  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); |         printf("  --no-mmap             do not memory-map model (slower load but may reduce pageouts if not using mlock)\n"); | ||||||
|     } |     } | ||||||
|     fprintf(stdout, "  --numa                attempt optimizations that help on some NUMA systems\n"); |     printf("  --numa                attempt optimizations that help on some NUMA systems\n"); | ||||||
| #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD | #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD | ||||||
|     fprintf(stdout, "  -ngl N, --n-gpu-layers N\n"); |     printf("  -ngl N, --n-gpu-layers N\n"); | ||||||
|     fprintf(stdout, "                        number of layers to store in VRAM\n"); |     printf("                        number of layers to store in VRAM\n"); | ||||||
|     fprintf(stdout, "  -ts SPLIT --tensor-split SPLIT\n"); |     printf("  -ts SPLIT --tensor-split SPLIT\n"); | ||||||
|     fprintf(stdout, "                        how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); |     printf("                        how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); | ||||||
|     fprintf(stdout, "  -mg i, --main-gpu i   the GPU to use for scratch and small tensors\n"); |     printf("  -mg i, --main-gpu i   the GPU to use for scratch and small tensors\n"); | ||||||
|     fprintf(stdout, "  -lv, --low-vram       don't allocate VRAM scratch buffer\n"); |     printf("  -lv, --low-vram       don't allocate VRAM scratch buffer\n"); | ||||||
|     fprintf(stdout, "  -nommq, --no-mul-mat-q\n"); |     printf("  -nommq, --no-mul-mat-q\n"); | ||||||
|     fprintf(stdout, "                        use cuBLAS instead of custom mul_mat_q CUDA kernels.\n"); |     printf("                        use cuBLAS instead of custom mul_mat_q CUDA kernels.\n"); | ||||||
|     fprintf(stdout, "                        Not recommended since this is both slower and uses more VRAM.\n"); |     printf("                        Not recommended since this is both slower and uses more VRAM.\n"); | ||||||
| #endif | #endif | ||||||
|     fprintf(stdout, "  -m FNAME, --model FNAME\n"); |     printf("  -m FNAME, --model FNAME\n"); | ||||||
|     fprintf(stdout, "                        model path (default: %s)\n", params.model.c_str()); |     printf("                        model path (default: %s)\n", params.model.c_str()); | ||||||
|     fprintf(stdout, "  -a ALIAS, --alias ALIAS\n"); |     printf("  -a ALIAS, --alias ALIAS\n"); | ||||||
|     fprintf(stdout, "                        set an alias for the model, will be added as `model` field in completion response\n"); |     printf("                        set an alias for the model, will be added as `model` field in completion response\n"); | ||||||
|     fprintf(stdout, "  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n"); |     printf("  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n"); | ||||||
|     fprintf(stdout, "  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n"); |     printf("  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n"); | ||||||
|     fprintf(stdout, "  --host                ip address to listen (default  (default: %s)\n", sparams.hostname.c_str()); |     printf("  --host                ip address to listen (default  (default: %s)\n", sparams.hostname.c_str()); | ||||||
|     fprintf(stdout, "  --port PORT           port to listen (default  (default: %d)\n", sparams.port); |     printf("  --port PORT           port to listen (default  (default: %d)\n", sparams.port); | ||||||
|     fprintf(stdout, "  --path PUBLIC_PATH    path from which to serve static files (default %s)\n", sparams.public_path.c_str()); |     printf("  --path PUBLIC_PATH    path from which to serve static files (default %s)\n", sparams.public_path.c_str()); | ||||||
|     fprintf(stdout, "  -to N, --timeout N    server read/write timeout in seconds (default: %d)\n", sparams.read_timeout); |     printf("  -to N, --timeout N    server read/write timeout in seconds (default: %d)\n", sparams.read_timeout); | ||||||
|     fprintf(stdout, "  --embedding           enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled"); |     printf("  --embedding           enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled"); | ||||||
|     fprintf(stdout, "\n"); |     printf("\n"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void server_params_parse(int argc, char **argv, server_params &sparams, | static void server_params_parse(int argc, char **argv, server_params &sparams, | ||||||
|  | @ -1595,7 +1595,7 @@ int main(int argc, char **argv) | ||||||
|     svr.set_base_dir(sparams.public_path); |     svr.set_base_dir(sparams.public_path); | ||||||
| 
 | 
 | ||||||
|     // to make it ctrl+clickable:
 |     // to make it ctrl+clickable:
 | ||||||
|     fprintf(stdout, "\nllama server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port); |     printf("\nllama server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port); | ||||||
| 
 | 
 | ||||||
|     LOG_INFO("HTTP server listening", { |     LOG_INFO("HTTP server listening", { | ||||||
|                                           {"hostname", sparams.hostname}, |                                           {"hostname", sparams.hostname}, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue