llama.cpp : add documentation about rope_freq_base and scale values (#3401)

* llama.cpp : add documentation about rope_freq_base and scale values

* add notice to hot topics
This commit is contained in:
slaren 2023-09-29 18:42:32 +02:00 committed by GitHub
parent bc34dd4f5b
commit 40e07a60f9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 5 deletions

10
llama.h
View file

@ -167,18 +167,18 @@ extern "C" {
struct llama_context_params {
uint32_t seed; // RNG seed, -1 for random
uint32_t n_ctx; // text context
uint32_t n_batch; // prompt processing batch size
uint32_t n_ctx; // text context, 0 = from model
uint32_t n_batch; // prompt processing maximum batch size
uint32_t n_threads; // number of threads to use for generation
uint32_t n_threads_batch; // number of threads to use for batch processing
// ref: https://github.com/ggerganov/llama.cpp/pull/2054
float rope_freq_base; // RoPE base frequency
float rope_freq_scale; // RoPE frequency scaling factor
float rope_freq_base; // RoPE base frequency, 0 = from model
float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model
// Keep the booleans together to avoid misalignment during copy-by-value.
bool mul_mat_q; // if true, use experimental mul_mat_q kernels
bool f16_kv; // use fp16 for KV cache
bool f16_kv; // use fp16 for KV cache, fp32 otherwise
bool logits_all; // the llama_eval() call computes all logits, not just the last one
bool embedding; // embedding mode only
};