remove pp_threads from llama_context_params
This commit is contained in:
parent ce6d86ec41
commit 0480362f12

3 changed files with 0 additions and 3 deletions
@@ -668,7 +668,6 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
     lparams.embedding = params.embedding;
     lparams.rope_freq_base = params.rope_freq_base;
     lparams.rope_freq_scale = params.rope_freq_scale;
-    lparams.pp_threads = params.pp_threads;
 
     return lparams;
 }
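A minimal sketch of how `llama_context_params_from_gpt_params` is typically consumed, assuming the llama.cpp API contemporary with this commit; the model path and the setup calls around it are illustrative, not part of the diff.

```cpp
// Sketch only: assumes this era's API (llama_backend_init,
// llama_load_model_from_file, llama_new_context_with_model);
// "model.bin" is a hypothetical path.
#include "common.h"
#include "llama.h"

int main() {
    gpt_params params;  // CLI defaults from common.h
    llama_backend_init(false /* numa */);

    // Build context params from gpt_params -- after this commit the
    // struct no longer carries a pp_threads field.
    struct llama_context_params lparams = llama_context_params_from_gpt_params(params);

    struct llama_model * model = llama_load_model_from_file("model.bin", lparams);
    struct llama_context * ctx = llama_new_context_with_model(model, lparams);

    // ... tokenize and evaluate here ...

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```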
@@ -895,7 +895,6 @@ struct llama_context_params llama_context_default_params() {
         /*.rms_norm_eps =*/ LLAMA_DEFAULT_RMS_EPS,
         /*.gpu_layers =*/ 0,
         /*.main_gpu =*/ 0,
-        /*.pp_threads =*/ GGML_DEFAULT_N_THREADS,
         /*.tensor_split =*/ nullptr,
         /*.rope_freq_base =*/ 10000.0f,
         /*.rope_freq_scale =*/ 1.0f,
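For context, a short sketch of how these defaults are consumed and selectively overridden by a caller; the override values are hypothetical, not from this commit.

```cpp
// Sketch (assumption, not from this commit): start from the defaults
// above and override selected fields before creating a context.
struct llama_context_params lparams = llama_context_default_params();
lparams.n_gpu_layers    = 32;   // hypothetical: offload 32 layers to the GPU
lparams.rope_freq_scale = 0.5f; // hypothetical: 2x linear RoPE context scaling
```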
llama.h (1 deletion)
@@ -94,7 +94,6 @@ extern "C" {
     float rms_norm_eps; // rms norm epsilon (TEMP - will be moved to model hparams)
     int32_t n_gpu_layers; // number of layers to store in VRAM
     int32_t main_gpu; // the GPU that is used for scratch and small tensors
-    int32_t pp_threads; // number of threads used for prompt processing only
 
     const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
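With pp_threads gone from the public struct, the prompt-processing thread count has to come from somewhere else. A hedged migration sketch, assuming this era's `llama_eval()`, which already accepted a per-call thread count:

```cpp
// Migration sketch (assumption: the llama_eval() signature of this era,
// which takes a per-call thread count). Callers that previously set
// lparams.pp_threads now pass the thread count at evaluation time.
#include <vector>
#include "llama.h"

void eval_prompt(struct llama_context * ctx, const std::vector<llama_token> & tokens) {
    int n_past    = 0;
    int n_threads = 8; // hypothetical value, formerly params.pp_threads
    llama_eval(ctx, tokens.data(), (int) tokens.size(), n_past, n_threads);
}
```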