From e0504d536c0867ddba0b5b09cc5a9923263441ba Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy <podvitskiymichael@gmail.com>
Date: Thu, 29 Feb 2024 18:01:14 +0100
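Subject: [PATCH] PR clean up

Remove llama_n_layers(), llama_n_heads() and llama_get_n_threads() from
llama.cpp together with their LLAMA_API declarations in llama.h, and add
a "get hparams kv" comment in llm_load_hparams().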
---
 llama.cpp | 16 +---------------
 llama.h   |  4 ----
 2 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 13af325a7..423333c85 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3072,6 +3072,7 @@ static void llm_load_hparams(
     // get general kv
     ml.get_key(LLM_KV_GENERAL_NAME, model.name, false);
 
+    // get hparams kv
     ml.get_key(LLM_KV_VOCAB_SIZE,           hparams.n_vocab,       false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, hparams.n_vocab);
     ml.get_key(LLM_KV_CONTEXT_LENGTH,       hparams.n_ctx_train);
     ml.get_key(LLM_KV_EMBEDDING_LENGTH,     hparams.n_embd);
@@ -12520,14 +12521,6 @@ int32_t llama_n_embd(const struct llama_model * model) {
     return model->hparams.n_embd;
 }
 
-int32_t llama_n_layers(const struct llama_model * model) {
-    return model->hparams.n_layer;
-}
-
-int32_t llama_n_heads(const struct llama_model * model) {
-    return model->hparams.n_head;
-}
-
 float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
@@ -13185,13 +13178,6 @@ void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_
     ctx->cparams.n_threads_batch = n_threads_batch;
 }
 
-void llama_get_n_threads(struct llama_context * ctx, uint32_t * n_threads, uint32_t * n_threads_batch) {
-    GGML_ASSERT(n_threads);
-    GGML_ASSERT(n_threads_batch);
-    *n_threads = ctx->cparams.n_threads;
-    *n_threads_batch = ctx->cparams.n_threads_batch;
-}
-
 void llama_set_abort_callback(struct llama_context * ctx, bool (*abort_callback)(void * data), void * abort_callback_data) {
     ctx->abort_callback      = abort_callback;
     ctx->abort_callback_data = abort_callback_data;
diff --git a/llama.h b/llama.h
index af58a1d05..4c76c2cbb 100644
--- a/llama.h
+++ b/llama.h
@@ -384,8 +384,6 @@ extern "C" {
     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
     LLAMA_API int32_t llama_n_embd     (const struct llama_model * model);
-    LLAMA_API int32_t llama_n_layers   (const struct llama_model * model);
-    LLAMA_API int32_t llama_n_heads    (const struct llama_model * model);
 
     // Get the model's RoPE frequency scaling factor
     LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
@@ -643,8 +641,6 @@ extern "C" {
     // n_threads is the number of threads used for generation (single token)
     // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
     LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);
-    // Get the number of threads used for decoding
-    LLAMA_API void llama_get_n_threads(struct llama_context * ctx, uint32_t * n_threads, uint32_t * n_threads_batch);
 
     // Set abort callback
     LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);