llama : use _impl suffix instead of _internal (#11060)

ggml-ci
2025-01-06 10:52:01 +02:00 · 2025-01-06 10:52:01 +02:00 · 5047dd3546
commit 5047dd3546
parent 46e3556e01
2 changed files with 18 additions and 18 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -22,7 +22,7 @@ static void zeros(std::ofstream & file, size_t n) {
    }
 }

-struct quantize_state_internal {
+struct quantize_state_impl {
    const llama_model                 & model;
    const llama_model_quantize_params * params;

@@ -43,13 +43,13 @@ struct quantize_state_internal {
    // used to figure out if a model shares tok_embd with the output weight
    bool has_output = false;

-    quantize_state_internal(const llama_model & model, const llama_model_quantize_params * params)
+    quantize_state_impl(const llama_model & model, const llama_model_quantize_params * params)
        : model(model)
        , params(params)
        {}
 };

-static void llama_tensor_dequantize_internal(
+static void llama_tensor_dequantize_impl(
    struct ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
    const size_t nelements, const int nthread
 ) {
@@ -121,7 +121,7 @@ static void llama_tensor_dequantize_internal(
    workers.clear();
 }

-static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) {
+static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_type, const ggml_tensor * tensor, llama_ftype ftype) {
    const std::string name = ggml_get_name(tensor);

    // TODO: avoid hardcoded tensor names - use the TN_* constants
@@ -410,7 +410,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
    return new_type;
 }

-static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
+static size_t llama_tensor_quantize_impl(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
    if (nthread < 2) {
        // single-thread
        size_t new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
@@ -464,7 +464,7 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
    return new_size;
 }

-static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
+static void llama_model_quantize_impl(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
    ggml_type default_type;
    llama_ftype ftype = params->ftype;

@@ -534,7 +534,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
    llm_load_hparams(ml, model);
    llm_load_stats  (ml, model);

-    struct quantize_state_internal qs(model, params);
+    struct quantize_state_impl qs(model, params);

    if (params->only_copy) {
        ftype = model.ftype;
@@ -837,7 +837,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
            } else if (ggml_is_quantized(tensor->type) && !params->allow_requantize) {
                throw std::runtime_error(format("requantizing from type %s is disabled", ggml_type_name(tensor->type)));
            } else {
-                llama_tensor_dequantize_internal(tensor, f32_conv_buf, workers, nelements, nthread);
+                llama_tensor_dequantize_impl(tensor, f32_conv_buf, workers, nelements, nthread);
                f32_data = (float *) f32_conv_buf.data();
            }

@@ -866,7 +866,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
                void * new_data_03 = (char *)new_data + ggml_row_size(new_type, n_per_row) * i03 * nrows;
                const float * imatrix_03 = imatrix ? imatrix + i03 * n_per_row : nullptr;

-                new_size += llama_tensor_quantize_internal(new_type, f32_data_03, new_data_03, chunk_size, nrows, n_per_row, imatrix_03, workers, nthread_use);
+                new_size += llama_tensor_quantize_impl(new_type, f32_data_03, new_data_03, chunk_size, nrows, n_per_row, imatrix_03, workers, nthread_use);
            }
            LLAMA_LOG_INFO("size = %8.2f MiB -> %8.2f MiB\n", ggml_nbytes(tensor)/1024.0/1024.0, new_size/1024.0/1024.0);
        }
@@ -919,7 +919,7 @@ uint32_t llama_model_quantize(
        const char * fname_out,
        const llama_model_quantize_params * params) {
    try {
-        llama_model_quantize_internal(fname_inp, fname_out, params);
+        llama_model_quantize_impl(fname_inp, fname_out, params);
    } catch (const std::exception & err) {
        LLAMA_LOG_ERROR("%s: failed to quantize: %s\n", __func__, err.what());
        return 1;