minor : style

parent 352c3859a7
commit f4cb482c62

2 changed files with 15 additions and 19 deletions
@@ -182,22 +182,19 @@ static void rope_neox_cuda(
                 x, dst, ncols, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor, corr_dims,
                 theta_scale, inv_ndims, freq_factors
                 );
-        }
-        else {
+        } else {
             rope_neox<T, false, true><<<block_nums, block_dims, 0, stream>>>(
                 x, dst, ncols, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor, corr_dims,
                 theta_scale, inv_ndims, freq_factors
                 );
         }
-    }
-    else {
+    } else {
         if (freq_factors == nullptr) {
             rope_neox<T, true, false><<<block_nums, block_dims, 0, stream>>>(
                 x, dst, ncols, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor, corr_dims,
                 theta_scale, inv_ndims, freq_factors
                 );
-        }
-        else {
+        } else {
             rope_neox<T, true, true><<<block_nums, block_dims, 0, stream>>>(
                 x, dst, ncols, n_dims, pos, freq_scale, p_delta_rows, ext_factor, attn_factor, corr_dims,
                 theta_scale, inv_ndims, freq_factors
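For context, this ladder maps two runtime nullptr checks onto compile-time template flags, so each of the four rope_neox instantiations can drop its dead branches. Below is a minimal standalone sketch of the same dispatch pattern, not the actual llama.cpp code: rope_stub and rope_dispatch are hypothetical, and the assumption that the two bools correspond to "has pos" / "has freq_factors" is inferred (the hunk only shows the freq_factors check explicitly).

#include <cstdio>

// Hypothetical stand-in for the templated kernel; the two bools play the
// role of rope_neox's assumed "has pos" / "has freq_factors" flags.
template <bool has_pos, bool has_ff>
static void rope_stub() {
    // if constexpr compiles the untaken branch out of each instantiation,
    // so the nullptr checks are paid once at dispatch, not per element.
    if constexpr (has_pos) { std::printf("pos path\n");          }
    if constexpr (has_ff)  { std::printf("freq_factors path\n"); }
}

// Runtime-to-compile-time dispatch, mirroring the if/else ladder above.
static void rope_dispatch(const int * pos, const float * freq_factors) {
    if (pos == nullptr) {
        if (freq_factors == nullptr) { rope_stub<false, false>(); }
        else                         { rope_stub<false, true >(); }
    } else {
        if (freq_factors == nullptr) { rope_stub<true, false>(); }
        else                         { rope_stub<true, true >(); }
    }
}

int main() {
    int   pos = 0;
    float ff  = 1.0f;
    rope_dispatch(&pos, &ff); // picks rope_stub<true, true>
}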
llama.cpp
@@ -3323,7 +3323,7 @@ struct llama_model_loader {
     }

     template<typename T>
-    bool get_arr(const std::string& key, std::vector<T>& result, const bool required = true) {
+    bool get_arr(const std::string & key, std::vector<T> & result, const bool required = true) {
         const int kid = gguf_find_key(meta, key.c_str());

         if (kid < 0) {
@@ -3345,7 +3345,7 @@ struct llama_model_loader {
         GGML_ASSERT((arr_info.gt != GGUF_TYPE_INT32 || std::is_same<T, int>::value));

         result.resize(arr_info.length);
-        result.assign((const T*)arr_info.data, (const T*)arr_info.data + arr_info.length);
+        result.assign((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length);

         return true;
     }
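For reference, the pointer-range assign touched by this hunk is the standard way to copy a type-erased buffer (as gguf exposes array data) into a typed std::vector; note that assign already resizes the vector to the range's length, so the preceding resize call is not strictly needed. A minimal self-contained sketch of the idiom, with stand-ins for arr_info.data and arr_info.length:

#include <cassert>
#include <vector>

int main() {
    // stand-ins for the gguf array metadata used by get_arr
    const float  raw[4] = { 1.0f, 1.5f, 2.0f, 4.0f }; // arr_info.data
    const void * data   = raw;
    const size_t length = 4;                          // arr_info.length

    // copy the type-erased buffer into a typed vector via a [first, last) range
    std::vector<float> result;
    result.assign((const float *) data, (const float *) data + length);

    assert(result.size() == length && result[3] == 4.0f);
}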
@@ -6994,8 +6994,7 @@ struct llm_build_context {
         return lctx.inp_pos;
     }

-    struct ggml_tensor* build_freq_factors() {
-
+    struct ggml_tensor * build_freq_factors() {
         if (hparams.rope_long_factors.empty() || hparams.rope_short_factors.empty()) {
             lctx.freq_factors = nullptr;
             return nullptr;
@@ -10968,18 +10967,18 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
     }

     if (lctx.freq_factors) {
-        auto freq_dim = hparams.n_embd_head_k / 2;
+        // TODO: this might have to be hparams.n_rot instead of hparams.n_embd_head_k, but maybe it does not matter
+        const auto freq_dim = hparams.n_embd_head_k / 2;

         GGML_ASSERT(lctx.freq_factors->ne[0] == freq_dim);
         GGML_ASSERT(hparams.rope_long_factors.size() == freq_dim);
         GGML_ASSERT(hparams.rope_short_factors.size() == freq_dim);

         // choose long/short freq factors based on the context size
-        auto n_ctx = llama_n_ctx(&lctx);
+        const auto n_ctx = llama_n_ctx(&lctx);
         if (n_ctx > hparams.n_yarn_orig_ctx) {
             ggml_backend_tensor_set(lctx.freq_factors, hparams.rope_long_factors.data(), 0, freq_dim * ggml_element_size(lctx.freq_factors));
-        }
-        else {
+        } else {
             ggml_backend_tensor_set(lctx.freq_factors, hparams.rope_short_factors.data(), 0, freq_dim * ggml_element_size(lctx.freq_factors));
         }
     }
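Taken together with build_freq_factors above, this block uploads either the long or the short RoPE frequency factors once the context size is known. Below is a standalone sketch of just the selection rule, under the assumed semantics that contexts longer than the model's original training context (n_yarn_orig_ctx) get the long factors; pick_freq_factors and the numbers are illustrative, not llama.cpp API, and the actual upload via ggml_backend_tensor_set is omitted.

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper: select long factors once the runtime context
// exceeds the original training context, otherwise short factors.
static const std::vector<float> & pick_freq_factors(
        uint32_t n_ctx, uint32_t n_yarn_orig_ctx,
        const std::vector<float> & long_factors,
        const std::vector<float> & short_factors) {
    return n_ctx > n_yarn_orig_ctx ? long_factors : short_factors;
}

int main() {
    const std::vector<float> longf  = { 1.0f, 2.0f };
    const std::vector<float> shortf = { 1.0f, 1.0f };

    // e.g. a 4096-token original context with a 16384-token session
    const auto & f = pick_freq_factors(16384, 4096, longf, shortf);
    std::printf("%s factors selected\n", &f == &longf ? "long" : "short");
}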