split: move llama_tensor_offset to llama_model_loader

2024-03-21 07:06:14 +01:00 · 2024-03-21 07:06:14 +01:00 · 18ff6ca847
commit 18ff6ca847
parent b8feff411f
1 changed files with 13 additions and 13 deletions
--- a/llama.cpp
+++ b/llama.cpp
@ -1460,17 +1460,6 @@ struct llama_mlock {
 #endif
 };

-// Holds information on a tensor data source location.
-struct llama_tensor_offset  {
-    uint16_t  idx;  // source file index
-    size_t    offs; // tensor data offset in the original file
-
-    llama_tensor_offset(uint16_t idx, const char * name, struct gguf_context * gguf_ctx) : idx(idx) {
-        const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
-        offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
-    }
-};
-
 static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
    std::vector<char> result(8, 0);
    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
@ -2829,7 +2818,18 @@ struct llama_model_loader {
    llama_fver  fver;

    std::vector<std::unique_ptr<llama_mmap>> mappings;
-    std::unordered_map<std::string, struct llama_tensor_offset> tensors_offs; // unified tensor data offset accross files
+
+    // Records where a tensor's data lives: which source file, and at what offset inside it.
+    struct llama_tensor_offset  {
+        uint16_t  idx;  // source file index
+        size_t    offs; // tensor data offset in the original file: gguf data offset + the tensor's own offset
+
+        llama_tensor_offset(uint16_t idx, const char * name, struct gguf_context * gguf_ctx) : idx(idx) { // looks `name` up in gguf_ctx to fill in offs
+            const int tensor_idx = gguf_find_tensor(gguf_ctx, name); // NOTE(review): used below without a check; confirm a failed lookup cannot reach here
+            offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
+        }
+    };
+    std::unordered_map<std::string, struct llama_tensor_offset> tensors_offs; // unified tensor data offset across files

    std::unordered_map<std::string, struct llama_model_kv_override> kv_overrides;

@ -2884,7 +2884,7 @@ struct llama_model_loader {
            }
            get_key(llm_kv(LLM_KV_SPLIT_TENSORS_COUNT), n_tensors);

-            char split_prefix[4096] = {0};
+            char split_prefix[PATH_MAX] = {0};
            if (!llama_split_prefix(split_prefix, fname.c_str(), fname.size(), idx, n_split)) {
                throw std::runtime_error(format("invalid split file: %s", fname.c_str()));
            }