lora: load to device buft

2024-07-06 02:12:53 +02:00 · 2024-07-06 02:12:53 +02:00 · 67c5e14d06
commit 67c5e14d06
parent 213701b51a
3 changed files with 172 additions and 274 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -406,6 +406,9 @@ extern "C" {
        const char * content;
    } llama_chat_message;

+    // lora adapter
+    struct llama_lora_adapter;
+
    // Helpers for getting default parameters
    LLAMA_API struct llama_model_params llama_model_default_params(void);
    LLAMA_API struct llama_context_params llama_context_default_params(void);
@@ -510,13 +513,9 @@ extern "C" {
    // the layers modified by the adapter. Can be NULL to use the current loaded model.
    // The model needs to be reloaded before applying a new adapter, otherwise the adapter
    // will be applied on top of the previous one
-    // Returns 0 on success
-    LLAMA_API int32_t llama_model_apply_lora_from_file(
-            const struct llama_model * model,
-                          const char * path_lora,
-                               float   scale,
-                          const char * path_base_model,
-                             int32_t   n_threads);
+    LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
+            struct llama_context * ctx,
+            const char * path_lora);

    // Apply a loaded control vector to a llama_context, or if data is NULL, clear
    // the currently loaded vector.