From d8931a701c8d339cc97a24fdb6c8dfd3c6179d19 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 9 Jan 2025 16:03:09 +0200
Subject: [PATCH] llama.android : update to new API

ggml-ci
---
 examples/batched.swift/Sources/main.swift              | 4 ++--
 .../llama.android/llama/src/main/cpp/llama-android.cpp | 7 ++++---
 examples/llama.swiftui/llama.cpp.swift/LibLlama.swift  | 4 ++--
 tests/test-tokenizer-random.py                         | 4 ++--
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/examples/batched.swift/Sources/main.swift b/examples/batched.swift/Sources/main.swift
index 10f2e7fd1..d196a01ed 100644
--- a/examples/batched.swift/Sources/main.swift
+++ b/examples/batched.swift/Sources/main.swift
@@ -23,12 +23,12 @@ defer {
 }
 
 let model_params = llama_model_default_params()
-guard let model = llama_load_model_from_file(modelPath.cString(using: .utf8), model_params) else {
+guard let model = llama_model_load_from_file(modelPath.cString(using: .utf8), model_params) else {
     print("Failed to load model")
     exit(1)
 }
 defer {
-    llama_free_model(model)
+    llama_model_free(model)
 }
 
 var tokens = tokenize(text: prompt, add_bos: true)
diff --git a/examples/llama.android/llama/src/main/cpp/llama-android.cpp b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
index 66ec2aeeb..d5dd78c2c 100644
--- a/examples/llama.android/llama/src/main/cpp/llama-android.cpp
+++ b/examples/llama.android/llama/src/main/cpp/llama-android.cpp
@@ -87,7 +87,7 @@ Java_android_llama_cpp_LLamaAndroid_load_1model(JNIEnv *env, jobject, jstring fi
     auto path_to_model = env->GetStringUTFChars(filename, 0);
     LOGi("Loading model from %s", path_to_model);
 
-    auto model = llama_load_model_from_file(path_to_model, model_params);
+    auto model = llama_model_load_from_file(path_to_model, model_params);
     env->ReleaseStringUTFChars(filename, path_to_model);
 
     if (!model) {
@@ -102,7 +102,7 @@ Java_android_llama_cpp_LLamaAndroid_load_1model(JNIEnv *env, jobject, jstring fi
 extern "C"
 JNIEXPORT void JNICALL
 Java_android_llama_cpp_LLamaAndroid_free_1model(JNIEnv *, jobject, jlong model) {
-    llama_free_model(reinterpret_cast<llama_model *>(model));
+    llama_model_free(reinterpret_cast<llama_model *>(model));
 }
 
 extern "C"
@@ -405,6 +405,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
     const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
     const auto sampler = reinterpret_cast<llama_sampler *>(sampler_pointer);
     const auto model = llama_get_model(context);
+    const auto vocab = llama_get_vocab(model);
 
     if (!la_int_var) la_int_var = env->GetObjectClass(intvar_ncur);
     if (!la_int_var_value) la_int_var_value = env->GetMethodID(la_int_var, "getValue", "()I");
@@ -414,7 +415,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
     const auto new_token_id = llama_sampler_sample(sampler, context, -1);
 
     const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
-    if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
+    if (llama_token_is_eog(vocab, new_token_id) || n_cur == n_len) {
         return nullptr;
     }
 
diff --git a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
index 998c673d5..f88b1fdcc 100644
--- a/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
+++ b/examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
@@ -52,8 +52,8 @@ actor LlamaContext {
     deinit {
         llama_sampler_free(sampling)
         llama_batch_free(batch)
+        llama_model_free(model)
         llama_free(context)
-        llama_free_model(model)
         llama_backend_free()
     }
 
@@ -65,7 +65,7 @@ actor LlamaContext {
         model_params.n_gpu_layers = 0
         print("Running on simulator, force use n_gpu_layers = 0")
 #endif
-        let model = llama_load_model_from_file(path, model_params)
+        let model = llama_model_load_from_file(path, model_params)
         guard let model else {
             print("Could not load model at \(path)")
             throw LlamaError.couldNotInitializeContext
diff --git a/tests/test-tokenizer-random.py b/tests/test-tokenizer-random.py
index 9ebe6c891..c6cdcb554 100644
--- a/tests/test-tokenizer-random.py
+++ b/tests/test-tokenizer-random.py
@@ -76,7 +76,7 @@ class LibLlamaModel:
         self.ffi = libllama.ffi
         if isinstance(mparams, dict):
             mparams = libllama.model_default_params(**mparams)
-        self.model = self.lib.llama_load_model_from_file(path_model.encode(), mparams)
+        self.model = self.lib.llama_model_load_from_file(path_model.encode(), mparams)
         if not self.model:
             raise RuntimeError("error: failed to load model '%s'" % path_model)
         if isinstance(cparams, dict):
@@ -92,7 +92,7 @@ class LibLlamaModel:
         if self.ctx:
             self.lib.llama_free(self.ctx)
         if self.model:
-            self.lib.llama_free_model(self.model)
+            self.lib.llama_model_free(self.model)
         self.ctx = None
         self.model = None
         self.lib = None
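For reference, a minimal stand-alone sketch of the renamed C API lifecycle that this patch migrates the examples to. It is not part of the patch itself; it assumes the `llama.h` header as of this commit, uses only the calls touched in the diffs above (`llama_model_load_from_file`, `llama_model_free`, `llama_get_vocab`), and reduces error handling to the essentials:

```cpp
// Sketch of the post-rename model lifecycle (not part of the patch).
// Assumes llama.h as of this commit; vocab accessor mirrors the
// llama-android.cpp hunk above.
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();

    // renamed: llama_model_load_from_file (was llama_load_model_from_file)
    llama_model * model = llama_model_load_from_file(argv[1], mparams);
    if (model == nullptr) {
        fprintf(stderr, "failed to load model '%s'\n", argv[1]);
        llama_backend_free();
        return 1;
    }

    // token queries such as llama_token_is_eog() now take the vocab,
    // obtained from the model as in the completion_loop hunk above
    const llama_vocab * vocab = llama_get_vocab(model);
    (void) vocab;

    // renamed: llama_model_free (was llama_free_model)
    llama_model_free(model);

    llama_backend_free();
    return 0;
}
```

Note the ordering change in the LibLlama.swift `deinit` above: with the new names the model is freed before the context that was created from it, so any code ported to the new API should make sure the context is no longer used once `llama_model_free` has run.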