llama.android : update to new API
ggml-ci
This commit is contained in:
parent 330bd07b82
commit d8931a701c

4 changed files with 10 additions and 9 deletions
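In short: this change moves the examples and bindings off the old model-lifecycle names and onto the new ones. llama_load_model_from_file becomes llama_model_load_from_file, llama_free_model becomes llama_model_free, and the Android completion loop now passes a vocab handle instead of the model to llama_token_is_eog. Below is a minimal sketch of the new load/free lifecycle against the C API, for illustration only (the model path is a placeholder and error handling is trimmed):

    #include "llama.h"
    #include <cstdio>

    int main() {
        llama_backend_init();

        llama_model_params mparams = llama_model_default_params();

        // new name (was: llama_load_model_from_file)
        llama_model * model = llama_model_load_from_file("/path/to/model.gguf", mparams);
        if (!model) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        // ... create a context, tokenize, sample ...

        // new name (was: llama_free_model)
        llama_model_free(model);
        llama_backend_free();
        return 0;
    }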
@@ -23,12 +23,12 @@ defer {
 }
 
 let model_params = llama_model_default_params()
-guard let model = llama_load_model_from_file(modelPath.cString(using: .utf8), model_params) else {
+guard let model = llama_model_load_from_file(modelPath.cString(using: .utf8), model_params) else {
     print("Failed to load model")
     exit(1)
 }
 defer {
-    llama_free_model(model)
+    llama_model_free(model)
 }
 
 var tokens = tokenize(text: prompt, add_bos: true)
@@ -87,7 +87,7 @@ Java_android_llama_cpp_LLamaAndroid_load_1model(JNIEnv *env, jobject, jstring fi
     auto path_to_model = env->GetStringUTFChars(filename, 0);
     LOGi("Loading model from %s", path_to_model);
 
-    auto model = llama_load_model_from_file(path_to_model, model_params);
+    auto model = llama_model_load_from_file(path_to_model, model_params);
     env->ReleaseStringUTFChars(filename, path_to_model);
 
     if (!model) {
@@ -102,7 +102,7 @@ Java_android_llama_cpp_LLamaAndroid_load_1model(JNIEnv *env, jobject, jstring fi
 extern "C"
 JNIEXPORT void JNICALL
 Java_android_llama_cpp_LLamaAndroid_free_1model(JNIEnv *, jobject, jlong model) {
-    llama_free_model(reinterpret_cast<llama_model *>(model));
+    llama_model_free(reinterpret_cast<llama_model *>(model));
 }
 
 extern "C"
@@ -405,6 +405,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
     const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
     const auto sampler = reinterpret_cast<llama_sampler *>(sampler_pointer);
     const auto model = llama_get_model(context);
+    const auto vocab = llama_get_vocab(model);
 
     if (!la_int_var) la_int_var = env->GetObjectClass(intvar_ncur);
     if (!la_int_var_value) la_int_var_value = env->GetMethodID(la_int_var, "getValue", "()I");
@@ -414,7 +415,7 @@ Java_android_llama_cpp_LLamaAndroid_completion_1loop(
     const auto new_token_id = llama_sampler_sample(sampler, context, -1);
 
     const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
-    if (llama_token_is_eog(model, new_token_id) || n_cur == n_len) {
+    if (llama_token_is_eog(vocab, new_token_id) || n_cur == n_len) {
         return nullptr;
     }
 
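The two completion-loop hunks above pair together: the loop first grabs the vocab from the model, then feeds that vocab (not the model) to the end-of-generation check. A hedged sketch of that pattern, using the calls exactly as they appear in this diff and assuming an existing context and sampler:

    #include "llama.h"

    // returns false when generation should stop
    static bool sample_one(llama_context * context, llama_sampler * sampler) {
        const auto model = llama_get_model(context);
        const auto vocab = llama_get_vocab(model); // vocab handle, as in the hunk above

        const llama_token new_token_id = llama_sampler_sample(sampler, context, -1);

        // llama_token_is_eog now takes the vocab instead of the model
        if (llama_token_is_eog(vocab, new_token_id)) {
            return false;
        }
        // ... emit/decode new_token_id ...
        return true;
    }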
@@ -52,8 +52,8 @@ actor LlamaContext {
     deinit {
         llama_sampler_free(sampling)
         llama_batch_free(batch)
+        llama_model_free(model)
         llama_free(context)
-        llama_free_model(model)
         llama_backend_free()
     }
 
@@ -65,7 +65,7 @@ actor LlamaContext {
         model_params.n_gpu_layers = 0
         print("Running on simulator, force use n_gpu_layers = 0")
         #endif
-        let model = llama_load_model_from_file(path, model_params)
+        let model = llama_model_load_from_file(path, model_params)
         guard let model else {
             print("Could not load model at \(path)")
             throw LlamaError.couldNotInitializeContext
@@ -76,7 +76,7 @@ class LibLlamaModel:
         self.ffi = libllama.ffi
         if isinstance(mparams, dict):
             mparams = libllama.model_default_params(**mparams)
-        self.model = self.lib.llama_load_model_from_file(path_model.encode(), mparams)
+        self.model = self.lib.llama_model_load_from_file(path_model.encode(), mparams)
         if not self.model:
            raise RuntimeError("error: failed to load model '%s'" % path_model)
         if isinstance(cparams, dict):
@@ -92,7 +92,7 @@ class LibLlamaModel:
         if self.ctx:
             self.lib.llama_free(self.ctx)
         if self.model:
-            self.lib.llama_free_model(self.model)
+            self.lib.llama_model_free(self.model)
         self.ctx = None
         self.model = None
         self.lib = None
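The rename is applied uniformly across every binding this commit touches: the Swift examples, the Android JNI glue, and the Python cffi wrapper in LibLlamaModel all trade llama_load_model_from_file/llama_free_model for llama_model_load_from_file/llama_model_free.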