Merge branch 'master' into gg/imatrix-gpu-4931
ggml-ci
|
@ -1138,9 +1138,8 @@ static void save_as_llama_lora(const char * filename, struct my_llama_lora * lor
|
|||
return tn_buf.data();
|
||||
};
|
||||
|
||||
uint32_t LLAMA_FILE_MAGIC_LORA = 0x67676C61; // 'ggla'
|
||||
// write_magic
|
||||
file.write_u32(LLAMA_FILE_MAGIC_LORA); // magic
|
||||
file.write_u32(LLAMA_FILE_MAGIC_GGLA); // magic
|
||||
file.write_u32(1); // version
|
||||
// write_hparams
|
||||
file.write_u32(lora->hparams.lora_r);
|
||||
|
@ -1800,7 +1799,7 @@ int main(int argc, char ** argv) {
|
|||
std::vector<llama_token> train_tokens;
|
||||
std::vector<size_t> train_samples_begin;
|
||||
std::vector<size_t> train_samples_size;
|
||||
printf("%s: tokenize training data\n", __func__);
|
||||
printf("%s: tokenize training data from %s\n", __func__, params.common.fn_train_data);
|
||||
tokenize_file(lctx,
|
||||
params.common.fn_train_data,
|
||||
params.common.sample_start,
|
||||
|
|
33
examples/llama.android/.gitignore
vendored
Normal file
|
@ -0,0 +1,33 @@
|
|||
# Gradle files
|
||||
.gradle/
|
||||
build/
|
||||
|
||||
# Local configuration file (sdk path, etc)
|
||||
local.properties
|
||||
|
||||
# Log/OS Files
|
||||
*.log
|
||||
|
||||
# Android Studio generated files and folders
|
||||
captures/
|
||||
.externalNativeBuild/
|
||||
.cxx/
|
||||
*.apk
|
||||
output.json
|
||||
|
||||
# IntelliJ
|
||||
*.iml
|
||||
.idea/
|
||||
misc.xml
|
||||
deploymentTargetDropDown.xml
|
||||
render.experimental.xml
|
||||
|
||||
# Keystore files
|
||||
*.jks
|
||||
*.keystore
|
||||
|
||||
# Google Services (e.g. APIs or Firebase)
|
||||
google-services.json
|
||||
|
||||
# Android Profiling
|
||||
*.hprof
|
0
examples/llama.android/README.md
Normal file
1
examples/llama.android/app/.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/build
|
91
examples/llama.android/app/build.gradle.kts
Normal file
|
@ -0,0 +1,91 @@
|
|||
plugins {
|
||||
id("com.android.application")
|
||||
id("org.jetbrains.kotlin.android")
|
||||
}
|
||||
|
||||
android {
|
||||
namespace = "com.example.llama"
|
||||
compileSdk = 34
|
||||
|
||||
ndkVersion = "26.1.10909125"
|
||||
|
||||
defaultConfig {
|
||||
applicationId = "com.example.llama"
|
||||
minSdk = 33
|
||||
targetSdk = 34
|
||||
versionCode = 1
|
||||
versionName = "1.0"
|
||||
|
||||
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
|
||||
vectorDrawables {
|
||||
useSupportLibrary = true
|
||||
}
|
||||
ndk {
|
||||
// Workaround for https://github.com/llvm/llvm-project/issues/65820
|
||||
// affecting armeabi-v7a. Skip armeabi-v7a when invoked with
|
||||
// -Pskip-armeabi-v7a (e.g., ./gradlew build -Pskip-armeabi-v7a).
|
||||
if (project.hasProperty("skip-armeabi-v7a")) {
|
||||
abiFilters += listOf("arm64-v8a", "x86_64", "x86")
|
||||
}
|
||||
}
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
cppFlags += listOf()
|
||||
arguments += listOf()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buildTypes {
|
||||
release {
|
||||
isMinifyEnabled = false
|
||||
proguardFiles(
|
||||
getDefaultProguardFile("proguard-android-optimize.txt"),
|
||||
"proguard-rules.pro"
|
||||
)
|
||||
}
|
||||
}
|
||||
compileOptions {
|
||||
sourceCompatibility = JavaVersion.VERSION_1_8
|
||||
targetCompatibility = JavaVersion.VERSION_1_8
|
||||
}
|
||||
kotlinOptions {
|
||||
jvmTarget = "1.8"
|
||||
}
|
||||
buildFeatures {
|
||||
compose = true
|
||||
}
|
||||
composeOptions {
|
||||
kotlinCompilerExtensionVersion = "1.5.1"
|
||||
}
|
||||
packaging {
|
||||
resources {
|
||||
excludes += "/META-INF/{AL2.0,LGPL2.1}"
|
||||
}
|
||||
}
|
||||
externalNativeBuild {
|
||||
cmake {
|
||||
path = file("src/main/cpp/CMakeLists.txt")
|
||||
version = "3.22.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
|
||||
implementation("androidx.core:core-ktx:1.12.0")
|
||||
implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.6.2")
|
||||
implementation("androidx.activity:activity-compose:1.8.2")
|
||||
implementation(platform("androidx.compose:compose-bom:2023.08.00"))
|
||||
implementation("androidx.compose.ui:ui")
|
||||
implementation("androidx.compose.ui:ui-graphics")
|
||||
implementation("androidx.compose.ui:ui-tooling-preview")
|
||||
implementation("androidx.compose.material3:material3")
|
||||
testImplementation("junit:junit:4.13.2")
|
||||
androidTestImplementation("androidx.test.ext:junit:1.1.5")
|
||||
androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
|
||||
androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00"))
|
||||
androidTestImplementation("androidx.compose.ui:ui-test-junit4")
|
||||
debugImplementation("androidx.compose.ui:ui-tooling")
|
||||
debugImplementation("androidx.compose.ui:ui-test-manifest")
|
||||
}
|
21
examples/llama.android/app/proguard-rules.pro
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
# Add project specific ProGuard rules here.
|
||||
# You can control the set of applied configuration files using the
|
||||
# proguardFiles setting in build.gradle.
|
||||
#
|
||||
# For more details, see
|
||||
# http://developer.android.com/guide/developing/tools/proguard.html
|
||||
|
||||
# If your project uses WebView with JS, uncomment the following
|
||||
# and specify the fully qualified class name to the JavaScript interface
|
||||
# class:
|
||||
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
|
||||
# public *;
|
||||
#}
|
||||
|
||||
# Uncomment this to preserve the line number information for
|
||||
# debugging stack traces.
|
||||
#-keepattributes SourceFile,LineNumberTable
|
||||
|
||||
# If you keep the line number information, uncomment this to
|
||||
# hide the original source file name.
|
||||
#-renamesourcefileattribute SourceFile
|
30
examples/llama.android/app/src/main/AndroidManifest.xml
Normal file
|
@ -0,0 +1,30 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:tools="http://schemas.android.com/tools">
|
||||
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:dataExtractionRules="@xml/data_extraction_rules"
|
||||
android:fullBackupContent="@xml/backup_rules"
|
||||
android:icon="@mipmap/ic_launcher"
|
||||
android:label="@string/app_name"
|
||||
android:roundIcon="@mipmap/ic_launcher_round"
|
||||
android:supportsRtl="true"
|
||||
android:theme="@style/Theme.LlamaAndroid"
|
||||
>
|
||||
|
||||
<activity
|
||||
android:name=".MainActivity"
|
||||
android:exported="true"
|
||||
android:theme="@style/Theme.LlamaAndroid">
|
||||
<intent-filter>
|
||||
<action android:name="android.intent.action.MAIN" />
|
||||
|
||||
<category android:name="android.intent.category.LAUNCHER" />
|
||||
</intent-filter>
|
||||
</activity>
|
||||
</application>
|
||||
|
||||
</manifest>
|
50
examples/llama.android/app/src/main/cpp/CMakeLists.txt
Normal file
|
@ -0,0 +1,50 @@
|
|||
|
||||
# For more information about using CMake with Android Studio, read the
|
||||
# documentation: https://d.android.com/studio/projects/add-native-code.html.
|
||||
# For more examples on how to use CMake, see https://github.com/android/ndk-samples.
|
||||
|
||||
# Sets the minimum CMake version required for this project.
|
||||
cmake_minimum_required(VERSION 3.22.1)
|
||||
|
||||
# Declares the project name. The project name can be accessed via ${ PROJECT_NAME},
|
||||
# Since this is the top level CMakeLists.txt, the project name is also accessible
|
||||
# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
|
||||
# build script scope).
|
||||
project("llama-android")
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(
|
||||
llama
|
||||
GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
|
||||
GIT_TAG master
|
||||
)
|
||||
|
||||
# Also provides "common"
|
||||
FetchContent_MakeAvailable(llama)
|
||||
|
||||
# Creates and names a library, sets it as either STATIC
|
||||
# or SHARED, and provides the relative paths to its source code.
|
||||
# You can define multiple libraries, and CMake builds them for you.
|
||||
# Gradle automatically packages shared libraries with your APK.
|
||||
#
|
||||
# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
|
||||
# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
|
||||
# is preferred for the same purpose.
|
||||
#
|
||||
# In order to load a library into your app from Java/Kotlin, you must call
|
||||
# System.loadLibrary() and pass the name of the library defined here;
|
||||
# for GameActivity/NativeActivity derived applications, the same library name must be
|
||||
# used in the AndroidManifest.xml file.
|
||||
add_library(${CMAKE_PROJECT_NAME} SHARED
|
||||
# List C/C++ source files with relative paths to this CMakeLists.txt.
|
||||
llama-android.cpp)
|
||||
|
||||
# Specifies libraries CMake should link to your target library. You
|
||||
# can link libraries from various origins, such as libraries defined in this
|
||||
# build script, prebuilt third-party libraries, or Android system libraries.
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME}
|
||||
# List libraries link to the target library
|
||||
llama
|
||||
common
|
||||
android
|
||||
log)
|
394
examples/llama.android/app/src/main/cpp/llama-android.cpp
Normal file
|
@ -0,0 +1,394 @@
|
|||
#include <android/log.h>
|
||||
#include <jni.h>
|
||||
#include <iomanip>
|
||||
#include <math.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include "llama.h"
|
||||
#include "common/common.h"
|
||||
|
||||
// Write C++ code here.
|
||||
//
|
||||
// Do not forget to dynamically load the C++ library into your application.
|
||||
//
|
||||
// For instance,
|
||||
//
|
||||
// In MainActivity.java:
|
||||
// static {
|
||||
// System.loadLibrary("llama-android");
|
||||
// }
|
||||
//
|
||||
// Or, in MainActivity.kt:
|
||||
// companion object {
|
||||
// init {
|
||||
// System.loadLibrary("llama-android")
|
||||
// }
|
||||
// }
|
||||
|
||||
#define TAG "llama-android.cpp"
|
||||
#define LOGi(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
|
||||
#define LOGe(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
|
||||
|
||||
jclass la_int_var;
|
||||
jmethodID la_int_var_value;
|
||||
jmethodID la_int_var_inc;
|
||||
|
||||
static void log_callback(ggml_log_level level, const char * fmt, void * data) {
|
||||
if (level == GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data);
|
||||
else if (level == GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data);
|
||||
else if (level == GGML_LOG_LEVEL_WARN) __android_log_print(ANDROID_LOG_WARN, TAG, fmt, data);
|
||||
else __android_log_print(ANDROID_LOG_DEFAULT, TAG, fmt, data);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_example_llama_Llm_load_1model(JNIEnv *env, jobject, jstring filename) {
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
|
||||
auto path_to_model = env->GetStringUTFChars(filename, 0);
|
||||
LOGi("Loading model from %s", path_to_model);
|
||||
|
||||
auto model = llama_load_model_from_file(path_to_model, model_params);
|
||||
env->ReleaseStringUTFChars(filename, path_to_model);
|
||||
|
||||
if (!model) {
|
||||
LOGe("load_model() failed");
|
||||
env->ThrowNew(env->FindClass("java/lang/IllegalStateException"), "load_model() failed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return reinterpret_cast<jlong>(model);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_example_llama_Llm_free_1model(JNIEnv *, jobject, jlong model) {
|
||||
llama_free_model(reinterpret_cast<llama_model *>(model));
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_example_llama_Llm_new_1context(JNIEnv *env, jobject, jlong jmodel) {
|
||||
auto model = reinterpret_cast<llama_model *>(jmodel);
|
||||
|
||||
if (!model) {
|
||||
LOGe("new_context(): model cannot be null");
|
||||
env->ThrowNew(env->FindClass("java/lang/IllegalArgumentException"), "Model cannot be null");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int n_threads = std::max(1, std::min(8, (int) sysconf(_SC_NPROCESSORS_ONLN) - 2));
|
||||
LOGi("Using %d threads", n_threads);
|
||||
|
||||
llama_context_params ctx_params = llama_context_default_params();
|
||||
ctx_params.seed = 1234;
|
||||
ctx_params.n_ctx = 2048;
|
||||
ctx_params.n_threads = n_threads;
|
||||
ctx_params.n_threads_batch = n_threads;
|
||||
|
||||
llama_context * context = llama_new_context_with_model(model, ctx_params);
|
||||
|
||||
if (!context) {
|
||||
LOGe("llama_new_context_with_model() returned null)");
|
||||
env->ThrowNew(env->FindClass("java/lang/IllegalStateException"),
|
||||
"llama_new_context_with_model() returned null)");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return reinterpret_cast<jlong>(context);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_example_llama_Llm_free_1context(JNIEnv *, jobject, jlong context) {
|
||||
llama_free(reinterpret_cast<llama_context *>(context));
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_example_llama_Llm_backend_1free(JNIEnv *, jobject) {
|
||||
llama_backend_free();
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_example_llama_Llm_log_1to_1android(JNIEnv *, jobject) {
|
||||
llama_log_set(log_callback, NULL);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_example_llama_Llm_bench_1model(
|
||||
JNIEnv *env,
|
||||
jobject,
|
||||
jlong context_pointer,
|
||||
jlong model_pointer,
|
||||
jlong batch_pointer,
|
||||
jint pp,
|
||||
jint tg,
|
||||
jint pl,
|
||||
jint nr
|
||||
) {
|
||||
auto pp_avg = 0.0;
|
||||
auto tg_avg = 0.0;
|
||||
auto pp_std = 0.0;
|
||||
auto tg_std = 0.0;
|
||||
|
||||
const auto context = reinterpret_cast<llama_context *>(context_pointer);
|
||||
const auto model = reinterpret_cast<llama_model *>(model_pointer);
|
||||
const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
|
||||
|
||||
const int n_ctx = llama_n_ctx(context);
|
||||
|
||||
LOGi("n_ctx = %d", n_ctx);
|
||||
|
||||
int i, j;
|
||||
int nri;
|
||||
for (nri = 0; nri < nr; nri++) {
|
||||
LOGi("Benchmark prompt processing (pp)");
|
||||
|
||||
llama_batch_clear(*batch);
|
||||
|
||||
const int n_tokens = pp;
|
||||
for (i = 0; i < n_tokens; i++) {
|
||||
llama_batch_add(*batch, 0, i, { 0 }, false);
|
||||
}
|
||||
|
||||
batch->logits[batch->n_tokens - 1] = true;
|
||||
llama_kv_cache_clear(context);
|
||||
|
||||
const auto t_pp_start = ggml_time_us();
|
||||
if (llama_decode(context, *batch) != 0) {
|
||||
LOGi("llama_decode() failed during prompt processing");
|
||||
}
|
||||
const auto t_pp_end = ggml_time_us();
|
||||
|
||||
// bench text generation
|
||||
|
||||
LOGi("Benchmark text generation (tg)");
|
||||
|
||||
llama_kv_cache_clear(context);
|
||||
const auto t_tg_start = ggml_time_us();
|
||||
for (i = 0; i < tg; i++) {
|
||||
|
||||
llama_batch_clear(*batch);
|
||||
for (j = 0; j < pl; j++) {
|
||||
llama_batch_add(*batch, 0, i, { j }, true);
|
||||
}
|
||||
|
||||
LOGi("llama_decode() text generation: %d", i);
|
||||
if (llama_decode(context, *batch) != 0) {
|
||||
LOGi("llama_decode() failed during text generation");
|
||||
}
|
||||
}
|
||||
|
||||
const auto t_tg_end = ggml_time_us();
|
||||
|
||||
llama_kv_cache_clear(context);
|
||||
|
||||
const auto t_pp = double(t_pp_end - t_pp_start) / 1000000.0;
|
||||
const auto t_tg = double(t_tg_end - t_tg_start) / 1000000.0;
|
||||
|
||||
const auto speed_pp = double(pp) / t_pp;
|
||||
const auto speed_tg = double(pl * tg) / t_tg;
|
||||
|
||||
pp_avg += speed_pp;
|
||||
tg_avg += speed_tg;
|
||||
|
||||
pp_std += speed_pp * speed_pp;
|
||||
tg_std += speed_tg * speed_tg;
|
||||
|
||||
LOGi("pp %f t/s, tg %f t/s", speed_pp, speed_tg);
|
||||
}
|
||||
|
||||
pp_avg /= double(nr);
|
||||
tg_avg /= double(nr);
|
||||
|
||||
if (nr > 1) {
|
||||
pp_std = sqrt(pp_std / double(nr - 1) - pp_avg * pp_avg * double(nr) / double(nr - 1));
|
||||
tg_std = sqrt(tg_std / double(nr - 1) - tg_avg * tg_avg * double(nr) / double(nr - 1));
|
||||
} else {
|
||||
pp_std = 0;
|
||||
tg_std = 0;
|
||||
}
|
||||
|
||||
char model_desc[128];
|
||||
llama_model_desc(model, model_desc, sizeof(model_desc));
|
||||
|
||||
const auto model_size = double(llama_model_size(model)) / 1024.0 / 1024.0 / 1024.0;
|
||||
const auto model_n_params = double(llama_model_n_params(model)) / 1e9;
|
||||
|
||||
const auto backend = "(Android)"; // TODO: What should this be?
|
||||
|
||||
std::stringstream result;
|
||||
result << std::setprecision(2);
|
||||
result << "| model | size | params | backend | test | t/s |\n";
|
||||
result << "| --- | --- | --- | --- | --- | --- |\n";
|
||||
result << "| " << model_desc << " | " << model_size << "GiB | " << model_n_params << "B | " << backend << " | pp " << pp << " | " << pp_avg << " ± " << pp_std << " |\n";
|
||||
result << "| " << model_desc << " | " << model_size << "GiB | " << model_n_params << "B | " << backend << " | tg " << tg << " | " << tg_avg << " ± " << tg_std << " |\n";
|
||||
|
||||
return env->NewStringUTF(result.str().c_str());
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_example_llama_Llm_free_1batch(JNIEnv *, jobject, jlong batch_pointer) {
|
||||
llama_batch_free(*reinterpret_cast<llama_batch *>(batch_pointer));
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jlong JNICALL
|
||||
Java_com_example_llama_Llm_new_1batch(JNIEnv *, jobject, jint n_tokens, jint embd, jint n_seq_max) {
|
||||
|
||||
// Source: Copy of llama.cpp:llama_batch_init but heap-allocated.
|
||||
|
||||
llama_batch *batch = new llama_batch {
|
||||
0,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
};
|
||||
|
||||
if (embd) {
|
||||
batch->embd = (float *) malloc(sizeof(float) * n_tokens * embd);
|
||||
} else {
|
||||
batch->token = (llama_token *) malloc(sizeof(llama_token) * n_tokens);
|
||||
}
|
||||
|
||||
batch->pos = (llama_pos *) malloc(sizeof(llama_pos) * n_tokens);
|
||||
batch->n_seq_id = (int32_t *) malloc(sizeof(int32_t) * n_tokens);
|
||||
batch->seq_id = (llama_seq_id **) malloc(sizeof(llama_seq_id *) * n_tokens);
|
||||
for (int i = 0; i < n_tokens; ++i) {
|
||||
batch->seq_id[i] = (llama_seq_id *) malloc(sizeof(llama_seq_id) * n_seq_max);
|
||||
}
|
||||
batch->logits = (int8_t *) malloc(sizeof(int8_t) * n_tokens);
|
||||
|
||||
return reinterpret_cast<jlong>(batch);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_example_llama_Llm_backend_1init(JNIEnv *, jobject, jboolean numa) {
|
||||
llama_backend_init(numa);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_example_llama_Llm_system_1info(JNIEnv *env, jobject) {
|
||||
return env->NewStringUTF(llama_print_system_info());
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jint JNICALL
|
||||
Java_com_example_llama_Llm_completion_1init(
|
||||
JNIEnv *env,
|
||||
jobject,
|
||||
jlong context_pointer,
|
||||
jlong batch_pointer,
|
||||
jstring jtext,
|
||||
jint n_len
|
||||
) {
|
||||
|
||||
const auto text = env->GetStringUTFChars(jtext, 0);
|
||||
const auto context = reinterpret_cast<llama_context *>(context_pointer);
|
||||
const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
|
||||
|
||||
const auto tokens_list = llama_tokenize(context, text, 1);
|
||||
|
||||
auto n_ctx = llama_n_ctx(context);
|
||||
auto n_kv_req = tokens_list.size() + (n_len - tokens_list.size());
|
||||
|
||||
LOGi("n_len = %d, n_ctx = %d, n_kv_req = %d", n_len, n_ctx, n_kv_req);
|
||||
|
||||
if (n_kv_req > n_ctx) {
|
||||
LOGe("error: n_kv_req > n_ctx, the required KV cache size is not big enough");
|
||||
}
|
||||
|
||||
for (auto id : tokens_list) {
|
||||
LOGi("%s", llama_token_to_piece(context, id).c_str());
|
||||
}
|
||||
|
||||
llama_batch_clear(*batch);
|
||||
|
||||
// evaluate the initial prompt
|
||||
for (auto i = 0; i < tokens_list.size(); i++) {
|
||||
llama_batch_add(*batch, tokens_list[i], i, { 0 }, false);
|
||||
}
|
||||
|
||||
// llama_decode will output logits only for the last token of the prompt
|
||||
batch->logits[batch->n_tokens - 1] = true;
|
||||
|
||||
if (llama_decode(context, *batch) != 0) {
|
||||
LOGe("llama_decode() failed");
|
||||
}
|
||||
|
||||
env->ReleaseStringUTFChars(jtext, text);
|
||||
|
||||
return batch->n_tokens;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT jstring JNICALL
|
||||
Java_com_example_llama_Llm_completion_1loop(
|
||||
JNIEnv * env,
|
||||
jobject,
|
||||
jlong context_pointer,
|
||||
jlong batch_pointer,
|
||||
jint n_len,
|
||||
jobject intvar_ncur
|
||||
) {
|
||||
const auto context = reinterpret_cast<llama_context *>(context_pointer);
|
||||
const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
|
||||
const auto model = llama_get_model(context);
|
||||
|
||||
if (!la_int_var) la_int_var = env->GetObjectClass(intvar_ncur);
|
||||
if (!la_int_var_value) la_int_var_value = env->GetMethodID(la_int_var, "getValue", "()I");
|
||||
if (!la_int_var_inc) la_int_var_inc = env->GetMethodID(la_int_var, "inc", "()V");
|
||||
|
||||
auto n_vocab = llama_n_vocab(model);
|
||||
auto logits = llama_get_logits_ith(context, batch->n_tokens - 1);
|
||||
|
||||
std::vector<llama_token_data> candidates;
|
||||
candidates.reserve(n_vocab);
|
||||
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
candidates.emplace_back(llama_token_data{ token_id, logits[token_id], 0.0f });
|
||||
}
|
||||
|
||||
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
||||
|
||||
// sample the most likely token
|
||||
const auto new_token_id = llama_sample_token_greedy(context, &candidates_p);
|
||||
|
||||
const auto n_cur = env->CallIntMethod(intvar_ncur, la_int_var_value);
|
||||
if (new_token_id == llama_token_eos(model) || n_cur == n_len) {
|
||||
return env->NewStringUTF("");
|
||||
}
|
||||
|
||||
auto new_token_chars = llama_token_to_piece(context, new_token_id);
|
||||
LOGi("new_token_chars: `%s`", new_token_chars.c_str());
|
||||
auto new_token = env->NewStringUTF(new_token_chars.c_str());
|
||||
|
||||
llama_batch_clear(*batch);
|
||||
llama_batch_add(*batch, new_token_id, n_cur, { 0 }, true);
|
||||
|
||||
env->CallVoidMethod(intvar_ncur, la_int_var_inc);
|
||||
|
||||
if (llama_decode(context, *batch) != 0) {
|
||||
LOGe("llama_decode() returned null");
|
||||
}
|
||||
|
||||
return new_token;
|
||||
}
|
||||
|
||||
extern "C"
|
||||
JNIEXPORT void JNICALL
|
||||
Java_com_example_llama_Llm_kv_1cache_1clear(JNIEnv *, jobject, jlong context) {
|
||||
llama_kv_cache_clear(reinterpret_cast<llama_context *>(context));
|
||||
}
|
|
@ -0,0 +1,119 @@
|
|||
package com.example.llama
|
||||
|
||||
import android.app.DownloadManager
|
||||
import android.net.Uri
|
||||
import android.util.Log
|
||||
import androidx.compose.material3.Button
|
||||
import androidx.compose.material3.Text
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.runtime.getValue
|
||||
import androidx.compose.runtime.mutableDoubleStateOf
|
||||
import androidx.compose.runtime.mutableStateOf
|
||||
import androidx.compose.runtime.remember
|
||||
import androidx.compose.runtime.rememberCoroutineScope
|
||||
import androidx.compose.runtime.setValue
|
||||
import androidx.core.database.getLongOrNull
|
||||
import androidx.core.net.toUri
|
||||
import kotlinx.coroutines.delay
|
||||
import kotlinx.coroutines.launch
|
||||
import java.io.File
|
||||
|
||||
data class Downloadable(val name: String, val source: Uri, val destination: File) {
|
||||
companion object {
|
||||
@JvmStatic
|
||||
private val tag: String? = this::class.qualifiedName
|
||||
|
||||
sealed interface State
|
||||
data object Ready: State
|
||||
data class Downloading(val id: Long): State
|
||||
data class Downloaded(val downloadable: Downloadable): State
|
||||
data class Error(val message: String): State
|
||||
|
||||
@JvmStatic
|
||||
@Composable
|
||||
fun Button(viewModel: MainViewModel, dm: DownloadManager, item: Downloadable) {
|
||||
var status: State by remember {
|
||||
mutableStateOf(
|
||||
if (item.destination.exists()) Downloaded(item)
|
||||
else Ready
|
||||
)
|
||||
}
|
||||
var progress by remember { mutableDoubleStateOf(0.0) }
|
||||
|
||||
val coroutineScope = rememberCoroutineScope()
|
||||
|
||||
suspend fun waitForDownload(result: Downloading, item: Downloadable): State {
|
||||
while (true) {
|
||||
val cursor = dm.query(DownloadManager.Query().setFilterById(result.id))
|
||||
|
||||
if (cursor == null) {
|
||||
Log.e(tag, "dm.query() returned null")
|
||||
return Error("dm.query() returned null")
|
||||
}
|
||||
|
||||
if (!cursor.moveToFirst() || cursor.count < 1) {
|
||||
cursor.close()
|
||||
Log.i(tag, "cursor.moveToFirst() returned false or cursor.count < 1, download canceled?")
|
||||
return Ready
|
||||
}
|
||||
|
||||
val pix = cursor.getColumnIndex(DownloadManager.COLUMN_BYTES_DOWNLOADED_SO_FAR)
|
||||
val tix = cursor.getColumnIndex(DownloadManager.COLUMN_TOTAL_SIZE_BYTES)
|
||||
val sofar = cursor.getLongOrNull(pix) ?: 0
|
||||
val total = cursor.getLongOrNull(tix) ?: 1
|
||||
cursor.close()
|
||||
|
||||
if (sofar == total) {
|
||||
return Downloaded(item)
|
||||
}
|
||||
|
||||
progress = (sofar * 1.0) / total
|
||||
|
||||
delay(1000L)
|
||||
}
|
||||
}
|
||||
|
||||
fun onClick() {
|
||||
when (val s = status) {
|
||||
is Downloaded -> {
|
||||
viewModel.load(item.destination.path)
|
||||
}
|
||||
|
||||
is Downloading -> {
|
||||
coroutineScope.launch {
|
||||
status = waitForDownload(s, item)
|
||||
}
|
||||
}
|
||||
|
||||
else -> {
|
||||
item.destination.delete()
|
||||
|
||||
val request = DownloadManager.Request(item.source).apply {
|
||||
setTitle("Downloading model")
|
||||
setDescription("Downloading model: ${item.name}")
|
||||
setAllowedNetworkTypes(DownloadManager.Request.NETWORK_WIFI)
|
||||
setDestinationUri(item.destination.toUri())
|
||||
}
|
||||
|
||||
viewModel.log("Saving ${item.name} to ${item.destination.path}")
|
||||
Log.i(tag, "Saving ${item.name} to ${item.destination.path}")
|
||||
|
||||
val id = dm.enqueue(request)
|
||||
status = Downloading(id)
|
||||
onClick()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Button(onClick = { onClick() }, enabled = status !is Downloading) {
|
||||
when (status) {
|
||||
is Downloading -> Text(text = "Downloading ${(progress * 100).toInt()}%")
|
||||
is Downloaded -> Text("Load ${item.name}")
|
||||
is Ready -> Text("Download ${item.name}")
|
||||
is Error -> Text("Download ${item.name}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
package com.example.llama
|
||||
|
||||
import android.util.Log
|
||||
import kotlinx.coroutines.CoroutineDispatcher
|
||||
import kotlinx.coroutines.asCoroutineDispatcher
|
||||
import kotlinx.coroutines.flow.Flow
|
||||
import kotlinx.coroutines.flow.flow
|
||||
import kotlinx.coroutines.flow.flowOn
|
||||
import kotlinx.coroutines.withContext
|
||||
import java.util.concurrent.Executors
|
||||
import kotlin.concurrent.thread
|
||||
|
||||
class Llm {
|
||||
private val tag: String? = this::class.simpleName
|
||||
|
||||
private val threadLocalState: ThreadLocal<State> = ThreadLocal.withInitial { State.Idle }
|
||||
|
||||
private val runLoop: CoroutineDispatcher = Executors.newSingleThreadExecutor {
|
||||
thread(start = false, name = "Llm-RunLoop") {
|
||||
Log.d(tag, "Dedicated thread for native code: ${Thread.currentThread().name}")
|
||||
|
||||
// No-op if called more than once.
|
||||
System.loadLibrary("llama-android")
|
||||
|
||||
// Set llama log handler to Android
|
||||
log_to_android()
|
||||
backend_init(false)
|
||||
|
||||
Log.d(tag, system_info())
|
||||
|
||||
it.run()
|
||||
}.apply {
|
||||
uncaughtExceptionHandler = Thread.UncaughtExceptionHandler { _, exception: Throwable ->
|
||||
Log.e(tag, "Unhandled exception", exception)
|
||||
}
|
||||
}
|
||||
}.asCoroutineDispatcher()
|
||||
|
||||
private val nlen: Int = 64
|
||||
|
||||
private external fun log_to_android()
|
||||
private external fun load_model(filename: String): Long
|
||||
private external fun free_model(model: Long)
|
||||
private external fun new_context(model: Long): Long
|
||||
private external fun free_context(context: Long)
|
||||
private external fun backend_init(numa: Boolean)
|
||||
private external fun backend_free()
|
||||
private external fun free_batch(batch: Long)
|
||||
private external fun new_batch(nTokens: Int, embd: Int, nSeqMax: Int): Long
|
||||
private external fun bench_model(
|
||||
context: Long,
|
||||
model: Long,
|
||||
batch: Long,
|
||||
pp: Int,
|
||||
tg: Int,
|
||||
pl: Int,
|
||||
nr: Int
|
||||
): String
|
||||
|
||||
private external fun system_info(): String
|
||||
|
||||
private external fun completion_init(
|
||||
context: Long,
|
||||
batch: Long,
|
||||
text: String,
|
||||
nLen: Int
|
||||
): Int
|
||||
|
||||
private external fun completion_loop(
|
||||
context: Long,
|
||||
batch: Long,
|
||||
nLen: Int,
|
||||
ncur: IntVar
|
||||
): String
|
||||
|
||||
private external fun kv_cache_clear(context: Long)
|
||||
|
||||
suspend fun bench(pp: Int, tg: Int, pl: Int, nr: Int = 1): String {
|
||||
return withContext(runLoop) {
|
||||
when (val state = threadLocalState.get()) {
|
||||
is State.Loaded -> {
|
||||
Log.d(tag, "bench(): $state")
|
||||
bench_model(state.context, state.model, state.batch, pp, tg, pl, nr)
|
||||
}
|
||||
|
||||
else -> throw IllegalStateException("No model loaded")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
suspend fun load(pathToModel: String) {
|
||||
withContext(runLoop) {
|
||||
when (threadLocalState.get()) {
|
||||
is State.Idle -> {
|
||||
val model = load_model(pathToModel)
|
||||
if (model == 0L) throw IllegalStateException("load_model() failed")
|
||||
|
||||
val context = new_context(model)
|
||||
if (context == 0L) throw IllegalStateException("new_context() failed")
|
||||
|
||||
val batch = new_batch(512, 0, 1)
|
||||
if (batch == 0L) throw IllegalStateException("new_batch() failed")
|
||||
|
||||
Log.i(tag, "Loaded model $pathToModel")
|
||||
threadLocalState.set(State.Loaded(model, context, batch))
|
||||
}
|
||||
else -> throw IllegalStateException("Model already loaded")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fun send(message: String): Flow<String> = flow {
|
||||
when (val state = threadLocalState.get()) {
|
||||
is State.Loaded -> {
|
||||
val ncur = IntVar(completion_init(state.context, state.batch, message, nlen))
|
||||
while (ncur.value <= nlen) {
|
||||
val str = completion_loop(state.context, state.batch, nlen, ncur)
|
||||
if (str.isEmpty()) {
|
||||
break
|
||||
}
|
||||
emit(str)
|
||||
}
|
||||
kv_cache_clear(state.context)
|
||||
}
|
||||
else -> {}
|
||||
}
|
||||
}.flowOn(runLoop)
|
||||
|
||||
/**
|
||||
* Unloads the model and frees resources.
|
||||
*
|
||||
* This is a no-op if there's no model loaded.
|
||||
*/
|
||||
suspend fun unload() {
|
||||
withContext(runLoop) {
|
||||
when (val state = threadLocalState.get()) {
|
||||
is State.Loaded -> {
|
||||
free_context(state.context)
|
||||
free_model(state.model)
|
||||
free_batch(state.batch)
|
||||
|
||||
threadLocalState.set(State.Idle)
|
||||
}
|
||||
else -> {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
companion object {
|
||||
private class IntVar(value: Int) {
|
||||
@Volatile
|
||||
var value: Int = value
|
||||
private set
|
||||
|
||||
fun inc() {
|
||||
synchronized(this) {
|
||||
value += 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private sealed interface State {
|
||||
data object Idle: State
|
||||
data class Loaded(val model: Long, val context: Long, val batch: Long): State
|
||||
}
|
||||
|
||||
// Enforce only one instance of Llm.
|
||||
private val _instance: Llm = Llm()
|
||||
|
||||
fun instance(): Llm = _instance
|
||||
}
|
||||
}
|
|
@ -0,0 +1,154 @@
|
|||
package com.example.llama
|
||||
|
||||
import android.app.ActivityManager
|
||||
import android.app.DownloadManager
|
||||
import android.content.ClipData
|
||||
import android.content.ClipboardManager
|
||||
import android.net.Uri
|
||||
import android.os.Bundle
|
||||
import android.os.StrictMode
|
||||
import android.os.StrictMode.VmPolicy
|
||||
import android.text.format.Formatter
|
||||
import androidx.activity.ComponentActivity
|
||||
import androidx.activity.compose.setContent
|
||||
import androidx.activity.viewModels
|
||||
import androidx.compose.foundation.layout.Box
|
||||
import androidx.compose.foundation.layout.Column
|
||||
import androidx.compose.foundation.layout.Row
|
||||
import androidx.compose.foundation.layout.fillMaxSize
|
||||
import androidx.compose.foundation.layout.padding
|
||||
import androidx.compose.foundation.lazy.LazyColumn
|
||||
import androidx.compose.foundation.lazy.items
|
||||
import androidx.compose.foundation.lazy.rememberLazyListState
|
||||
import androidx.compose.material3.Button
|
||||
import androidx.compose.material3.LocalContentColor
|
||||
import androidx.compose.material3.MaterialTheme
|
||||
import androidx.compose.material3.OutlinedTextField
|
||||
import androidx.compose.material3.Surface
|
||||
import androidx.compose.material3.Text
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.ui.Modifier
|
||||
import androidx.compose.ui.unit.dp
|
||||
import androidx.core.content.getSystemService
|
||||
import com.example.llama.ui.theme.LlamaAndroidTheme
|
||||
import java.io.File
|
||||
|
||||
class MainActivity(
|
||||
activityManager: ActivityManager? = null,
|
||||
downloadManager: DownloadManager? = null,
|
||||
clipboardManager: ClipboardManager? = null,
|
||||
): ComponentActivity() {
|
||||
private val tag: String? = this::class.simpleName
|
||||
|
||||
private val activityManager by lazy { activityManager ?: getSystemService<ActivityManager>()!! }
|
||||
private val downloadManager by lazy { downloadManager ?: getSystemService<DownloadManager>()!! }
|
||||
private val clipboardManager by lazy { clipboardManager ?: getSystemService<ClipboardManager>()!! }
|
||||
|
||||
private val viewModel: MainViewModel by viewModels()
|
||||
|
||||
// Get a MemoryInfo object for the device's current memory status.
|
||||
private fun availableMemory(): ActivityManager.MemoryInfo {
|
||||
return ActivityManager.MemoryInfo().also { memoryInfo ->
|
||||
activityManager.getMemoryInfo(memoryInfo)
|
||||
}
|
||||
}
|
||||
|
||||
override fun onCreate(savedInstanceState: Bundle?) {
|
||||
super.onCreate(savedInstanceState)
|
||||
|
||||
StrictMode.setVmPolicy(
|
||||
VmPolicy.Builder(StrictMode.getVmPolicy())
|
||||
.detectLeakedClosableObjects()
|
||||
.build()
|
||||
)
|
||||
|
||||
val free = Formatter.formatFileSize(this, availableMemory().availMem)
|
||||
val total = Formatter.formatFileSize(this, availableMemory().totalMem)
|
||||
|
||||
viewModel.log("Current memory: $free / $total")
|
||||
viewModel.log("Downloads directory: ${getExternalFilesDir(null)}")
|
||||
|
||||
val extFilesDir = getExternalFilesDir(null)
|
||||
|
||||
val models = listOf(
|
||||
Downloadable(
|
||||
"Phi-2 7B (Q4_0, 1.6 GiB)",
|
||||
Uri.parse("https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf?download=true"),
|
||||
File(extFilesDir, "phi-2-q4_0.gguf"),
|
||||
),
|
||||
Downloadable(
|
||||
"TinyLlama 1.1B (f16, 2.2 GiB)",
|
||||
Uri.parse("https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf?download=true"),
|
||||
File(extFilesDir, "tinyllama-1.1-f16.gguf"),
|
||||
),
|
||||
Downloadable(
|
||||
"Phi 2 DPO (Q3_K_M, 1.48 GiB)",
|
||||
Uri.parse("https://huggingface.co/TheBloke/phi-2-dpo-GGUF/resolve/main/phi-2-dpo.Q3_K_M.gguf?download=true"),
|
||||
File(extFilesDir, "phi-2-dpo.Q3_K_M.gguf")
|
||||
),
|
||||
)
|
||||
|
||||
setContent {
|
||||
LlamaAndroidTheme {
|
||||
// A surface container using the 'background' color from the theme
|
||||
Surface(
|
||||
modifier = Modifier.fillMaxSize(),
|
||||
color = MaterialTheme.colorScheme.background
|
||||
) {
|
||||
MainCompose(
|
||||
viewModel,
|
||||
clipboardManager,
|
||||
downloadManager,
|
||||
models,
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Composable
|
||||
fun MainCompose(
|
||||
viewModel: MainViewModel,
|
||||
clipboard: ClipboardManager,
|
||||
dm: DownloadManager,
|
||||
models: List<Downloadable>
|
||||
) {
|
||||
Column {
|
||||
val scrollState = rememberLazyListState()
|
||||
|
||||
Box(modifier = Modifier.weight(1f)) {
|
||||
LazyColumn(state = scrollState) {
|
||||
items(viewModel.messages) {
|
||||
Text(
|
||||
it,
|
||||
style = MaterialTheme.typography.bodyLarge.copy(color = LocalContentColor.current),
|
||||
modifier = Modifier.padding(16.dp)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
OutlinedTextField(
|
||||
value = viewModel.message,
|
||||
onValueChange = { viewModel.updateMessage(it) },
|
||||
label = { Text("Message") },
|
||||
)
|
||||
Row {
|
||||
Button({ viewModel.send() }) { Text("Send") }
|
||||
Button({ viewModel.bench(8, 4, 1) }) { Text("Bench") }
|
||||
Button({ viewModel.clear() }) { Text("Clear") }
|
||||
Button({
|
||||
viewModel.messages.joinToString("\n").let {
|
||||
clipboard.setPrimaryClip(ClipData.newPlainText("", it))
|
||||
}
|
||||
}) { Text("Copy") }
|
||||
}
|
||||
|
||||
Column {
|
||||
for (model in models) {
|
||||
Downloadable.Button(viewModel, dm, model)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
package com.example.llama
|
||||
|
||||
import android.util.Log
|
||||
import androidx.compose.runtime.getValue
|
||||
import androidx.compose.runtime.mutableStateOf
|
||||
import androidx.compose.runtime.setValue
|
||||
import androidx.lifecycle.ViewModel
|
||||
import androidx.lifecycle.viewModelScope
|
||||
import kotlinx.coroutines.flow.catch
|
||||
import kotlinx.coroutines.launch
|
||||
|
||||
class MainViewModel(private val llm: Llm = Llm.instance()): ViewModel() {
|
||||
companion object {
|
||||
@JvmStatic
|
||||
private val NanosPerSecond = 1_000_000_000.0
|
||||
}
|
||||
|
||||
private val tag: String? = this::class.simpleName
|
||||
|
||||
var messages by mutableStateOf(listOf("Initializing..."))
|
||||
private set
|
||||
|
||||
var message by mutableStateOf("")
|
||||
private set
|
||||
|
||||
override fun onCleared() {
|
||||
super.onCleared()
|
||||
|
||||
viewModelScope.launch {
|
||||
try {
|
||||
llm.unload()
|
||||
} catch (exc: IllegalStateException) {
|
||||
messages += exc.message!!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fun send() {
|
||||
val text = message
|
||||
message = ""
|
||||
|
||||
// Add to messages console.
|
||||
messages += text
|
||||
messages += ""
|
||||
|
||||
viewModelScope.launch {
|
||||
llm.send(text)
|
||||
.catch {
|
||||
Log.e(tag, "send() failed", it)
|
||||
messages += it.message!!
|
||||
}
|
||||
.collect { messages = messages.dropLast(1) + (messages.last() + it) }
|
||||
}
|
||||
}
|
||||
|
||||
fun bench(pp: Int, tg: Int, pl: Int, nr: Int = 1) {
|
||||
viewModelScope.launch {
|
||||
try {
|
||||
val start = System.nanoTime()
|
||||
val warmupResult = llm.bench(pp, tg, pl, nr)
|
||||
val end = System.nanoTime()
|
||||
|
||||
messages += warmupResult
|
||||
|
||||
val warmup = (end - start).toDouble() / NanosPerSecond
|
||||
messages += "Warm up time: $warmup seconds, please wait..."
|
||||
|
||||
if (warmup > 5.0) {
|
||||
messages += "Warm up took too long, aborting benchmark"
|
||||
return@launch
|
||||
}
|
||||
|
||||
messages += llm.bench(512, 128, 1, 3)
|
||||
} catch (exc: IllegalStateException) {
|
||||
Log.e(tag, "bench() failed", exc)
|
||||
messages += exc.message!!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fun load(pathToModel: String) {
|
||||
viewModelScope.launch {
|
||||
try {
|
||||
llm.load(pathToModel)
|
||||
messages += "Loaded $pathToModel"
|
||||
} catch (exc: IllegalStateException) {
|
||||
Log.e(tag, "load() failed", exc)
|
||||
messages += exc.message!!
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fun updateMessage(newMessage: String) {
|
||||
message = newMessage
|
||||
}
|
||||
|
||||
fun clear() {
|
||||
messages = listOf()
|
||||
}
|
||||
|
||||
fun log(message: String) {
|
||||
messages += message
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package com.example.llama.ui.theme
|
||||
|
||||
import androidx.compose.ui.graphics.Color
|
||||
|
||||
val Purple80 = Color(0xFFD0BCFF)
|
||||
val PurpleGrey80 = Color(0xFFCCC2DC)
|
||||
val Pink80 = Color(0xFFEFB8C8)
|
||||
|
||||
val Purple40 = Color(0xFF6650a4)
|
||||
val PurpleGrey40 = Color(0xFF625b71)
|
||||
val Pink40 = Color(0xFF7D5260)
|
|
@ -0,0 +1,70 @@
|
|||
package com.example.llama.ui.theme
|
||||
|
||||
import android.app.Activity
|
||||
import android.os.Build
|
||||
import androidx.compose.foundation.isSystemInDarkTheme
|
||||
import androidx.compose.material3.MaterialTheme
|
||||
import androidx.compose.material3.darkColorScheme
|
||||
import androidx.compose.material3.dynamicDarkColorScheme
|
||||
import androidx.compose.material3.dynamicLightColorScheme
|
||||
import androidx.compose.material3.lightColorScheme
|
||||
import androidx.compose.runtime.Composable
|
||||
import androidx.compose.runtime.SideEffect
|
||||
import androidx.compose.ui.graphics.toArgb
|
||||
import androidx.compose.ui.platform.LocalContext
|
||||
import androidx.compose.ui.platform.LocalView
|
||||
import androidx.core.view.WindowCompat
|
||||
|
||||
private val DarkColorScheme = darkColorScheme(
|
||||
primary = Purple80,
|
||||
secondary = PurpleGrey80,
|
||||
tertiary = Pink80
|
||||
)
|
||||
|
||||
private val LightColorScheme = lightColorScheme(
|
||||
primary = Purple40,
|
||||
secondary = PurpleGrey40,
|
||||
tertiary = Pink40
|
||||
|
||||
/* Other default colors to override
|
||||
background = Color(0xFFFFFBFE),
|
||||
surface = Color(0xFFFFFBFE),
|
||||
onPrimary = Color.White,
|
||||
onSecondary = Color.White,
|
||||
onTertiary = Color.White,
|
||||
onBackground = Color(0xFF1C1B1F),
|
||||
onSurface = Color(0xFF1C1B1F),
|
||||
*/
|
||||
)
|
||||
|
||||
@Composable
|
||||
fun LlamaAndroidTheme(
|
||||
darkTheme: Boolean = isSystemInDarkTheme(),
|
||||
// Dynamic color is available on Android 12+
|
||||
dynamicColor: Boolean = true,
|
||||
content: @Composable () -> Unit
|
||||
) {
|
||||
val colorScheme = when {
|
||||
dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
|
||||
val context = LocalContext.current
|
||||
if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
|
||||
}
|
||||
|
||||
darkTheme -> DarkColorScheme
|
||||
else -> LightColorScheme
|
||||
}
|
||||
val view = LocalView.current
|
||||
if (!view.isInEditMode) {
|
||||
SideEffect {
|
||||
val window = (view.context as Activity).window
|
||||
window.statusBarColor = colorScheme.primary.toArgb()
|
||||
WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme
|
||||
}
|
||||
}
|
||||
|
||||
MaterialTheme(
|
||||
colorScheme = colorScheme,
|
||||
typography = Typography,
|
||||
content = content
|
||||
)
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
package com.example.llama.ui.theme
|
||||
|
||||
import androidx.compose.material3.Typography
|
||||
import androidx.compose.ui.text.TextStyle
|
||||
import androidx.compose.ui.text.font.FontFamily
|
||||
import androidx.compose.ui.text.font.FontWeight
|
||||
import androidx.compose.ui.unit.sp
|
||||
|
||||
// Set of Material typography styles to start with
|
||||
val Typography = Typography(
|
||||
bodyLarge = TextStyle(
|
||||
fontFamily = FontFamily.Default,
|
||||
fontWeight = FontWeight.Normal,
|
||||
fontSize = 16.sp,
|
||||
lineHeight = 24.sp,
|
||||
letterSpacing = 0.5.sp
|
||||
)
|
||||
/* Other default text styles to override
|
||||
titleLarge = TextStyle(
|
||||
fontFamily = FontFamily.Default,
|
||||
fontWeight = FontWeight.Normal,
|
||||
fontSize = 22.sp,
|
||||
lineHeight = 28.sp,
|
||||
letterSpacing = 0.sp
|
||||
),
|
||||
labelSmall = TextStyle(
|
||||
fontFamily = FontFamily.Default,
|
||||
fontWeight = FontWeight.Medium,
|
||||
fontSize = 11.sp,
|
||||
lineHeight = 16.sp,
|
||||
letterSpacing = 0.5.sp
|
||||
)
|
||||
*/
|
||||
)
|
|
@ -0,0 +1,170 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path
|
||||
android:fillColor="#3DDC84"
|
||||
android:pathData="M0,0h108v108h-108z" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M9,0L9,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,0L19,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,0L29,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,0L39,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,0L49,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,0L59,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,0L69,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,0L79,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M89,0L89,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M99,0L99,108"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,9L108,9"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,19L108,19"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,29L108,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,39L108,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,49L108,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,59L108,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,69L108,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,79L108,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,89L108,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M0,99L108,99"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,29L89,29"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,39L89,39"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,49L89,49"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,59L89,59"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,69L89,69"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M19,79L89,79"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M29,19L29,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M39,19L39,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M49,19L49,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M59,19L59,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M69,19L69,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
<path
|
||||
android:fillColor="#00000000"
|
||||
android:pathData="M79,19L79,89"
|
||||
android:strokeWidth="0.8"
|
||||
android:strokeColor="#33FFFFFF" />
|
||||
</vector>
|
|
@ -0,0 +1,30 @@
|
|||
<vector xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:aapt="http://schemas.android.com/aapt"
|
||||
android:width="108dp"
|
||||
android:height="108dp"
|
||||
android:viewportWidth="108"
|
||||
android:viewportHeight="108">
|
||||
<path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
|
||||
<aapt:attr name="android:fillColor">
|
||||
<gradient
|
||||
android:endX="85.84757"
|
||||
android:endY="92.4963"
|
||||
android:startX="42.9492"
|
||||
android:startY="49.59793"
|
||||
android:type="linear">
|
||||
<item
|
||||
android:color="#44000000"
|
||||
android:offset="0.0" />
|
||||
<item
|
||||
android:color="#00000000"
|
||||
android:offset="1.0" />
|
||||
</gradient>
|
||||
</aapt:attr>
|
||||
</path>
|
||||
<path
|
||||
android:fillColor="#FFFFFF"
|
||||
android:fillType="nonZero"
|
||||
android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
|
||||
android:strokeWidth="1"
|
||||
android:strokeColor="#00000000" />
|
||||
</vector>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
|
||||
<background android:drawable="@drawable/ic_launcher_background" />
|
||||
<foreground android:drawable="@drawable/ic_launcher_foreground" />
|
||||
<monochrome android:drawable="@drawable/ic_launcher_foreground" />
|
||||
</adaptive-icon>
|
After Width: | Height: | Size: 1.4 KiB |
After Width: | Height: | Size: 2.8 KiB |
After Width: | Height: | Size: 982 B |
After Width: | Height: | Size: 1.7 KiB |
After Width: | Height: | Size: 1.9 KiB |
After Width: | Height: | Size: 3.8 KiB |
After Width: | Height: | Size: 2.8 KiB |
After Width: | Height: | Size: 5.8 KiB |
After Width: | Height: | Size: 3.8 KiB |
After Width: | Height: | Size: 7.6 KiB |
10
examples/llama.android/app/src/main/res/values/colors.xml
Normal file
|
@ -0,0 +1,10 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
<color name="purple_200">#FFBB86FC</color>
|
||||
<color name="purple_500">#FF6200EE</color>
|
||||
<color name="purple_700">#FF3700B3</color>
|
||||
<color name="teal_200">#FF03DAC5</color>
|
||||
<color name="teal_700">#FF018786</color>
|
||||
<color name="black">#FF000000</color>
|
||||
<color name="white">#FFFFFFFF</color>
|
||||
</resources>
|
|
@ -0,0 +1,3 @@
|
|||
<resources>
|
||||
<string name="app_name">LlamaAndroid</string>
|
||||
</resources>
|
|
@ -0,0 +1,5 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<resources>
|
||||
|
||||
<style name="Theme.LlamaAndroid" parent="android:Theme.Material.Light.NoActionBar" />
|
||||
</resources>
|
13
examples/llama.android/app/src/main/res/xml/backup_rules.xml
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?xml version="1.0" encoding="utf-8"?><!--
|
||||
Sample backup rules file; uncomment and customize as necessary.
|
||||
See https://developer.android.com/guide/topics/data/autobackup
|
||||
for details.
|
||||
Note: This file is ignored for devices older that API 31
|
||||
See https://developer.android.com/about/versions/12/backup-restore
|
||||
-->
|
||||
<full-backup-content>
|
||||
<!--
|
||||
<include domain="sharedpref" path="."/>
|
||||
<exclude domain="sharedpref" path="device.xml"/>
|
||||
-->
|
||||
</full-backup-content>
|
|
@ -0,0 +1,19 @@
|
|||
<?xml version="1.0" encoding="utf-8"?><!--
|
||||
Sample data extraction rules file; uncomment and customize as necessary.
|
||||
See https://developer.android.com/about/versions/12/backup-restore#xml-changes
|
||||
for details.
|
||||
-->
|
||||
<data-extraction-rules>
|
||||
<cloud-backup>
|
||||
<!-- TODO: Use <include> and <exclude> to control what is backed up.
|
||||
<include .../>
|
||||
<exclude .../>
|
||||
-->
|
||||
</cloud-backup>
|
||||
<!--
|
||||
<device-transfer>
|
||||
<include .../>
|
||||
<exclude .../>
|
||||
</device-transfer>
|
||||
-->
|
||||
</data-extraction-rules>
|
5
examples/llama.android/build.gradle.kts
Normal file
|
@ -0,0 +1,5 @@
|
|||
// Top-level build file where you can add configuration options common to all sub-projects/modules.
|
||||
plugins {
|
||||
id("com.android.application") version "8.2.0" apply false
|
||||
id("org.jetbrains.kotlin.android") version "1.9.0" apply false
|
||||
}
|
23
examples/llama.android/gradle.properties
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Project-wide Gradle settings.
|
||||
# IDE (e.g. Android Studio) users:
|
||||
# Gradle settings configured through the IDE *will override*
|
||||
# any settings specified in this file.
|
||||
# For more details on how to configure your build environment visit
|
||||
# http://www.gradle.org/docs/current/userguide/build_environment.html
|
||||
# Specifies the JVM arguments used for the daemon process.
|
||||
# The setting is particularly useful for tweaking memory settings.
|
||||
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
|
||||
# When configured, Gradle will run in incubating parallel mode.
|
||||
# This option should only be used with decoupled projects. More details, visit
|
||||
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
|
||||
# org.gradle.parallel=true
|
||||
# AndroidX package structure to make it clearer which packages are bundled with the
|
||||
# Android operating system, and which are packaged with your app's APK
|
||||
# https://developer.android.com/topic/libraries/support-library/androidx-rn
|
||||
android.useAndroidX=true
|
||||
# Kotlin code style for this project: "official" or "obsolete":
|
||||
kotlin.code.style=official
|
||||
# Enables namespacing of each library's R class so that its R class includes only the
|
||||
# resources declared in the library itself and none from the library's dependencies,
|
||||
# thereby reducing the size of the R class for that library
|
||||
android.nonTransitiveRClass=true
|
BIN
examples/llama.android/gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
6
examples/llama.android/gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
#Thu Dec 21 14:31:09 AEDT 2023
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
185
examples/llama.android/gradlew
vendored
Executable file
|
@ -0,0 +1,185 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
#
|
||||
# Copyright 2015 the original author or authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
##############################################################################
|
||||
##
|
||||
## Gradle start up script for UN*X
|
||||
##
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
# Resolve links: $0 may be a link
|
||||
PRG="$0"
|
||||
# Need this for relative symlinks.
|
||||
while [ -h "$PRG" ] ; do
|
||||
ls=`ls -ld "$PRG"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG=`dirname "$PRG"`"/$link"
|
||||
fi
|
||||
done
|
||||
SAVED="`pwd`"
|
||||
cd "`dirname \"$PRG\"`/" >/dev/null
|
||||
APP_HOME="`pwd -P`"
|
||||
cd "$SAVED" >/dev/null
|
||||
|
||||
APP_NAME="Gradle"
|
||||
APP_BASE_NAME=`basename "$0"`
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD="maximum"
|
||||
|
||||
warn () {
|
||||
echo "$*"
|
||||
}
|
||||
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
}
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "`uname`" in
|
||||
CYGWIN* )
|
||||
cygwin=true
|
||||
;;
|
||||
Darwin* )
|
||||
darwin=true
|
||||
;;
|
||||
MINGW* )
|
||||
msys=true
|
||||
;;
|
||||
NONSTOP* )
|
||||
nonstop=true
|
||||
;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
||||
else
|
||||
JAVACMD="$JAVA_HOME/bin/java"
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD="java"
|
||||
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
|
||||
MAX_FD_LIMIT=`ulimit -H -n`
|
||||
if [ $? -eq 0 ] ; then
|
||||
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
||||
MAX_FD="$MAX_FD_LIMIT"
|
||||
fi
|
||||
ulimit -n $MAX_FD
|
||||
if [ $? -ne 0 ] ; then
|
||||
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
||||
fi
|
||||
else
|
||||
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
||||
fi
|
||||
fi
|
||||
|
||||
# For Darwin, add options to specify how the application appears in the dock
|
||||
if $darwin; then
|
||||
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
||||
fi
|
||||
|
||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
|
||||
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
||||
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
||||
|
||||
JAVACMD=`cygpath --unix "$JAVACMD"`
|
||||
|
||||
# We build the pattern for arguments to be converted via cygpath
|
||||
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
||||
SEP=""
|
||||
for dir in $ROOTDIRSRAW ; do
|
||||
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
||||
SEP="|"
|
||||
done
|
||||
OURCYGPATTERN="(^($ROOTDIRS))"
|
||||
# Add a user-defined pattern to the cygpath arguments
|
||||
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
||||
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
||||
fi
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
i=0
|
||||
for arg in "$@" ; do
|
||||
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
||||
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
||||
|
||||
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
||||
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
||||
else
|
||||
eval `echo args$i`="\"$arg\""
|
||||
fi
|
||||
i=`expr $i + 1`
|
||||
done
|
||||
case $i in
|
||||
0) set -- ;;
|
||||
1) set -- "$args0" ;;
|
||||
2) set -- "$args0" "$args1" ;;
|
||||
3) set -- "$args0" "$args1" "$args2" ;;
|
||||
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
||||
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
||||
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
||||
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
||||
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
||||
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
||||
esac
|
||||
fi
|
||||
|
||||
# Escape application args
|
||||
save () {
|
||||
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
|
||||
echo " "
|
||||
}
|
||||
APP_ARGS=`save "$@"`
|
||||
|
||||
# Collect all arguments for the java command, following the shell quoting and substitution rules
|
||||
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
|
||||
|
||||
exec "$JAVACMD" "$@"
|
17
examples/llama.android/settings.gradle.kts
Normal file
|
@ -0,0 +1,17 @@
|
|||
pluginManagement {
|
||||
repositories {
|
||||
google()
|
||||
mavenCentral()
|
||||
gradlePluginPortal()
|
||||
}
|
||||
}
|
||||
dependencyResolutionManagement {
|
||||
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
|
||||
repositories {
|
||||
google()
|
||||
mavenCentral()
|
||||
}
|
||||
}
|
||||
|
||||
rootProject.name = "LlamaAndroid"
|
||||
include(":app")
|
|
@ -428,6 +428,7 @@ static std::vector<float> hellaswag_evaluate_tokens(
|
|||
for (size_t i_chunk = 0; i_chunk < n_chunk; ++i_chunk) {
|
||||
size_t n_tokens = tokens.size() - i_chunk * n_batch;
|
||||
n_tokens = std::min(n_tokens, size_t(n_batch));
|
||||
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
|
||||
if (llama_decode(ctx, llama_batch_get_one(tokens.data() + i_chunk * n_batch, n_tokens, n_past, 0))) {
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return {};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Function calling example using pydantic models.
|
||||
|
||||
import datetime
|
||||
import json
|
||||
from enum import Enum
|
||||
from typing import Union, Optional
|
||||
|
@ -8,7 +8,8 @@ import requests
|
|||
from pydantic import BaseModel, Field
|
||||
|
||||
import importlib
|
||||
from pydantic_models_to_grammar import generate_gbnf_grammar_and_documentation
|
||||
from pydantic_models_to_grammar import generate_gbnf_grammar_and_documentation, convert_dictionary_to_pydantic_model, add_run_method_to_dynamic_model, create_dynamic_model_from_function
|
||||
|
||||
|
||||
# Function to get completion on the llama.cpp server with grammar.
|
||||
def create_completion(prompt, grammar):
|
||||
|
@ -134,3 +135,121 @@ text = create_completion(prompt=prompt, grammar=gbnf_grammar)
|
|||
json_data = json.loads(text)
|
||||
|
||||
print(Book(**json_data))
|
||||
# An example for parallel function calling with a Python function, a pydantic function model and an OpenAI like function definition.
|
||||
|
||||
def get_current_datetime(output_format: Optional[str] = None):
|
||||
"""
|
||||
Get the current date and time in the given format.
|
||||
Args:
|
||||
output_format: formatting string for the date and time, defaults to '%Y-%m-%d %H:%M:%S'
|
||||
"""
|
||||
if output_format is None:
|
||||
output_format = '%Y-%m-%d %H:%M:%S'
|
||||
return datetime.datetime.now().strftime(output_format)
|
||||
|
||||
|
||||
# Enum for the calculator tool.
|
||||
class MathOperation(Enum):
|
||||
ADD = "add"
|
||||
SUBTRACT = "subtract"
|
||||
MULTIPLY = "multiply"
|
||||
DIVIDE = "divide"
|
||||
|
||||
|
||||
|
||||
# Simple pydantic calculator tool for the agent that can add, subtract, multiply, and divide. Docstring and description of fields will be used in system prompt.
|
||||
class Calculator(BaseModel):
|
||||
"""
|
||||
Perform a math operation on two numbers.
|
||||
"""
|
||||
number_one: Union[int, float] = Field(..., description="First number.")
|
||||
operation: MathOperation = Field(..., description="Math operation to perform.")
|
||||
number_two: Union[int, float] = Field(..., description="Second number.")
|
||||
|
||||
def run(self):
|
||||
if self.operation == MathOperation.ADD:
|
||||
return self.number_one + self.number_two
|
||||
elif self.operation == MathOperation.SUBTRACT:
|
||||
return self.number_one - self.number_two
|
||||
elif self.operation == MathOperation.MULTIPLY:
|
||||
return self.number_one * self.number_two
|
||||
elif self.operation == MathOperation.DIVIDE:
|
||||
return self.number_one / self.number_two
|
||||
else:
|
||||
raise ValueError("Unknown operation.")
|
||||
|
||||
|
||||
# Example function to get the weather
|
||||
def get_current_weather(location, unit):
|
||||
"""Get the current weather in a given location"""
|
||||
if "London" in location:
|
||||
return json.dumps({"location": "London", "temperature": "42", "unit": unit.value})
|
||||
elif "New York" in location:
|
||||
return json.dumps({"location": "New York", "temperature": "24", "unit": unit.value})
|
||||
elif "North Pole" in location:
|
||||
return json.dumps({"location": "North Pole", "temperature": "-42", "unit": unit.value})
|
||||
else:
|
||||
return json.dumps({"location": location, "temperature": "unknown"})
|
||||
|
||||
|
||||
# Here is a function definition in OpenAI style
|
||||
current_weather_tool = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
# Convert OpenAI function definition into pydantic model
|
||||
current_weather_tool_model = convert_dictionary_to_pydantic_model(current_weather_tool)
|
||||
# Add the actual function to a pydantic model
|
||||
current_weather_tool_model = add_run_method_to_dynamic_model(current_weather_tool_model, get_current_weather)
|
||||
|
||||
# Convert normal Python function to a pydantic model
|
||||
current_datetime_model = create_dynamic_model_from_function(get_current_datetime)
|
||||
|
||||
tool_list = [SendMessageToUser, Calculator, current_datetime_model, current_weather_tool_model]
|
||||
|
||||
|
||||
gbnf_grammar, documentation = generate_gbnf_grammar_and_documentation(
|
||||
pydantic_model_list=tool_list, outer_object_name="function",
|
||||
outer_object_content="params", model_prefix="Function", fields_prefix="Parameters", list_of_outputs=True)
|
||||
|
||||
system_message = "You are an advanced AI assistant. You are interacting with the user and with your environment by calling functions. You call functions by writing JSON objects, which represent specific function calls.\nBelow is a list of your available function calls:\n\n" + documentation
|
||||
|
||||
|
||||
text = """Get the date and time, get the current weather in celsius in London and solve the following calculation: 42 * 42"""
|
||||
prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant"
|
||||
|
||||
text = create_completion(prompt=prompt, grammar=gbnf_grammar)
|
||||
|
||||
json_data = json.loads(text)
|
||||
|
||||
print(json_data)
|
||||
# Should output something like this:
|
||||
# [{'function': 'get_current_datetime', 'params': {'output_format': '%Y-%m-%d %H:%M:%S'}}, {'function': 'get_current_weather', 'params': {'location': 'London', 'unit': 'celsius'}}, {'function': 'Calculator', 'params': {'number_one': 42, 'operation': 'multiply', 'number_two': 42}}]
|
||||
|
||||
|
||||
for call in json_data:
|
||||
if call["function"] == "Calculator":
|
||||
print(Calculator(**call["params"]).run())
|
||||
elif call["function"] == "get_current_datetime":
|
||||
print(current_datetime_model(**call["params"]).run())
|
||||
elif call["function"] == "get_current_weather":
|
||||
print(current_weather_tool_model(**call["params"]).run())
|
||||
# Should output something like this:
|
||||
# 2024-01-14 13:36:06
|
||||
# {"location": "London", "temperature": "42", "unit": "celsius"}
|
||||
# 1764
|
||||
|
|
|
@ -6,49 +6,11 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// a function that can be called for every computed node during graph evaluation
|
||||
// the user can choose to whether to observe the data of the node depending on the tensor parameters
|
||||
static bool observe_compute(struct ggml_tensor * t, bool ask, void * user_data) {
|
||||
GGML_UNUSED(user_data);
|
||||
|
||||
// the scheduler is asking us if we want to observe this node
|
||||
if (ask) {
|
||||
// check if name contains soft_max (customize to your needs)
|
||||
return strstr(t->name, "soft_max") != 0;
|
||||
}
|
||||
|
||||
// print the node info
|
||||
printf("%s: t->name = %32s, t->op = %12s, [%5d, %5d, %5d, %5d]\n",
|
||||
__func__, t->name, ggml_op_name(t->op), (int) t->ne[0], (int) t->ne[1], (int) t->ne[2], (int) t->ne[3]);
|
||||
|
||||
// this will copy the data to host memory (if needed)
|
||||
static std::vector<float> t_data;
|
||||
|
||||
const bool is_host = ggml_backend_buffer_is_host(t->buffer);
|
||||
|
||||
if (!is_host || !ggml_is_contiguous(t)) {
|
||||
t_data.resize(ggml_nelements(t));
|
||||
ggml_backend_tensor_get(t, t_data.data(), 0, ggml_nbytes(t));
|
||||
}
|
||||
|
||||
const float * data = is_host ? (const float *) t->data : t_data.data();
|
||||
|
||||
// print first row
|
||||
for (int i = 0; i < t->ne[0]; i++) {
|
||||
printf("%8.4f ", data[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
|
||||
bool observe = false;
|
||||
|
||||
if (argc == 1 || argv[1][0] == '-') {
|
||||
printf("usage: %s MODEL_PATH [PROMPT] [OBSERV]\n" , argv[0]);
|
||||
printf("usage: %s MODEL_PATH [PROMPT]\n" , argv[0]);
|
||||
return 1 ;
|
||||
}
|
||||
|
||||
|
@ -60,10 +22,6 @@ int main(int argc, char ** argv) {
|
|||
params.prompt = argv[2];
|
||||
}
|
||||
|
||||
if (argc >= 4) {
|
||||
observe = atoi(argv[3]);
|
||||
}
|
||||
|
||||
if (params.prompt.empty()) {
|
||||
params.prompt = "Hello my name is";
|
||||
}
|
||||
|
@ -79,7 +37,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
llama_model_params model_params = llama_model_default_params();
|
||||
|
||||
model_params.n_gpu_layers = 99; // offload all layers to the GPU
|
||||
// model_params.n_gpu_layers = 99; // offload all layers to the GPU
|
||||
|
||||
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
|
||||
|
||||
|
@ -97,9 +55,6 @@ int main(int argc, char ** argv) {
|
|||
ctx_params.n_threads = params.n_threads;
|
||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
|
||||
ctx_params.cb_eval = observe ? observe_compute : NULL;
|
||||
ctx_params.cb_eval_user_data = NULL;
|
||||
|
||||
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
||||
|
||||
if (ctx == NULL) {
|
||||
|
|
|
@ -65,6 +65,10 @@ int main(int argc, char ** argv) {
|
|||
// load the draft model
|
||||
params.model = params.model_draft;
|
||||
params.n_gpu_layers = params.n_gpu_layers_draft;
|
||||
if (params.n_threads_draft > 0) {
|
||||
params.n_threads = params.n_threads_draft;
|
||||
}
|
||||
params.n_threads_batch = params.n_threads_batch_draft;
|
||||
std::tie(model_dft, ctx_dft) = llama_init_from_gpt_params(params);
|
||||
|
||||
{
|
||||
|
|