Merge commit '31f27758fa
' into ceb/nomic-vulkan
This commit is contained in:
commit
3959283eed
62 changed files with 10619 additions and 2566 deletions
|
@ -1258,9 +1258,9 @@ static struct ggml_tensor * forward_lora(
|
|||
}
|
||||
|
||||
static void sample_softmax(struct ggml_tensor * logits, struct ggml_tensor * probs, struct ggml_tensor * best_samples) {
|
||||
assert(logits->n_dims == 2);
|
||||
assert(probs->n_dims == 2);
|
||||
assert(best_samples->n_dims == 1);
|
||||
assert(ggml_is_matrix(logits));
|
||||
assert(ggml_is_matrix(probs));
|
||||
assert(ggml_is_vector(best_samples));
|
||||
assert(logits->ne[1] == best_samples->ne[0]);
|
||||
assert(logits->ne[0] == probs->ne[0]);
|
||||
assert(logits->ne[1] == probs->ne[1]);
|
||||
|
@ -1292,9 +1292,9 @@ static void sample_softmax_batch(
|
|||
struct ggml_context * ctx, struct ggml_tensor * logits, struct ggml_tensor * probs,
|
||||
struct ggml_tensor * best_samples
|
||||
) {
|
||||
GGML_ASSERT(best_samples->n_dims == 2);
|
||||
GGML_ASSERT(logits->n_dims == 3);
|
||||
GGML_ASSERT(probs->n_dims == 3);
|
||||
GGML_ASSERT(ggml_is_matrix(best_samples));
|
||||
GGML_ASSERT(ggml_is_3d(logits));
|
||||
GGML_ASSERT(ggml_is_3d(probs));
|
||||
int n_tokens = best_samples->ne[0];
|
||||
int n_batch = best_samples->ne[1];
|
||||
int n_vocab = logits->ne[0];
|
||||
|
@ -1334,7 +1334,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
|
|||
}
|
||||
|
||||
static void print_matrix(struct ggml_tensor * probs) {
|
||||
assert(probs->n_dims == 2);
|
||||
assert(ggml_is_matrix(probs));
|
||||
for (int i = 0; i < probs->ne[1]; ++i) {
|
||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||
float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
|
||||
|
@ -1386,8 +1386,8 @@ static void get_example_targets(int example_id, struct ggml_tensor * tokens_inpu
|
|||
static void get_example_targets_batch(
|
||||
struct ggml_context * ctx, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * targets
|
||||
) {
|
||||
GGML_ASSERT(tokens_input->n_dims == 2);
|
||||
GGML_ASSERT( targets->n_dims == 3);
|
||||
GGML_ASSERT(ggml_is_matrix(tokens_input));
|
||||
GGML_ASSERT(ggml_is_3d(targets));
|
||||
int n_tokens = tokens_input->ne[0];
|
||||
int n_batch = tokens_input->ne[1];
|
||||
GGML_ASSERT(n_tokens == targets->ne[1]);
|
||||
|
|
|
@ -129,13 +129,13 @@ int main(int argc, char ** argv) {
|
|||
const ggml_type qtype = GGML_TYPE_Q4_1;
|
||||
|
||||
size_t ctx_size = 0;
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32);
|
||||
ctx_size += sizex*sizez*ggml_type_sizef(GGML_TYPE_F32);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(qtype);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(qtype);
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
|
||||
ctx_size += sizex*sizey*ggml_type_sizef(GGML_TYPE_F32); // BLAS
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey);
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizez);
|
||||
ctx_size += ggml_row_size(qtype, sizex*sizey);
|
||||
ctx_size += ggml_row_size(qtype, sizex*sizey);
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
|
||||
ctx_size += ggml_row_size(GGML_TYPE_F32, sizex*sizey); // BLAS
|
||||
ctx_size += 1024*1024*16;
|
||||
|
||||
printf("Allocating Memory of size %zi bytes, %zi MB\n",ctx_size, (ctx_size/1024/1024));
|
||||
|
|
|
@ -427,7 +427,7 @@ static void print_row(struct ggml_tensor * probs, int i) {
|
|||
}
|
||||
|
||||
static void print_matrix(struct ggml_tensor * probs) {
|
||||
assert(probs->n_dims == 2);
|
||||
assert(ggml_is_matrix(probs));
|
||||
for (int i = 0; i < probs->ne[1]; ++i) {
|
||||
for (int k = 0; k < probs->ne[0]; ++k) {
|
||||
float p = get_f32_2d(probs, k, i);
|
||||
|
@ -639,7 +639,7 @@ static void load_vocab(const char *filename, Config *config, struct llama_vocab
|
|||
|
||||
static void convert_weights_ak_to_gg(struct ggml_tensor * gg_weights, const float * karpathy_weights) {
|
||||
int ct;
|
||||
switch (gg_weights->n_dims){
|
||||
switch (ggml_n_dims(gg_weights)) {
|
||||
case 1:
|
||||
ct = 0;
|
||||
for (int i0 = 0; i0 < gg_weights->ne[0]; i0++){
|
||||
|
|
|
@ -1110,7 +1110,7 @@ static void write_tensor(struct llama_file * file, struct ggml_tensor * tensor,
|
|||
name = ggml_get_name(tensor);
|
||||
}
|
||||
uint32_t name_len = strlen(name);
|
||||
uint32_t nd = tensor->n_dims;
|
||||
uint32_t nd = ggml_n_dims(tensor);
|
||||
uint32_t ne[4] = { (uint32_t)tensor->ne[0],
|
||||
(uint32_t)tensor->ne[1],
|
||||
(uint32_t)tensor->ne[2],
|
||||
|
@ -1620,8 +1620,6 @@ int main(int argc, char ** argv) {
|
|||
opt->params.adam.gclip = params.common.adam_gclip;
|
||||
opt->params.adam.eps_f = params.common.adam_eps_f;
|
||||
|
||||
ggml_allocr * alloc = NULL;
|
||||
|
||||
printf("%s: init model\n", __func__);
|
||||
bool existed = load_checkpoint_lora_file(params.common.fn_checkpoint_in, &model, &lora, train);
|
||||
|
||||
|
@ -1725,10 +1723,9 @@ int main(int argc, char ** argv) {
|
|||
|
||||
// allocate input tensors
|
||||
mem_input_data.resize(max_input_size);
|
||||
alloc = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
|
||||
ggml_allocr_alloc(alloc, tokens_input);
|
||||
ggml_allocr_alloc(alloc, target_probs);
|
||||
ggml_allocr_free(alloc);
|
||||
ggml_allocr_t alloc_inps = ggml_allocr_new(mem_input_data.data(), mem_input_data.size(), tensor_alignment);
|
||||
ggml_allocr_alloc(alloc_inps, tokens_input);
|
||||
ggml_allocr_alloc(alloc_inps, target_probs);
|
||||
|
||||
// context for compute tensors without their data
|
||||
const size_t estimated_compute_size_wo_data = (
|
||||
|
@ -1755,7 +1752,7 @@ int main(int argc, char ** argv) {
|
|||
// find best evaluation order
|
||||
for (unsigned order = 0; order < (unsigned) GGML_CGRAPH_EVAL_ORDER_COUNT; ++order) {
|
||||
ctx_compute = ggml_init(ctx_compute_params);
|
||||
alloc = ggml_allocr_new_measure(tensor_alignment);
|
||||
ggml_allocr_t alloc = ggml_allocr_new_measure(tensor_alignment);
|
||||
gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
|
||||
gf->order = (enum ggml_cgraph_eval_order) order;
|
||||
gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
|
||||
|
@ -1788,7 +1785,7 @@ int main(int argc, char ** argv) {
|
|||
// allocate compute tensors
|
||||
mem_compute_data.resize(max_compute_size);
|
||||
ctx_compute = ggml_init(ctx_compute_params);
|
||||
alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
|
||||
ggml_allocr_t alloc = ggml_allocr_new(mem_compute_data.data(), mem_compute_data.size(), tensor_alignment);
|
||||
gf = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
|
||||
gf->order = best_order;
|
||||
gb = ggml_new_graph_custom(ctx_compute, LLAMA_TRAIN_MAX_NODES, true);
|
||||
|
@ -1804,6 +1801,8 @@ int main(int argc, char ** argv) {
|
|||
params.common.use_checkpointing
|
||||
);
|
||||
ggml_allocr_free(alloc);
|
||||
ggml_allocr_free(alloc_inps);
|
||||
|
||||
|
||||
// tokenize data
|
||||
std::vector<llama_token> train_tokens;
|
||||
|
|
|
@ -195,7 +195,7 @@ static bool gguf_ex_read_1(const std::string & fname) {
|
|||
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
|
||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, cur->n_dims, cur->name, cur->data);
|
||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
|
||||
|
||||
// print first 10 elements
|
||||
const float * data = (const float *) cur->data;
|
||||
|
|
1
examples/llama.swiftui/.gitignore
vendored
1
examples/llama.swiftui/.gitignore
vendored
|
@ -1 +1,2 @@
|
|||
xcuserdata
|
||||
xcshareddata
|
||||
|
|
|
@ -6,16 +6,34 @@ enum LlamaError: Error {
|
|||
case couldNotInitializeContext
|
||||
}
|
||||
|
||||
func llama_batch_clear(_ batch: inout llama_batch) {
|
||||
batch.n_tokens = 0
|
||||
}
|
||||
|
||||
func llama_batch_add(_ batch: inout llama_batch, _ id: llama_token, _ pos: llama_pos, _ seq_ids: [llama_seq_id], _ logits: Bool) {
|
||||
batch.token [Int(batch.n_tokens)] = id
|
||||
batch.pos [Int(batch.n_tokens)] = pos
|
||||
batch.n_seq_id[Int(batch.n_tokens)] = Int32(seq_ids.count)
|
||||
for i in 0..<seq_ids.count {
|
||||
batch.seq_id[Int(batch.n_tokens)]![Int(i)] = seq_ids[i]
|
||||
}
|
||||
batch.logits [Int(batch.n_tokens)] = logits ? 1 : 0
|
||||
|
||||
batch.n_tokens += 1
|
||||
}
|
||||
|
||||
actor LlamaContext {
|
||||
private var model: OpaquePointer
|
||||
private var context: OpaquePointer
|
||||
private var batch: llama_batch
|
||||
private var tokens_list: [llama_token]
|
||||
|
||||
/// This variable is used to store temporarily invalid cchars
|
||||
private var temporary_invalid_cchars: [CChar]
|
||||
|
||||
var n_len: Int32 = 512
|
||||
var n_len: Int32 = 64
|
||||
var n_cur: Int32 = 0
|
||||
|
||||
var n_decode: Int32 = 0
|
||||
|
||||
init(model: OpaquePointer, context: OpaquePointer) {
|
||||
|
@ -27,25 +45,34 @@ actor LlamaContext {
|
|||
}
|
||||
|
||||
deinit {
|
||||
llama_batch_free(batch)
|
||||
llama_free(context)
|
||||
llama_free_model(model)
|
||||
llama_backend_free()
|
||||
}
|
||||
|
||||
static func createContext(path: String) throws -> LlamaContext {
|
||||
static func create_context(path: String) throws -> LlamaContext {
|
||||
llama_backend_init(false)
|
||||
let model_params = llama_model_default_params()
|
||||
var model_params = llama_model_default_params()
|
||||
|
||||
#if targetEnvironment(simulator)
|
||||
model_params.n_gpu_layers = 0
|
||||
print("Running on simulator, force use n_gpu_layers = 0")
|
||||
#endif
|
||||
let model = llama_load_model_from_file(path, model_params)
|
||||
guard let model else {
|
||||
print("Could not load model at \(path)")
|
||||
throw LlamaError.couldNotInitializeContext
|
||||
}
|
||||
|
||||
let n_threads = max(1, min(8, ProcessInfo.processInfo.processorCount - 2))
|
||||
print("Using \(n_threads) threads")
|
||||
|
||||
var ctx_params = llama_context_default_params()
|
||||
ctx_params.seed = 1234
|
||||
ctx_params.seed = 1234
|
||||
ctx_params.n_ctx = 2048
|
||||
ctx_params.n_threads = 8
|
||||
ctx_params.n_threads_batch = 8
|
||||
ctx_params.n_threads = UInt32(n_threads)
|
||||
ctx_params.n_threads_batch = UInt32(n_threads)
|
||||
|
||||
let context = llama_new_context_with_model(model, ctx_params)
|
||||
guard let context else {
|
||||
|
@ -56,6 +83,26 @@ actor LlamaContext {
|
|||
return LlamaContext(model: model, context: context)
|
||||
}
|
||||
|
||||
func model_info() -> String {
|
||||
let result = UnsafeMutablePointer<Int8>.allocate(capacity: 256)
|
||||
result.initialize(repeating: Int8(0), count: 256)
|
||||
defer {
|
||||
result.deallocate()
|
||||
}
|
||||
|
||||
// TODO: this is probably very stupid way to get the string from C
|
||||
|
||||
let nChars = llama_model_desc(model, result, 256)
|
||||
let bufferPointer = UnsafeBufferPointer(start: result, count: Int(nChars))
|
||||
|
||||
var SwiftString = ""
|
||||
for char in bufferPointer {
|
||||
SwiftString.append(Character(UnicodeScalar(UInt8(char))))
|
||||
}
|
||||
|
||||
return SwiftString
|
||||
}
|
||||
|
||||
func get_n_tokens() -> Int32 {
|
||||
return batch.n_tokens;
|
||||
}
|
||||
|
@ -79,16 +126,11 @@ actor LlamaContext {
|
|||
print(String(cString: token_to_piece(token: id) + [0]))
|
||||
}
|
||||
|
||||
// batch = llama_batch_init(512, 0) // done in init()
|
||||
batch.n_tokens = Int32(tokens_list.count)
|
||||
llama_batch_clear(&batch)
|
||||
|
||||
for i1 in 0..<batch.n_tokens {
|
||||
for i1 in 0..<tokens_list.count {
|
||||
let i = Int(i1)
|
||||
batch.token[i] = tokens_list[i]
|
||||
batch.pos[i] = i1
|
||||
batch.n_seq_id[Int(i)] = 1
|
||||
batch.seq_id[Int(i)]![0] = 0
|
||||
batch.logits[i] = 0
|
||||
llama_batch_add(&batch, tokens_list[i], Int32(i), [0], false)
|
||||
}
|
||||
batch.logits[Int(batch.n_tokens) - 1] = 1 // true
|
||||
|
||||
|
@ -141,18 +183,11 @@ actor LlamaContext {
|
|||
print(new_token_str)
|
||||
// tokens_list.append(new_token_id)
|
||||
|
||||
batch.n_tokens = 0
|
||||
|
||||
batch.token[Int(batch.n_tokens)] = new_token_id
|
||||
batch.pos[Int(batch.n_tokens)] = n_cur
|
||||
batch.n_seq_id[Int(batch.n_tokens)] = 1
|
||||
batch.seq_id[Int(batch.n_tokens)]![0] = 0
|
||||
batch.logits[Int(batch.n_tokens)] = 1 // true
|
||||
batch.n_tokens += 1
|
||||
llama_batch_clear(&batch)
|
||||
llama_batch_add(&batch, new_token_id, n_cur, [0], true)
|
||||
|
||||
n_decode += 1
|
||||
|
||||
n_cur += 1
|
||||
n_cur += 1
|
||||
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("failed to evaluate llama!")
|
||||
|
@ -161,14 +196,111 @@ actor LlamaContext {
|
|||
return new_token_str
|
||||
}
|
||||
|
||||
func bench(pp: Int, tg: Int, pl: Int, nr: Int = 1) -> String {
|
||||
var pp_avg: Double = 0
|
||||
var tg_avg: Double = 0
|
||||
|
||||
var pp_std: Double = 0
|
||||
var tg_std: Double = 0
|
||||
|
||||
for _ in 0..<nr {
|
||||
// bench prompt processing
|
||||
|
||||
llama_batch_clear(&batch)
|
||||
|
||||
let n_tokens = pp
|
||||
|
||||
for i in 0..<n_tokens {
|
||||
llama_batch_add(&batch, 0, Int32(i), [0], false)
|
||||
}
|
||||
batch.logits[Int(batch.n_tokens) - 1] = 1 // true
|
||||
|
||||
llama_kv_cache_clear(context)
|
||||
|
||||
let t_pp_start = ggml_time_us()
|
||||
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("llama_decode() failed during prompt")
|
||||
}
|
||||
|
||||
let t_pp_end = ggml_time_us()
|
||||
|
||||
// bench text generation
|
||||
|
||||
llama_kv_cache_clear(context)
|
||||
|
||||
let t_tg_start = ggml_time_us()
|
||||
|
||||
for i in 0..<tg {
|
||||
llama_batch_clear(&batch)
|
||||
|
||||
for j in 0..<pl {
|
||||
llama_batch_add(&batch, 0, Int32(i), [Int32(j)], true)
|
||||
}
|
||||
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("llama_decode() failed during text generation")
|
||||
}
|
||||
}
|
||||
|
||||
let t_tg_end = ggml_time_us()
|
||||
|
||||
llama_kv_cache_clear(context)
|
||||
|
||||
let t_pp = Double(t_pp_end - t_pp_start) / 1000000.0
|
||||
let t_tg = Double(t_tg_end - t_tg_start) / 1000000.0
|
||||
|
||||
let speed_pp = Double(pp) / t_pp
|
||||
let speed_tg = Double(pl*tg) / t_tg
|
||||
|
||||
pp_avg += speed_pp
|
||||
tg_avg += speed_tg
|
||||
|
||||
pp_std += speed_pp * speed_pp
|
||||
tg_std += speed_tg * speed_tg
|
||||
|
||||
print("pp \(speed_pp) t/s, tg \(speed_tg) t/s")
|
||||
}
|
||||
|
||||
pp_avg /= Double(nr)
|
||||
tg_avg /= Double(nr)
|
||||
|
||||
if nr > 1 {
|
||||
pp_std = sqrt(pp_std / Double(nr - 1) - pp_avg * pp_avg * Double(nr) / Double(nr - 1))
|
||||
tg_std = sqrt(tg_std / Double(nr - 1) - tg_avg * tg_avg * Double(nr) / Double(nr - 1))
|
||||
} else {
|
||||
pp_std = 0
|
||||
tg_std = 0
|
||||
}
|
||||
|
||||
let model_desc = model_info();
|
||||
let model_size = String(format: "%.2f GiB", Double(llama_model_size(model)) / 1024.0 / 1024.0 / 1024.0);
|
||||
let model_n_params = String(format: "%.2f B", Double(llama_model_n_params(model)) / 1e9);
|
||||
let backend = "Metal";
|
||||
let pp_avg_str = String(format: "%.2f", pp_avg);
|
||||
let tg_avg_str = String(format: "%.2f", tg_avg);
|
||||
let pp_std_str = String(format: "%.2f", pp_std);
|
||||
let tg_std_str = String(format: "%.2f", tg_std);
|
||||
|
||||
var result = ""
|
||||
|
||||
result += String("| model | size | params | backend | test | t/s |\n")
|
||||
result += String("| --- | --- | --- | --- | --- | --- |\n")
|
||||
result += String("| \(model_desc) | \(model_size) | \(model_n_params) | \(backend) | pp \(pp) | \(pp_avg_str) ± \(pp_std_str) |\n")
|
||||
result += String("| \(model_desc) | \(model_size) | \(model_n_params) | \(backend) | tg \(tg) | \(tg_avg_str) ± \(tg_std_str) |\n")
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
func clear() {
|
||||
tokens_list.removeAll()
|
||||
temporary_invalid_cchars.removeAll()
|
||||
llama_kv_cache_clear(context)
|
||||
}
|
||||
|
||||
private func tokenize(text: String, add_bos: Bool) -> [llama_token] {
|
||||
let utf8Count = text.utf8.count
|
||||
let n_tokens = utf8Count + (add_bos ? 1 : 0)
|
||||
let n_tokens = utf8Count + (add_bos ? 1 : 0) + 1
|
||||
let tokens = UnsafeMutablePointer<llama_token>.allocate(capacity: n_tokens)
|
||||
let tokenCount = llama_tokenize(model, text, Int32(utf8Count), tokens, Int32(n_tokens), add_bos, false)
|
||||
|
||||
|
|
|
@ -1,481 +1,483 @@
|
|||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 56;
|
||||
objects = {
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 56;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; };
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; };
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; };
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */; };
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A39BE092AC7601000BFEB40 /* Accelerate.framework */; };
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 549479CA2AC9E16000E0F78B /* Metal.framework */; };
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 549479C52AC9E0F200E0F78B /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc -DGGML_SWIFT -DGGML_USE_METAL -O3"; }; };
|
||||
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */; };
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */; };
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A1C83782AC328BD0096AF73 /* ContentView.swift */; };
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837A2AC328BE0096AF73 /* Assets.xcassets */; };
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */; };
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8A39BE092AC7601000BFEB40 /* Accelerate.framework */; };
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = "<group>"; };
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = "<group>"; };
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../ggml-backend.h"; sourceTree = "<group>"; };
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../ggml-backend.c"; sourceTree = "<group>"; };
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../ggml.c; sourceTree = "<group>"; };
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../ggml.h; sourceTree = "<group>"; };
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../ggml-alloc.h"; sourceTree = "<group>"; };
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../ggml-alloc.c"; sourceTree = "<group>"; };
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = llama.cpp; path = ../../llama.cpp; sourceTree = "<group>"; };
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = llama.h; path = ../../llama.h; sourceTree = "<group>"; };
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../ggml-metal.m"; sourceTree = "<group>"; };
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
||||
8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */ = {isa = PBXFileReference; lastKnownFileType = file; path = "llama-2-7b-chat.Q2_K.gguf"; sourceTree = "<group>"; };
|
||||
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = "<group>"; };
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = "<group>"; };
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../ggml-backend.h"; sourceTree = "<group>"; };
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-backend.c"; path = "../../ggml-backend.c"; sourceTree = "<group>"; };
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../ggml.c; sourceTree = "<group>"; };
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../ggml.h; sourceTree = "<group>"; };
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../ggml-alloc.h"; sourceTree = "<group>"; };
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../ggml-alloc.c"; sourceTree = "<group>"; };
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = llama.cpp; path = ../../llama.cpp; sourceTree = "<group>"; };
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = llama.h; path = ../../llama.h; sourceTree = "<group>"; };
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../ggml-metal.m"; sourceTree = "<group>"; };
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../ggml-metal.h"; sourceTree = "<group>"; };
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../ggml-metal.metal"; sourceTree = "<group>"; };
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
|
||||
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DownloadButton.swift; sourceTree = "<group>"; };
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "bridging-header.h"; sourceTree = "<group>"; };
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = llama.swiftui.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = llama_swiftuiApp.swift; sourceTree = "<group>"; };
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; };
|
||||
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */,
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
549479CB2AC9E16000E0F78B /* Metal.framework in Frameworks */,
|
||||
8A39BE0A2AC7601100BFEB40 /* Accelerate.framework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */,
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */,
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */,
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */,
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */,
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */,
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */,
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */,
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */,
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */,
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */,
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */,
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */,
|
||||
);
|
||||
name = llama.cpp;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C836A2AC328BD0096AF73 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */,
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */,
|
||||
8A3F84232AC4C891005E2EE8 /* models */,
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */,
|
||||
8A1C83742AC328BD0096AF73 /* Products */,
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83742AC328BD0096AF73 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */,
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */,
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */,
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */,
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */,
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */,
|
||||
);
|
||||
path = llama.swiftui;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */,
|
||||
);
|
||||
path = "Preview Content";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */,
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */,
|
||||
);
|
||||
path = Resources;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F841F2AC4C824005E2EE8 /* llama-2-7b-chat.Q2_K.gguf */,
|
||||
);
|
||||
path = models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */,
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */,
|
||||
);
|
||||
path = llama.cpp.swift;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */,
|
||||
);
|
||||
path = UI;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */,
|
||||
);
|
||||
path = Models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
5423760A2B0D9C4B008E6A1C /* ggml-backend.c */,
|
||||
542376092B0D9C40008E6A1C /* ggml-backend.h */,
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */,
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */,
|
||||
549479C82AC9E10B00E0F78B /* ggml-metal.metal */,
|
||||
549479C62AC9E0F200E0F78B /* ggml-metal.h */,
|
||||
549479C52AC9E0F200E0F78B /* ggml-metal.m */,
|
||||
542EA09B2AC8723900A8AEE9 /* ggml.c */,
|
||||
542EA09C2AC8723900A8AEE9 /* ggml.h */,
|
||||
542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */,
|
||||
542EA09E2AC8725700A8AEE9 /* ggml-alloc.h */,
|
||||
542EA0A12AC8729100A8AEE9 /* llama.cpp */,
|
||||
542EA0A22AC8729100A8AEE9 /* llama.h */,
|
||||
);
|
||||
name = llama.cpp;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C836A2AC328BD0096AF73 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D1F62AC7383900FE6CD4 /* llama.cpp */,
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */,
|
||||
8A3F84232AC4C891005E2EE8 /* models */,
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */,
|
||||
8A1C83742AC328BD0096AF73 /* Products */,
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83742AC328BD0096AF73 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C83732AC328BD0096AF73 /* llama.swiftui.app */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C83752AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */,
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */,
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */,
|
||||
8A1C83762AC328BD0096AF73 /* llama_swiftuiApp.swift */,
|
||||
8A1C837A2AC328BE0096AF73 /* Assets.xcassets */,
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */,
|
||||
);
|
||||
path = llama.swiftui;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A1C837C2AC328BE0096AF73 /* Preview Content */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A1C837D2AC328BE0096AF73 /* Preview Assets.xcassets */,
|
||||
);
|
||||
path = "Preview Content";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A39BE082AC7601000BFEB40 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
549479CA2AC9E16000E0F78B /* Metal.framework */,
|
||||
8A39BE092AC7601000BFEB40 /* Accelerate.framework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84102AC4BD85005E2EE8 /* Resources */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */,
|
||||
);
|
||||
path = Resources;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A3F84112AC4BD8C005E2EE8 /* models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
);
|
||||
path = models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A907F312AC7134E006146EA /* llama.cpp.swift */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A08D20A2AC73B1500FE6CD4 /* bridging-header.h */,
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */,
|
||||
);
|
||||
path = llama.cpp.swift;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4A2AC332BF008AE1EA /* UI */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */,
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */,
|
||||
);
|
||||
path = UI;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
8A9F7C4B2AC332DC008AE1EA /* Models */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */,
|
||||
);
|
||||
path = Models;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */;
|
||||
buildPhases = (
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */,
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */,
|
||||
8A1C83712AC328BD0096AF73 /* Resources */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = llama.swiftui;
|
||||
packageProductDependencies = (
|
||||
);
|
||||
productName = llama.swiftui;
|
||||
productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */;
|
||||
productType = "com.apple.product-type.application";
|
||||
};
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */;
|
||||
buildPhases = (
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */,
|
||||
8A1C83702AC328BD0096AF73 /* Frameworks */,
|
||||
8A1C83712AC328BD0096AF73 /* Resources */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = llama.swiftui;
|
||||
packageProductDependencies = (
|
||||
);
|
||||
productName = llama.swiftui;
|
||||
productReference = 8A1C83732AC328BD0096AF73 /* llama.swiftui.app */;
|
||||
productType = "com.apple.product-type.application";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
8A1C836B2AC328BD0096AF73 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
BuildIndependentTargetsInParallel = 1;
|
||||
LastSwiftUpdateCheck = 1500;
|
||||
LastUpgradeCheck = 1500;
|
||||
TargetAttributes = {
|
||||
8A1C83722AC328BD0096AF73 = {
|
||||
CreatedOnToolsVersion = 15.0;
|
||||
LastSwiftMigration = 1500;
|
||||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */;
|
||||
compatibilityVersion = "Xcode 14.0";
|
||||
developmentRegion = en;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
Base,
|
||||
);
|
||||
mainGroup = 8A1C836A2AC328BD0096AF73;
|
||||
packageReferences = (
|
||||
);
|
||||
productRefGroup = 8A1C83742AC328BD0096AF73 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */,
|
||||
);
|
||||
};
|
||||
8A1C836B2AC328BD0096AF73 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
attributes = {
|
||||
BuildIndependentTargetsInParallel = 1;
|
||||
LastSwiftUpdateCheck = 1500;
|
||||
LastUpgradeCheck = 1500;
|
||||
TargetAttributes = {
|
||||
8A1C83722AC328BD0096AF73 = {
|
||||
CreatedOnToolsVersion = 15.0;
|
||||
LastSwiftMigration = 1500;
|
||||
};
|
||||
};
|
||||
};
|
||||
buildConfigurationList = 8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */;
|
||||
compatibilityVersion = "Xcode 14.0";
|
||||
developmentRegion = en;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
Base,
|
||||
);
|
||||
mainGroup = 8A1C836A2AC328BD0096AF73;
|
||||
packageReferences = (
|
||||
);
|
||||
productRefGroup = 8A1C83742AC328BD0096AF73 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
8A1C83722AC328BD0096AF73 /* llama.swiftui */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXResourcesBuildPhase section */
|
||||
8A1C83712AC328BD0096AF73 /* Resources */ = {
|
||||
isa = PBXResourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */,
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
8A1C83712AC328BD0096AF73 /* Resources */ = {
|
||||
isa = PBXResourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */,
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXResourcesBuildPhase section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */,
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
8A1C836F2AC328BD0096AF73 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */,
|
||||
8A1C83792AC328BD0096AF73 /* ContentView.swift in Sources */,
|
||||
8A1C83772AC328BD0096AF73 /* llama_swiftuiApp.swift in Sources */,
|
||||
7FA3D2B32B2EA2F600543F92 /* DownloadButton.swift in Sources */,
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */,
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_TESTABILITY = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
||||
MTL_FAST_MATH = YES;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83802AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
ENABLE_NS_ASSERTIONS = NO;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = NO;
|
||||
MTL_FAST_MATH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_COMPILATION_MODE = wholemodule;
|
||||
VALIDATE_PRODUCT = YES;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
8A1C83822AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83832AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = dwarf;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_TESTABILITY = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
|
||||
MTL_FAST_MATH = YES;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83802AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ASSETCATALOG_COMPILER_GENERATE_SWIFT_ASSET_SYMBOL_EXTENSIONS = YES;
|
||||
CLANG_ANALYZER_NONNULL = YES;
|
||||
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
|
||||
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CLANG_ENABLE_OBJC_ARC = YES;
|
||||
CLANG_ENABLE_OBJC_WEAK = YES;
|
||||
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
|
||||
CLANG_WARN_BOOL_CONVERSION = YES;
|
||||
CLANG_WARN_COMMA = YES;
|
||||
CLANG_WARN_CONSTANT_CONVERSION = YES;
|
||||
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
|
||||
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
|
||||
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
|
||||
CLANG_WARN_EMPTY_BODY = YES;
|
||||
CLANG_WARN_ENUM_CONVERSION = YES;
|
||||
CLANG_WARN_INFINITE_RECURSION = YES;
|
||||
CLANG_WARN_INT_CONVERSION = YES;
|
||||
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
|
||||
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
|
||||
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
|
||||
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
|
||||
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
|
||||
CLANG_WARN_STRICT_PROTOTYPES = YES;
|
||||
CLANG_WARN_SUSPICIOUS_MOVE = YES;
|
||||
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
|
||||
CLANG_WARN_UNREACHABLE_CODE = YES;
|
||||
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
|
||||
COPY_PHASE_STRIP = NO;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
ENABLE_NS_ASSERTIONS = NO;
|
||||
ENABLE_STRICT_OBJC_MSGSEND = YES;
|
||||
ENABLE_USER_SCRIPT_SANDBOXING = YES;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu17;
|
||||
GCC_NO_COMMON_BLOCKS = YES;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
|
||||
GCC_WARN_UNDECLARED_SELECTOR = YES;
|
||||
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
|
||||
GCC_WARN_UNUSED_FUNCTION = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 17.0;
|
||||
LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
|
||||
MTL_ENABLE_DEBUG_INFO = NO;
|
||||
MTL_FAST_MATH = YES;
|
||||
SDKROOT = iphoneos;
|
||||
SWIFT_COMPILATION_MODE = wholemodule;
|
||||
VALIDATE_PRODUCT = YES;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
8A1C83822AC328BE0096AF73 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
8A1C83832AC328BE0096AF73 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
|
||||
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
|
||||
CLANG_ENABLE_MODULES = YES;
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
CURRENT_PROJECT_VERSION = 1;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"llama.swiftui/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = STLSG3FG8Q;
|
||||
ENABLE_PREVIEWS = YES;
|
||||
GENERATE_INFOPLIST_FILE = YES;
|
||||
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
|
||||
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
|
||||
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
|
||||
IPHONEOS_DEPLOYMENT_TARGET = 16.0;
|
||||
LD_RUNPATH_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"@executable_path/Frameworks",
|
||||
);
|
||||
MARKETING_VERSION = 1.0;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = "com.bachittle.llama-swift";
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
SWIFT_OBJC_BRIDGING_HEADER = "llama.cpp.swift/bridging-header.h";
|
||||
SWIFT_VERSION = 5.0;
|
||||
TARGETED_DEVICE_FAMILY = "1,2";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */,
|
||||
8A1C83802AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C83822AC328BE0096AF73 /* Debug */,
|
||||
8A1C83832AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
8A1C836E2AC328BD0096AF73 /* Build configuration list for PBXProject "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C837F2AC328BE0096AF73 /* Debug */,
|
||||
8A1C83802AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
8A1C83812AC328BE0096AF73 /* Build configuration list for PBXNativeTarget "llama.swiftui" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
8A1C83822AC328BE0096AF73 /* Debug */,
|
||||
8A1C83832AC328BE0096AF73 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */;
|
||||
};
|
||||
rootObject = 8A1C836B2AC328BD0096AF73 /* Project object */;
|
||||
}
|
||||
|
|
|
@ -3,24 +3,26 @@ import Foundation
|
|||
@MainActor
|
||||
class LlamaState: ObservableObject {
|
||||
@Published var messageLog = ""
|
||||
@Published var cacheCleared = false
|
||||
|
||||
private var llamaContext: LlamaContext?
|
||||
private var modelUrl: URL? {
|
||||
Bundle.main.url(forResource: "q8_0", withExtension: "gguf", subdirectory: "models")
|
||||
private var defaultModelUrl: URL? {
|
||||
Bundle.main.url(forResource: "ggml-model", withExtension: "gguf", subdirectory: "models")
|
||||
// Bundle.main.url(forResource: "llama-2-7b-chat", withExtension: "Q2_K.gguf", subdirectory: "models")
|
||||
}
|
||||
|
||||
init() {
|
||||
do {
|
||||
try loadModel()
|
||||
try loadModel(modelUrl: defaultModelUrl)
|
||||
} catch {
|
||||
messageLog += "Error!\n"
|
||||
}
|
||||
}
|
||||
|
||||
private func loadModel() throws {
|
||||
func loadModel(modelUrl: URL?) throws {
|
||||
messageLog += "Loading model...\n"
|
||||
if let modelUrl {
|
||||
llamaContext = try LlamaContext.createContext(path: modelUrl.path())
|
||||
llamaContext = try LlamaContext.create_context(path: modelUrl.path())
|
||||
messageLog += "Loaded model \(modelUrl.lastPathComponent)\n"
|
||||
} else {
|
||||
messageLog += "Could not locate model\n"
|
||||
|
@ -31,7 +33,7 @@ class LlamaState: ObservableObject {
|
|||
guard let llamaContext else {
|
||||
return
|
||||
}
|
||||
messageLog += "Attempting to complete text...\n"
|
||||
|
||||
await llamaContext.completion_init(text: text)
|
||||
messageLog += "\(text)"
|
||||
|
||||
|
@ -42,4 +44,42 @@ class LlamaState: ObservableObject {
|
|||
await llamaContext.clear()
|
||||
messageLog += "\n\ndone\n"
|
||||
}
|
||||
|
||||
func bench() async {
|
||||
guard let llamaContext else {
|
||||
return
|
||||
}
|
||||
|
||||
messageLog += "\n"
|
||||
messageLog += "Running benchmark...\n"
|
||||
messageLog += "Model info: "
|
||||
messageLog += await llamaContext.model_info() + "\n"
|
||||
|
||||
let t_start = DispatchTime.now().uptimeNanoseconds
|
||||
await llamaContext.bench(pp: 8, tg: 4, pl: 1) // heat up
|
||||
let t_end = DispatchTime.now().uptimeNanoseconds
|
||||
|
||||
let t_heat = Double(t_end - t_start) / 1_000_000_000.0
|
||||
messageLog += "Heat up time: \(t_heat) seconds, please wait...\n"
|
||||
|
||||
// if more than 5 seconds, then we're probably running on a slow device
|
||||
if t_heat > 5.0 {
|
||||
messageLog += "Heat up time is too long, aborting benchmark\n"
|
||||
return
|
||||
}
|
||||
|
||||
let result = await llamaContext.bench(pp: 512, tg: 128, pl: 1, nr: 3)
|
||||
|
||||
messageLog += "\(result)"
|
||||
messageLog += "\n"
|
||||
}
|
||||
|
||||
func clear() async {
|
||||
guard let llamaContext else {
|
||||
return
|
||||
}
|
||||
|
||||
await llamaContext.clear()
|
||||
messageLog = ""
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,24 +5,132 @@ struct ContentView: View {
|
|||
|
||||
@State private var multiLineText = ""
|
||||
|
||||
private static func cleanupModelCaches() {
|
||||
// Delete all models (*.gguf)
|
||||
let fileManager = FileManager.default
|
||||
let documentsUrl = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0]
|
||||
do {
|
||||
let fileURLs = try fileManager.contentsOfDirectory(at: documentsUrl, includingPropertiesForKeys: nil)
|
||||
for fileURL in fileURLs {
|
||||
if fileURL.pathExtension == "gguf" {
|
||||
try fileManager.removeItem(at: fileURL)
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
print("Error while enumerating files \(documentsUrl.path): \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack {
|
||||
ScrollView(.vertical) {
|
||||
ScrollView(.vertical, showsIndicators: true) {
|
||||
Text(llamaState.messageLog)
|
||||
.font(.system(size: 12))
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.padding()
|
||||
.onTapGesture {
|
||||
UIApplication.shared.sendAction(#selector(UIResponder.resignFirstResponder), to: nil, from: nil, for: nil)
|
||||
}
|
||||
}
|
||||
|
||||
TextEditor(text: $multiLineText)
|
||||
.frame(height: 200)
|
||||
.frame(height: 80)
|
||||
.padding()
|
||||
.border(Color.gray, width: 0.5)
|
||||
Button(action: {
|
||||
sendText()
|
||||
}) {
|
||||
Text("Send")
|
||||
.padding()
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
HStack {
|
||||
Button("Send") {
|
||||
sendText()
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
Button("Bench") {
|
||||
bench()
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
Button("Clear") {
|
||||
clear()
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
|
||||
Button("Copy") {
|
||||
UIPasteboard.general.string = llamaState.messageLog
|
||||
}
|
||||
.padding(8)
|
||||
.background(Color.blue)
|
||||
.foregroundColor(.white)
|
||||
.cornerRadius(8)
|
||||
}
|
||||
|
||||
VStack {
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "TinyLlama-1.1B (Q4_0, 0.6 GiB)",
|
||||
modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true",
|
||||
filename: "tinyllama-1.1b-1t-openorca.Q4_0.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
.padding(.top, 4)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "TinyLlama-1.1B (Q8_0, 1.1 GiB)",
|
||||
modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q8_0.gguf?download=true",
|
||||
filename: "tinyllama-1.1b-1t-openorca.Q8_0.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "TinyLlama-1.1B (F16, 2.2 GiB)",
|
||||
modelUrl: "https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf?download=true",
|
||||
filename: "tinyllama-1.1b-f16.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "Phi-2.7B (Q4_0, 1.6 GiB)",
|
||||
modelUrl: "https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q4_0.gguf?download=true",
|
||||
filename: "phi-2-q4_0.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "Phi-2.7B (Q8_0, 2.8 GiB)",
|
||||
modelUrl: "https://huggingface.co/ggml-org/models/resolve/main/phi-2/ggml-model-q8_0.gguf?download=true",
|
||||
filename: "phi-2-q8_0.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
|
||||
DownloadButton(
|
||||
llamaState: llamaState,
|
||||
modelName: "Mistral-7B-v0.1 (Q4_0, 3.8 GiB)",
|
||||
modelUrl: "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF/resolve/main/mistral-7b-v0.1.Q4_0.gguf?download=true",
|
||||
filename: "mistral-7b-v0.1.Q4_0.gguf"
|
||||
)
|
||||
.font(.system(size: 12))
|
||||
|
||||
Button("Clear downloaded models") {
|
||||
ContentView.cleanupModelCaches()
|
||||
llamaState.cacheCleared = true
|
||||
}
|
||||
.padding(8)
|
||||
.font(.system(size: 12))
|
||||
}
|
||||
}
|
||||
.padding()
|
||||
|
@ -34,9 +142,20 @@ struct ContentView: View {
|
|||
multiLineText = ""
|
||||
}
|
||||
}
|
||||
|
||||
func bench() {
|
||||
Task {
|
||||
await llamaState.bench()
|
||||
}
|
||||
}
|
||||
|
||||
func clear() {
|
||||
Task {
|
||||
await llamaState.clear()
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
#Preview {
|
||||
ContentView()
|
||||
}
|
||||
*/
|
||||
|
||||
//#Preview {
|
||||
// ContentView()
|
||||
//}
|
||||
|
|
122
examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
Normal file
122
examples/llama.swiftui/llama.swiftui/UI/DownloadButton.swift
Normal file
|
@ -0,0 +1,122 @@
|
|||
import SwiftUI
|
||||
|
||||
struct DownloadButton: View {
|
||||
@ObservedObject private var llamaState: LlamaState
|
||||
private var modelName: String
|
||||
private var modelUrl: String
|
||||
private var filename: String
|
||||
|
||||
@State private var status: String
|
||||
|
||||
@State private var downloadTask: URLSessionDownloadTask?
|
||||
@State private var progress = 0.0
|
||||
@State private var observation: NSKeyValueObservation?
|
||||
|
||||
private static func getFileURL(filename: String) -> URL {
|
||||
FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)[0].appendingPathComponent(filename)
|
||||
}
|
||||
|
||||
private func checkFileExistenceAndUpdateStatus() {
|
||||
}
|
||||
|
||||
init(llamaState: LlamaState, modelName: String, modelUrl: String, filename: String) {
|
||||
self.llamaState = llamaState
|
||||
self.modelName = modelName
|
||||
self.modelUrl = modelUrl
|
||||
self.filename = filename
|
||||
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
status = FileManager.default.fileExists(atPath: fileURL.path) ? "downloaded" : "download"
|
||||
}
|
||||
|
||||
private func download() {
|
||||
status = "downloading"
|
||||
print("Downloading model \(modelName) from \(modelUrl)")
|
||||
guard let url = URL(string: modelUrl) else { return }
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
|
||||
downloadTask = URLSession.shared.downloadTask(with: url) { temporaryURL, response, error in
|
||||
if let error = error {
|
||||
print("Error: \(error.localizedDescription)")
|
||||
return
|
||||
}
|
||||
|
||||
guard let response = response as? HTTPURLResponse, (200...299).contains(response.statusCode) else {
|
||||
print("Server error!")
|
||||
return
|
||||
}
|
||||
|
||||
do {
|
||||
if let temporaryURL = temporaryURL {
|
||||
try FileManager.default.copyItem(at: temporaryURL, to: fileURL)
|
||||
print("Writing to \(filename) completed")
|
||||
|
||||
llamaState.cacheCleared = false
|
||||
|
||||
status = "downloaded"
|
||||
}
|
||||
} catch let err {
|
||||
print("Error: \(err.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
observation = downloadTask?.progress.observe(\.fractionCompleted) { progress, _ in
|
||||
self.progress = progress.fractionCompleted
|
||||
}
|
||||
|
||||
downloadTask?.resume()
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack {
|
||||
if status == "download" {
|
||||
Button(action: download) {
|
||||
Text("Download " + modelName)
|
||||
}
|
||||
} else if status == "downloading" {
|
||||
Button(action: {
|
||||
downloadTask?.cancel()
|
||||
status = "download"
|
||||
}) {
|
||||
Text("\(modelName) (Downloading \(Int(progress * 100))%)")
|
||||
}
|
||||
} else if status == "downloaded" {
|
||||
Button(action: {
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
if !FileManager.default.fileExists(atPath: fileURL.path) {
|
||||
download()
|
||||
return
|
||||
}
|
||||
do {
|
||||
try llamaState.loadModel(modelUrl: fileURL)
|
||||
} catch let err {
|
||||
print("Error: \(err.localizedDescription)")
|
||||
}
|
||||
}) {
|
||||
Text("\(modelName) (Downloaded)")
|
||||
}
|
||||
} else {
|
||||
Text("Unknown status")
|
||||
}
|
||||
}
|
||||
.onDisappear() {
|
||||
downloadTask?.cancel()
|
||||
}
|
||||
.onChange(of: llamaState.cacheCleared) { newValue in
|
||||
if newValue {
|
||||
downloadTask?.cancel()
|
||||
let fileURL = DownloadButton.getFileURL(filename: filename)
|
||||
status = FileManager.default.fileExists(atPath: fileURL.path) ? "downloaded" : "download"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// #Preview {
|
||||
// DownloadButton(
|
||||
// llamaState: LlamaState(),
|
||||
// modelName: "TheBloke / TinyLlama-1.1B-1T-OpenOrca-GGUF (Q4_0)",
|
||||
// modelUrl: "https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_0.gguf?download=true",
|
||||
// filename: "tinyllama-1.1b-1t-openorca.Q4_0.gguf"
|
||||
// )
|
||||
// }
|
|
@ -514,7 +514,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
|||
ctx_size += padded_size;
|
||||
if (verbosity >= 3) {
|
||||
printf("%s: tensor[%d]: n_dims = %d, name = %s, tensor_size=%zu, padded_size=%zu, offset=%zu\n", __func__, i,
|
||||
cur->n_dims, cur->name, tensor_size, padded_size, offset);
|
||||
ggml_n_dims(cur), cur->name, tensor_size, padded_size, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -739,7 +739,7 @@ bool clip_image_preprocess(const clip_ctx * ctx, const clip_image_u8 * img, clip
|
|||
temp->ny = longer_side;
|
||||
temp->size = 3 * longer_side * longer_side;
|
||||
temp->data = new uint8_t[temp->size]();
|
||||
uint8_t bc[3] = {122, 116, 104}; // bakground color in RGB from LLaVA
|
||||
uint8_t bc[3] = {122, 116, 104}; // background color in RGB from LLaVA
|
||||
|
||||
// fill with background color
|
||||
for (size_t i = 0; i < temp->size; i++) {
|
||||
|
@ -962,7 +962,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
|||
}
|
||||
|
||||
// quantize only 2D tensors
|
||||
quantize &= (cur->n_dims == 2);
|
||||
quantize &= (ggml_n_dims(cur) == 2);
|
||||
|
||||
if (quantize) {
|
||||
new_type = type;
|
||||
|
@ -1035,7 +1035,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
|||
fout.put(0);
|
||||
}
|
||||
|
||||
printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), cur->n_dims, quantize,
|
||||
printf("%s: n_dims = %d | quantize=%d | size = %f MB -> %f MB\n", name.c_str(), ggml_n_dims(cur), quantize,
|
||||
orig_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
||||
}
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ def bytes_to_unicode():
|
|||
The reversible bpe codes work on unicode strings.
|
||||
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
|
||||
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
|
||||
This is a signficant percentage of your normal, say, 32K bpe vocab.
|
||||
This is a significant percentage of your normal, say, 32K bpe vocab.
|
||||
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
|
||||
And avoids mapping to whitespace/control characters the bpe code barfs on.
|
||||
"""
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# llama.cpp/examples/lookahead
|
||||
|
||||
Demonstartion of lookahead decoding technique:
|
||||
Demonstration of lookahead decoding technique:
|
||||
|
||||
https://lmsys.org/blog/2023-11-21-lookahead-decoding/
|
||||
|
||||
|
|
|
@ -222,7 +222,7 @@ node index.js
|
|||
|
||||
`content`: Set the text to process.
|
||||
|
||||
**POST** `/infill`: For code infilling. Takes a prefix and a suffix and returns the predicted completion as stream.
|
||||
- **POST** `/infill`: For code infilling. Takes a prefix and a suffix and returns the predicted completion as stream.
|
||||
|
||||
*Options:*
|
||||
|
||||
|
|
|
@ -11227,7 +11227,7 @@ class binary_reader
|
|||
}
|
||||
if (is_ndarray) // ndarray dimensional vector can only contain integers, and can not embed another array
|
||||
{
|
||||
return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimentional vector is not allowed", "size"), nullptr));
|
||||
return sax->parse_error(chars_read, get_token_string(), parse_error::create(113, chars_read, exception_message(input_format, "ndarray dimensional vector is not allowed", "size"), nullptr));
|
||||
}
|
||||
std::vector<size_t> dim;
|
||||
if (JSON_HEDLEY_UNLIKELY(!get_ubjson_ndarray_size(dim)))
|
||||
|
|
|
@ -34,7 +34,8 @@ export async function* llama(prompt, params = {}, config = {}) {
|
|||
headers: {
|
||||
'Connection': 'keep-alive',
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'text/event-stream'
|
||||
'Accept': 'text/event-stream',
|
||||
...(params.api_key ? {'Authorization': `Bearer ${params.api_key}`} : {})
|
||||
},
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
@ -114,7 +115,7 @@ export async function* llama(prompt, params = {}, config = {}) {
|
|||
return content;
|
||||
}
|
||||
|
||||
// Call llama, return an event target that you can subcribe to
|
||||
// Call llama, return an event target that you can subscribe to
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
|
|
|
@ -223,7 +223,7 @@
|
|||
repeat_last_n: 256, // 0 = disable penalty, -1 = context size
|
||||
repeat_penalty: 1.18, // 1.0 = disabled
|
||||
top_k: 40, // <= 0 to use vocab size
|
||||
top_p: 0.5, // 1.0 = disabled
|
||||
top_p: 0.95, // 1.0 = disabled
|
||||
min_p: 0.05, // 0 = disabled
|
||||
tfs_z: 1.0, // 1.0 = disabled
|
||||
typical_p: 1.0, // 1.0 = disabled
|
||||
|
@ -235,10 +235,11 @@
|
|||
grammar: '',
|
||||
n_probs: 0, // no completion_probabilities,
|
||||
image_data: [],
|
||||
cache_prompt: true
|
||||
cache_prompt: true,
|
||||
api_key: ''
|
||||
})
|
||||
|
||||
/* START: Support for storing prompt templates and parameters in borwser LocalStorage */
|
||||
/* START: Support for storing prompt templates and parameters in browsers LocalStorage */
|
||||
|
||||
const local_storage_storageKey = "llamacpp_server_local_storage";
|
||||
|
||||
|
@ -282,7 +283,7 @@
|
|||
let importedTemplates = local_storage_getDataAsObject('user_templates')
|
||||
|
||||
if (importedTemplates) {
|
||||
// saved templates were successfuly imported.
|
||||
// saved templates were successfully imported.
|
||||
|
||||
console.log('Processing saved templates and updating default template')
|
||||
params.value = { ...params.value, image_data: [] };
|
||||
|
@ -303,7 +304,7 @@
|
|||
}
|
||||
|
||||
function userTemplateResetToDefault() {
|
||||
console.log('Reseting themplate to default')
|
||||
console.log('Resetting template to default')
|
||||
selectedUserTemplate.value.name = 'default';
|
||||
selectedUserTemplate.value.data = savedUserTemplates.value['default'];
|
||||
}
|
||||
|
@ -762,7 +763,7 @@
|
|||
|
||||
<fieldset class="two">
|
||||
${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
|
||||
${FloatField({ label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
|
||||
${FloatField({ label: "Temperature", max: 2.0, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
|
||||
${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
|
||||
${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
|
||||
${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
|
||||
|
@ -790,6 +791,10 @@
|
|||
<fieldset>
|
||||
${IntField({ label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs })}
|
||||
</fieldset>
|
||||
<fieldset>
|
||||
<label for="api_key">API Key</label>
|
||||
<input type="text" name="api_key" value="${params.value.api_key}" placeholder="Enter API key" oninput=${updateParams} />
|
||||
</fieldset>
|
||||
</details>
|
||||
</form>
|
||||
`
|
||||
|
|
|
@ -10,7 +10,8 @@
|
|||
// crash the server in debug mode, otherwise send an http 500 error
|
||||
#define CPPHTTPLIB_NO_EXCEPTIONS 1
|
||||
#endif
|
||||
|
||||
// increase max payload length to allow use of larger context size
|
||||
#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
|
||||
#include "httplib.h"
|
||||
#include "json.hpp"
|
||||
|
||||
|
@ -36,6 +37,7 @@ using json = nlohmann::json;
|
|||
struct server_params
|
||||
{
|
||||
std::string hostname = "127.0.0.1";
|
||||
std::string api_key;
|
||||
std::string public_path = "examples/server/public";
|
||||
int32_t port = 8080;
|
||||
int32_t read_timeout = 600;
|
||||
|
@ -376,7 +378,6 @@ struct llama_client_slot
|
|||
|
||||
int32_t num_prompt_tokens = 0;
|
||||
int32_t num_prompt_tokens_processed = 0;
|
||||
int32_t multibyte_pending = 0;
|
||||
|
||||
json prompt;
|
||||
std::string generated_text;
|
||||
|
@ -425,7 +426,6 @@ struct llama_client_slot
|
|||
stopped_word = false;
|
||||
stopped_limit = false;
|
||||
stopping_word = "";
|
||||
multibyte_pending = 0;
|
||||
n_past = 0;
|
||||
sent_count = 0;
|
||||
sent_token_probs_index = 0;
|
||||
|
@ -992,35 +992,36 @@ struct llama_server_context
|
|||
slot.generated_text += token_str;
|
||||
slot.has_next_token = true;
|
||||
|
||||
if (slot.multibyte_pending > 0)
|
||||
// check if there is incomplete UTF-8 character at the end
|
||||
bool incomplete = false;
|
||||
for (unsigned i = 1; i < 5 && i <= slot.generated_text.size(); ++i)
|
||||
{
|
||||
slot.multibyte_pending -= token_str.size();
|
||||
}
|
||||
else if (token_str.size() == 1)
|
||||
{
|
||||
const char c = token_str[0];
|
||||
// 2-byte characters: 110xxxxx 10xxxxxx
|
||||
unsigned char c = slot.generated_text[slot.generated_text.size() - i];
|
||||
if ((c & 0xC0) == 0x80)
|
||||
{
|
||||
// continuation byte: 10xxxxxx
|
||||
continue;
|
||||
}
|
||||
if ((c & 0xE0) == 0xC0)
|
||||
{
|
||||
slot.multibyte_pending = 1;
|
||||
// 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
|
||||
// 2-byte character: 110xxxxx ...
|
||||
incomplete = i < 2;
|
||||
}
|
||||
else if ((c & 0xF0) == 0xE0)
|
||||
{
|
||||
slot.multibyte_pending = 2;
|
||||
// 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
// 3-byte character: 1110xxxx ...
|
||||
incomplete = i < 3;
|
||||
}
|
||||
else if ((c & 0xF8) == 0xF0)
|
||||
{
|
||||
slot.multibyte_pending = 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
slot.multibyte_pending = 0;
|
||||
// 4-byte character: 11110xxx ...
|
||||
incomplete = i < 4;
|
||||
}
|
||||
// else 1-byte character or invalid byte
|
||||
break;
|
||||
}
|
||||
|
||||
if (slot.multibyte_pending == 0)
|
||||
if (!incomplete)
|
||||
{
|
||||
size_t pos = std::min(slot.sent_count, slot.generated_text.size());
|
||||
const std::string str_test = slot.generated_text.substr(pos);
|
||||
|
@ -1055,7 +1056,7 @@ struct llama_server_context
|
|||
}
|
||||
}
|
||||
|
||||
if (slot.multibyte_pending > 0 && !slot.has_next_token)
|
||||
if (incomplete)
|
||||
{
|
||||
slot.has_next_token = true;
|
||||
}
|
||||
|
@ -1954,6 +1955,7 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms,
|
|||
printf(" --host ip address to listen (default (default: %s)\n", sparams.hostname.c_str());
|
||||
printf(" --port PORT port to listen (default (default: %d)\n", sparams.port);
|
||||
printf(" --path PUBLIC_PATH path from which to serve static files (default %s)\n", sparams.public_path.c_str());
|
||||
printf(" --api-key API_KEY optional api key to enhance server security. If set, requests must include this key for access.\n");
|
||||
printf(" -to N, --timeout N server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
|
||||
printf(" --embedding enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
|
||||
printf(" -np N, --parallel N number of slots for process requests (default: %d)\n", params.n_parallel);
|
||||
|
@ -2003,6 +2005,15 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||
}
|
||||
sparams.public_path = argv[i];
|
||||
}
|
||||
else if (arg == "--api-key")
|
||||
{
|
||||
if (++i >= argc)
|
||||
{
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
sparams.api_key = argv[i];
|
||||
}
|
||||
else if (arg == "--timeout" || arg == "-to")
|
||||
{
|
||||
if (++i >= argc)
|
||||
|
@ -2382,6 +2393,7 @@ json oaicompat_completion_params_parse(
|
|||
llama_params["__oaicompat"] = true;
|
||||
|
||||
// Map OpenAI parameters to llama.cpp parameters
|
||||
llama_params["model"] = json_value(body, "model", std::string("uknown"));
|
||||
llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt'
|
||||
llama_params["cache_prompt"] = json_value(body, "cache_prompt", false);
|
||||
llama_params["temperature"] = json_value(body, "temperature", 0.8);
|
||||
|
@ -2402,7 +2414,7 @@ json oaicompat_completion_params_parse(
|
|||
llama_params["ignore_eos"] = json_value(body, "ignore_eos", false);
|
||||
llama_params["tfs_z"] = json_value(body, "tfs_z", 0.0);
|
||||
|
||||
if (llama_params.count("grammar") != 0) {
|
||||
if (body.count("grammar") != 0) {
|
||||
llama_params["grammar"] = json_value(body, "grammar", json::object());
|
||||
}
|
||||
|
||||
|
@ -2633,6 +2645,9 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
|
|||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
#if SERVER_VERBOSE != 1
|
||||
log_disable();
|
||||
#endif
|
||||
// own arguments required by this example
|
||||
gpt_params params;
|
||||
server_params sparams;
|
||||
|
@ -2669,6 +2684,32 @@ int main(int argc, char **argv)
|
|||
|
||||
httplib::Server svr;
|
||||
|
||||
// Middleware for API key validation
|
||||
auto validate_api_key = [&sparams](const httplib::Request &req, httplib::Response &res) -> bool {
|
||||
// If API key is not set, skip validation
|
||||
if (sparams.api_key.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for API key in the header
|
||||
auto auth_header = req.get_header_value("Authorization");
|
||||
std::string prefix = "Bearer ";
|
||||
if (auth_header.substr(0, prefix.size()) == prefix) {
|
||||
std::string received_api_key = auth_header.substr(prefix.size());
|
||||
if (received_api_key == sparams.api_key) {
|
||||
return true; // API key is valid
|
||||
}
|
||||
}
|
||||
|
||||
// API key is invalid or not provided
|
||||
res.set_content("Unauthorized: Invalid API Key", "text/plain; charset=utf-8");
|
||||
res.status = 401; // Unauthorized
|
||||
|
||||
LOG_WARNING("Unauthorized: Invalid API Key", {});
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
svr.set_default_headers({{"Server", "llama.cpp"},
|
||||
{"Access-Control-Allow-Origin", "*"},
|
||||
{"Access-Control-Allow-Headers", "content-type"}});
|
||||
|
@ -2676,28 +2717,28 @@ int main(int argc, char **argv)
|
|||
// this is only called if no index.html is found in the public --path
|
||||
svr.Get("/", [](const httplib::Request &, httplib::Response &res)
|
||||
{
|
||||
res.set_content(reinterpret_cast<const char*>(&index_html), index_html_len, "text/html");
|
||||
res.set_content(reinterpret_cast<const char*>(&index_html), index_html_len, "text/html; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
|
||||
// this is only called if no index.js is found in the public --path
|
||||
svr.Get("/index.js", [](const httplib::Request &, httplib::Response &res)
|
||||
{
|
||||
res.set_content(reinterpret_cast<const char *>(&index_js), index_js_len, "text/javascript");
|
||||
res.set_content(reinterpret_cast<const char *>(&index_js), index_js_len, "text/javascript; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
|
||||
// this is only called if no index.html is found in the public --path
|
||||
svr.Get("/completion.js", [](const httplib::Request &, httplib::Response &res)
|
||||
{
|
||||
res.set_content(reinterpret_cast<const char*>(&completion_js), completion_js_len, "application/javascript");
|
||||
res.set_content(reinterpret_cast<const char*>(&completion_js), completion_js_len, "application/javascript; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
|
||||
// this is only called if no index.html is found in the public --path
|
||||
svr.Get("/json-schema-to-grammar.mjs", [](const httplib::Request &, httplib::Response &res)
|
||||
{
|
||||
res.set_content(reinterpret_cast<const char*>(&json_schema_to_grammar_mjs), json_schema_to_grammar_mjs_len, "application/javascript");
|
||||
res.set_content(reinterpret_cast<const char*>(&json_schema_to_grammar_mjs), json_schema_to_grammar_mjs_len, "application/javascript; charset=utf-8");
|
||||
return false;
|
||||
});
|
||||
|
||||
|
@ -2708,23 +2749,26 @@ int main(int argc, char **argv)
|
|||
{ "user_name", llama.name_user.c_str() },
|
||||
{ "assistant_name", llama.name_assistant.c_str() }
|
||||
};
|
||||
res.set_content(data.dump(), "application/json");
|
||||
res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
svr.Post("/completion", [&llama](const httplib::Request &req, httplib::Response &res)
|
||||
svr.Post("/completion", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res)
|
||||
{
|
||||
if (!validate_api_key(req, res)) {
|
||||
return;
|
||||
}
|
||||
json data = json::parse(req.body);
|
||||
const int task_id = llama.request_completion(data, false, false, -1);
|
||||
if (!json_value(data, "stream", false)) {
|
||||
std::string completion_text;
|
||||
task_result result = llama.next_result(task_id);
|
||||
if (!result.error && result.stop) {
|
||||
res.set_content(result.result_json.dump(-1, ' ', false, json::error_handler_t::replace), "application/json");
|
||||
res.set_content(result.result_json.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
|
||||
}
|
||||
else
|
||||
{
|
||||
res.status = 404;
|
||||
res.set_content(result.result_json["content"], "text/plain");
|
||||
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
|
@ -2795,12 +2839,15 @@ int main(int argc, char **argv)
|
|||
}}
|
||||
};
|
||||
|
||||
res.set_content(models.dump(), "application/json");
|
||||
res.set_content(models.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
// TODO: add mount point without "/v1" prefix -- how?
|
||||
svr.Post("/v1/chat/completions", [&llama](const httplib::Request &req, httplib::Response &res)
|
||||
svr.Post("/v1/chat/completions", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res)
|
||||
{
|
||||
if (!validate_api_key(req, res)) {
|
||||
return;
|
||||
}
|
||||
json data = oaicompat_completion_params_parse(json::parse(req.body));
|
||||
|
||||
const int task_id = llama.request_completion(data, false, false, -1);
|
||||
|
@ -2814,10 +2861,10 @@ int main(int argc, char **argv)
|
|||
|
||||
res.set_content(oaicompat_result.dump(-1, ' ', false,
|
||||
json::error_handler_t::replace),
|
||||
"application/json");
|
||||
"application/json; charset=utf-8");
|
||||
} else {
|
||||
res.status = 500;
|
||||
res.set_content(result.result_json["content"], "text/plain");
|
||||
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
|
@ -2869,8 +2916,11 @@ int main(int argc, char **argv)
|
|||
}
|
||||
});
|
||||
|
||||
svr.Post("/infill", [&llama](const httplib::Request &req, httplib::Response &res)
|
||||
svr.Post("/infill", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res)
|
||||
{
|
||||
if (!validate_api_key(req, res)) {
|
||||
return;
|
||||
}
|
||||
json data = json::parse(req.body);
|
||||
const int task_id = llama.request_completion(data, true, false, -1);
|
||||
if (!json_value(data, "stream", false)) {
|
||||
|
@ -2878,12 +2928,12 @@ int main(int argc, char **argv)
|
|||
task_result result = llama.next_result(task_id);
|
||||
if (!result.error && result.stop)
|
||||
{
|
||||
res.set_content(result.result_json.dump(-1, ' ', false, json::error_handler_t::replace), "application/json");
|
||||
res.set_content(result.result_json.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
|
||||
}
|
||||
else
|
||||
{
|
||||
res.status = 404;
|
||||
res.set_content(result.result_json["content"], "text/plain");
|
||||
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
|
@ -2932,11 +2982,11 @@ int main(int argc, char **argv)
|
|||
svr.Get("/model.json", [&llama](const httplib::Request &, httplib::Response &res)
|
||||
{
|
||||
const json data = llama.get_model_props();
|
||||
return res.set_content(data.dump(), "application/json");
|
||||
return res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
svr.Options(R"(/.*)", [](const httplib::Request &, httplib::Response &res)
|
||||
{ return res.set_content("", "application/json"); });
|
||||
{ return res.set_content("", "application/json; charset=utf-8"); });
|
||||
|
||||
svr.Post("/tokenize", [&llama](const httplib::Request &req, httplib::Response &res)
|
||||
{
|
||||
|
@ -2947,7 +2997,7 @@ int main(int argc, char **argv)
|
|||
tokens = llama.tokenize(body["content"], false);
|
||||
}
|
||||
const json data = format_tokenizer_response(tokens);
|
||||
return res.set_content(data.dump(), "application/json");
|
||||
return res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
svr.Post("/detokenize", [&llama](const httplib::Request &req, httplib::Response &res)
|
||||
|
@ -2961,7 +3011,7 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
const json data = format_detokenized_response(content);
|
||||
return res.set_content(data.dump(), "application/json");
|
||||
return res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
svr.Post("/embedding", [&llama](const httplib::Request &req, httplib::Response &res)
|
||||
|
@ -2978,7 +3028,7 @@ int main(int argc, char **argv)
|
|||
}
|
||||
const int task_id = llama.request_completion({ {"prompt", prompt}, { "n_predict", 0} }, false, true, -1);
|
||||
task_result result = llama.next_result(task_id);
|
||||
return res.set_content(result.result_json.dump(), "application/json");
|
||||
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
svr.set_logger(log_server_request);
|
||||
|
@ -2999,19 +3049,23 @@ int main(int argc, char **argv)
|
|||
{
|
||||
snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
|
||||
}
|
||||
res.set_content(buf, "text/plain");
|
||||
res.set_content(buf, "text/plain; charset=utf-8");
|
||||
res.status = 500;
|
||||
});
|
||||
|
||||
svr.set_error_handler([](const httplib::Request &, httplib::Response &res)
|
||||
{
|
||||
if (res.status == 401)
|
||||
{
|
||||
res.set_content("Unauthorized", "text/plain; charset=utf-8");
|
||||
}
|
||||
if (res.status == 400)
|
||||
{
|
||||
res.set_content("Invalid request", "text/plain");
|
||||
res.set_content("Invalid request", "text/plain; charset=utf-8");
|
||||
}
|
||||
else if (res.status != 500)
|
||||
else if (res.status == 404)
|
||||
{
|
||||
res.set_content("File Not Found", "text/plain");
|
||||
res.set_content("File Not Found", "text/plain; charset=utf-8");
|
||||
res.status = 404;
|
||||
}
|
||||
});
|
||||
|
@ -3032,11 +3086,15 @@ int main(int argc, char **argv)
|
|||
// to make it ctrl+clickable:
|
||||
LOG_TEE("\nllama server listening at http://%s:%d\n\n", sparams.hostname.c_str(), sparams.port);
|
||||
|
||||
LOG_INFO("HTTP server listening", {
|
||||
{"hostname", sparams.hostname},
|
||||
{"port", sparams.port},
|
||||
});
|
||||
std::unordered_map<std::string, std::string> log_data;
|
||||
log_data["hostname"] = sparams.hostname;
|
||||
log_data["port"] = std::to_string(sparams.port);
|
||||
|
||||
if (!sparams.api_key.empty()) {
|
||||
log_data["api_key"] = "api_key: ****" + sparams.api_key.substr(sparams.api_key.length() - 4);
|
||||
}
|
||||
|
||||
LOG_INFO("HTTP server listening", log_data);
|
||||
// run the HTTP server in a thread - see comment below
|
||||
std::thread t([&]()
|
||||
{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# llama.cpp/examples/speculative
|
||||
|
||||
Demonstartion of speculative decoding and tree-based speculative decoding techniques
|
||||
Demonstration of speculative decoding and tree-based speculative decoding techniques
|
||||
|
||||
More info:
|
||||
|
||||
|
|
|
@ -428,7 +428,7 @@ int main(int argc, char ** argv) {
|
|||
++n_past_tgt;
|
||||
}
|
||||
|
||||
// the first token is always proposed by the traget model before the speculation loop so we erase it here
|
||||
// the first token is always proposed by the target model before the speculation loop so we erase it here
|
||||
for (int s = 0; s < n_seq_dft; ++s) {
|
||||
if (!drafts[s].active) {
|
||||
continue;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue