From ece9a45e8ffb73ad461c792720c2fec28b0137bc Mon Sep 17 00:00:00 2001 From: Ashraful Islam Date: Wed, 3 Jan 2024 11:30:02 -0600 Subject: [PATCH 1/9] swift : update Package.swift to use ggml as dependency (#4691) * updates the package.swift to use ggml as dependency * changes the ggml package url src to ggerganov --- Package.swift | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/Package.swift b/Package.swift index 18d610d69..e33a4ff46 100644 --- a/Package.swift +++ b/Package.swift @@ -13,21 +13,17 @@ let package = Package( products: [ .library(name: "llama", targets: ["llama"]), ], + dependencies: [ + .package(url: "https://github.com/ggerganov/ggml.git", .branch("master")) + ], targets: [ .target( name: "llama", + dependencies: ["ggml"], path: ".", exclude: [], sources: [ - "ggml.c", "llama.cpp", - "ggml-alloc.c", - "ggml-backend.c", - "ggml-quants.c", - "ggml-metal.m", - ], - resources: [ - .process("ggml-metal.metal") ], publicHeadersPath: "spm-headers", cSettings: [ From cb1e2818e0e12ec99f7236ec5d4f3ffd8bcc2f4a Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 3 Jan 2024 18:53:40 +0100 Subject: [PATCH 2/9] train : fix typo in overlapping-samples help msg (#4758) This commit fixes a typo in the help message for the --overlapping-samples option. Signed-off-by: Daniel Bevenius --- common/train.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/train.cpp b/common/train.cpp index dcf9614e4..e6f2f7a2f 100644 --- a/common/train.cpp +++ b/common/train.cpp @@ -1107,7 +1107,7 @@ void print_common_train_usage(int /*argc*/, char ** /*argv*/, const struct train fprintf(stderr, " --sample-start STR Sets the starting point for samples after the specified pattern. If empty use every token position as sample start. (default '%s')\n", params->sample_start.c_str()); fprintf(stderr, " --include-sample-start Include the sample start in the samples. (default off)\n"); fprintf(stderr, " --escape process sample start escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n"); - fprintf(stderr, " --overlapping-samples Samples my overlap, will include sample-start of second and following samples. When off, samples will end at begin of next sample. (default off)\n"); + fprintf(stderr, " --overlapping-samples Samples may overlap, will include sample-start of second and following samples. When off, samples will end at begin of next sample. (default off)\n"); fprintf(stderr, " --fill-with-next-samples Samples shorter than context length will be followed by the next (shuffled) samples. (default off)\n"); fprintf(stderr, " --separate-with-eos When fill-with-next-samples, insert end-of-sequence token between samples.%s\n", params->separate_with_eos ? " (default)" : ""); fprintf(stderr, " --separate-with-bos When fill-with-next-samples, insert begin-of-sequence token between samples.%s\n", params->separate_with_bos ? 
" (default)" : ""); From 46cea79e1f32499bb24b9fab12123cd386e96728 Mon Sep 17 00:00:00 2001 From: singularity <12184989+singularity-s0@users.noreply.github.com> Date: Thu, 4 Jan 2024 15:58:16 +0800 Subject: [PATCH 3/9] llama.swiftui : fix build of ggml.metallib (#4754) * metal: fix metal backend init failure in swiftui * metal: build ggml.metallib instead of copy src * llama.swift : remove debug flags from metallib build --------- Co-authored-by: Georgi Gerganov --- .../llama.swiftui.xcodeproj/project.pbxproj | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj index 2e6159928..7bf4489a2 100644 --- a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj @@ -9,7 +9,6 @@ /* Begin PBXBuildFile section */ 542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; settings = {COMPILER_FLAGS = "-O3"; }; }; 5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; settings = {COMPILER_FLAGS = "-O3"; }; }; - 542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; }; 542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; }; 542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; settings = {COMPILER_FLAGS = "-O3"; }; }; 542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; }; @@ -24,8 +23,25 @@ 8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; }; 8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; }; 8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; }; + F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; }; /* End PBXBuildFile section */ +/* Begin PBXBuildRule section */ + F1FE20DB2B465C2100B45541 /* PBXBuildRule */ = { + isa = PBXBuildRule; + compilerSpec = com.apple.compilers.proxy.script; + fileType = sourcecode.metal; + inputFiles = ( + ); + isEditable = 1; + outputFiles = ( + "${DERIVED_FILES_DIR}/ggml-metal.air", + "${DERIVED_FILES_DIR}/ggml.metallib", + ); + script = "# metal\nxcrun metal -c \"${INPUT_FILE_PATH}\" -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\nxcrun metallib -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE%-metal}.metallib\" \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\n"; + }; +/* End PBXBuildRule section */ + /* Begin PBXFileReference section */ 542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = ""; }; 542376072B0D9BFB008E6A1C /* 
@@ -190,6 +206,7 @@
                        8A1C83712AC328BD0096AF73 /* Resources */,
                );
                buildRules = (
+                       F1FE20DB2B465C2100B45541 /* PBXBuildRule */,
                );
                dependencies = (
                );
@@ -241,7 +258,7 @@
                isa = PBXResourcesBuildPhase;
                buildActionMask = 2147483647;
                files = (
-                       542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
+                       F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */,
                        8A3F84242AC4C891005E2EE8 /* models in Resources */,
                        8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
                        8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,

From dc891b7f7a23158d54f9383790b92c79cc5906c1 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 4 Jan 2024 10:12:26 +0200
Subject: [PATCH 4/9] ggml : include stdlib.h before intrin.h (#4736)

---
 ggml-impl.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ggml-impl.h b/ggml-impl.h
index 1f5610a86..2faced080 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -5,6 +5,7 @@
 // GGML internal header

 #include <assert.h>
+#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
 #include <stddef.h>
 #include <stdbool.h>
 #include <string.h> // memcpy

From e5804313a1edaf00726ed0b96ecced07accbf50c Mon Sep 17 00:00:00 2001
From: Michael Coppola
Date: Thu, 4 Jan 2024 03:17:09 -0500
Subject: [PATCH 5/9] server : fix options in README.md (#4765)

* fix examples/server/README.md

* minor : fix whitespace

---------

Co-authored-by: Georgi Gerganov
---
 examples/server/README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index 718a7e064..243e66991 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -168,6 +168,12 @@ node index.js

     `image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be reference in `prompt`. You can determine the place of the image in the prompt as in the following: `USER:[img-12]Describe the image in detail.\nASSISTANT:`. In this case, `[img-12]` will be replaced by the embeddings of the image with id `12` in the following `image_data` array: `{..., "image_data": [{"data": "<BASE64_STRING>", "id": 12}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.

+    `slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
+
+    `cache_prompt`: Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+
+    `system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
+
 *Result JSON:*

 Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
@@ -198,12 +204,6 @@ node index.js

     `truncated`: Boolean indicating if the context size was exceeded during generation, i.e. the number of tokens provided in the prompt (`tokens_evaluated`) plus tokens generated (`tokens predicted`) exceeded the context size (`n_ctx`)

-    `slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
-
-    `cache_prompt`: Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
-
-    `system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
-
 -   **POST** `/tokenize`: Tokenize a given text.

     *Options:*
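[Editor's note on PATCH 5] The three options re-homed above are plain JSON fields on the completion request body. Below is a minimal client sketch, not part of the patch series, that exercises `slot_id` and `cache_prompt`. The server address and the `/completion` endpoint path are assumptions here; adjust them for your setup. It requires libcurl.

// Editor's sketch (not from the patches): POST a completion request using the
// slot_id / cache_prompt options documented in PATCH 5. Endpoint and port are
// assumptions.
#include <curl/curl.h>
#include <string>

int main() {
    curl_global_init(CURL_GLOBAL_DEFAULT);
    CURL * curl = curl_easy_init();
    if (curl == nullptr) {
        return 1;
    }

    // slot_id -1 lets the server pick an idle slot; cache_prompt asks it to
    // reuse the already-processed part of the previous prompt in that slot.
    const std::string body =
        R"({"prompt": "Hello", "n_predict": 16, "slot_id": -1, "cache_prompt": true})";

    struct curl_slist * headers = curl_slist_append(nullptr, "Content-Type: application/json");
    curl_easy_setopt(curl, CURLOPT_URL, "http://127.0.0.1:8080/completion");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str());

    // The response JSON is written to stdout by libcurl's default write callback.
    const CURLcode rc = curl_easy_perform(curl);

    curl_slist_free_all(headers);
    curl_easy_cleanup(curl);
    curl_global_cleanup();
    return rc == CURLE_OK ? 0 : 1;
}

Sending a follow-up request whose prompt shares a prefix with the previous one is what makes `cache_prompt` pay off: only the new suffix has to be evaluated.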
From 3c0b585561d74a56977cf3a3844535ecc9e37972 Mon Sep 17 00:00:00 2001
From: singularity <12184989+singularity-s0@users.noreply.github.com>
Date: Thu, 4 Jan 2024 16:22:38 +0800
Subject: [PATCH 6/9] llama.swiftui : support loading custom model from file picker (#4767)

* swiftui: support load model from file picker

* swiftui: remove trailing whitespace

---
 .../llama.swiftui.xcodeproj/project.pbxproj   |  4 ++
 .../llama.swiftui/UI/ContentView.swift        |  2 +
 .../llama.swiftui/UI/LoadCustomButton.swift   | 44 +++++++++++++++++++
 3 files changed, 50 insertions(+)
 create mode 100644 examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift

diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
index 7bf4489a2..a70750a22 100644
--- a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
+++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
@@ -23,6 +23,7 @@
        8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
        8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
        8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
+       F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; };
        F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
 /* End PBXBuildFile section */

@@ -68,6 +69,7 @@
        8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
        8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
        8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
+       F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoadCustomButton.swift; sourceTree = "<group>"; };
 /* End PBXFileReference section */

 /* Begin PBXFrameworksBuildPhase section */
@@ -182,6 +184,7 @@
                children = (
                        7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */,
                        8A1C83782AC328BD0096AF73 /* ContentView.swift */,
+                       F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */,
                );
                path = UI;
                sourceTree = "<group>";
@@ -274,6 +277,7 @@
                files = (
                        542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
                        549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
+                       F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */,
                        542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
                        8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
                        542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,

diff --git a/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
index 147e0c63b..7c81ea256 100644
--- a/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
+++ b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
@@ -103,6 +103,8 @@ struct ContentView: View {
                         ContentView.cleanupModelCaches()
                         llamaState.cacheCleared = true
                     }
+
+                    LoadCustomButton(llamaState: llamaState)
                 }
                 .padding(.top, 4)
                 .font(.system(size: 12))

diff --git a/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift b/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift
new file mode 100644
index 000000000..4315dbe4f
--- /dev/null
+++ b/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift
@@ -0,0 +1,44 @@
+import SwiftUI
+import UniformTypeIdentifiers
+
+struct LoadCustomButton: View {
+    @ObservedObject private var llamaState: LlamaState
+    @State private var showFileImporter = false
+
+    init(llamaState: LlamaState) {
+        self.llamaState = llamaState
+    }
+
+    var body: some View {
+        VStack {
+            Button(action: {
+                showFileImporter = true
+            }) {
+                Text("Load Custom Model")
+            }
+        }
+        .fileImporter(
+            isPresented: $showFileImporter,
+            allowedContentTypes: [UTType(filenameExtension: "gguf", conformingTo: .data)!],
+            allowsMultipleSelection: false
+        ) { result in
+            switch result {
+            case .success(let files):
+                files.forEach { file in
+                    let gotAccess = file.startAccessingSecurityScopedResource()
+                    if !gotAccess { return }
+
+                    do {
+                        try llamaState.loadModel(modelUrl: file.absoluteURL)
+                    } catch let err {
+                        print("Error: \(err.localizedDescription)")
+                    }
+
+                    file.stopAccessingSecurityScopedResource()
+                }
+            case .failure(let error):
+                print(error)
+            }
+        }
+    }
+}

From a91928014fcf51fe297823fcff0788de4f14206e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Thu, 4 Jan 2024 09:43:23 +0100
Subject: [PATCH 7/9] Print backend name on test-backend-ops failure (#4751)

---
 tests/test-backend-ops.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 44412cb94..b79de7a7d 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -392,15 +392,21 @@ struct test_case {
         struct callback_userdata {
             bool   ok;
             double max_err;
+            ggml_backend_t backend1;
+            ggml_backend_t backend2;
         };

         callback_userdata ud {
             true,
             max_nmse_err(),
+            backend1,
+            backend2
         };

         auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
             callback_userdata * ud = (callback_userdata *) user_data;
+            const char * bn1 = ggml_backend_name(ud->backend1);
+            const char * bn2 = ggml_backend_name(ud->backend2);

             if (t1->op == GGML_OP_NONE) {
                 // sentinels must be unchanged
@@ -422,7 +428,7 @@ struct test_case {
             for (size_t i = 0; i < f1.size(); i++) {
                 // check for nans
                 if (std::isnan(f1[i]) || std::isnan(f2[i])) {
-                    printf("[%s] NaN at index %zu (%f %f) ", ggml_op_desc(t1), i, f1[i], f2[i]);
+                    printf("[%s] NaN at index %zu (%s=%f %s=%f) ", ggml_op_desc(t1), i, bn1, f1[i], bn2, f2[i]);
                     ud->ok = false;
                     return true;
                 }
@@ -430,12 +436,12 @@ struct test_case {
                 if (isinf_or_max(f1[i]) || isinf_or_max(f2[i])) {
                     if (isinf_or_max(f1[i]) && isinf_or_max(f2[i])) {
                         if (std::signbit(f1[i]) != std::signbit(f2[i])) {
-                            printf("[%s] inf sign mismatch: %f %f ", ggml_op_desc(t1), f1[i], f2[i]);
+                            printf("[%s] inf sign mismatch: %s=%f %s=%f ", ggml_op_desc(t1), bn1, f1[i], bn2, f2[i]);
                             ud->ok = false;
                             return true;
                         }
                     } else {
-                        printf("[%s] inf mismatch: %f %f ", ggml_op_desc(t1), f1[i], f2[i]);
+                        printf("[%s] inf mismatch: %s=%f %s=%f ", ggml_op_desc(t1), bn1, f1[i], bn2, f2[i]);
                         ud->ok = false;
                         return true;
                     }
                 }
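[Editor's note on PATCH 7] The fix works by threading extra context, the two backends, into a C-style callback through its `void * user_data` parameter rather than through globals; the captureless lambda in the patch converts to a plain function pointer, which forbids captures, so all state must ride along in the struct. A self-contained sketch of that pattern follows; the names (`cmp_context`, `report_mismatch`) are illustrative only and not from ggml.

#include <cstdio>

// All state the callback needs travels in one struct behind a void pointer,
// mirroring callback_userdata in the patch above.
struct cmp_context {
    bool         ok;
    const char * backend1_name;
    const char * backend2_name;
};

// C-style callback: extra context arrives only via user_data.
static bool report_mismatch(int index, float v1, float v2, void * user_data) {
    cmp_context * ctx = static_cast<cmp_context *>(user_data);
    std::printf("index %d: %s=%f %s=%f\n", index, ctx->backend1_name, v1, ctx->backend2_name, v2);
    ctx->ok = false;
    return true; // keep comparing further values
}

int main() {
    cmp_context ctx = { /*ok=*/true, "CPU", "Metal" };
    report_mismatch(0, 0.5f, 0.75f, &ctx); // simulate one differing value
    return ctx.ok ? 0 : 1;
}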
From 012cf349aec8ffb47c9def5dc018240fa3721e8b Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 4 Jan 2024 19:56:33 +0200
Subject: [PATCH 8/9] server : send token probs for "stream == false" (#4714)

---
 examples/server/server.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index e45ea809a..d1469fb08 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1265,7 +1265,7 @@ struct llama_server_context
         {
             std::vector<completion_token_output> probs_output = {};
             const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
-            size_t probs_pos      = std::min(slot.sent_token_probs_index,                       slot.generated_token_probs.size());
+            size_t probs_pos      = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
             if (probs_pos < probs_stop_pos)
             {
@@ -1325,7 +1325,7 @@ struct llama_server_context
             {
                 probs = std::vector<completion_token_output>(
                     slot.generated_token_probs.begin(),
-                    slot.generated_token_probs.begin() + slot.sent_token_probs_index);
+                    slot.generated_token_probs.end());
             }
             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
         }

From b3a7c20b5c035250257d2b62851c379b159c899a Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Thu, 4 Jan 2024 20:45:37 +0100
Subject: [PATCH 9/9] finetune : remove unused includes (#4756)

This commit removes unused includes from finetune.cpp.

Signed-off-by: Daniel Bevenius
---
 examples/finetune/finetune.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index e0520f64c..eaca42fc1 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -3,15 +3,9 @@
 #include "llama.h"
 #include "common.h"
 #include "train.h"
-#include <unordered_map>
 #include <vector>
-#include <cassert>
-#include <climits>
 #include <cstring>
-#include <cstdarg>
 #include <ctime>
-#include <random>
-#include <stdexcept>
 #include <algorithm>
 #include <string>
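[Editor's note on PATCH 8] The substantive change is the end of the copied range: `begin() + slot.sent_token_probs_index` only covered the probabilities already streamed, so a `"stream": false` response lost the rest; copying up to `end()` returns all of them. A standalone sketch of the difference, with made-up values standing in for the slot's state:

#include <cstdio>
#include <vector>

int main() {
    // Stand-in for slot.generated_token_probs (values are made up).
    std::vector<float> generated = {0.9f, 0.8f, 0.7f, 0.6f};
    const size_t sent_index = 2; // stand-in for slot.sent_token_probs_index

    // Old behavior: only the entries before sent_index were copied.
    std::vector<float> partial(generated.begin(), generated.begin() + sent_index);

    // New behavior: the full range, so the final (non-streamed) response
    // carries a probability entry for every generated token.
    std::vector<float> full(generated.begin(), generated.end());

    std::printf("partial=%zu full=%zu\n", partial.size(), full.size()); // partial=2 full=4
    return 0;
}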