diff --git a/common/train.cpp b/common/train.cpp
index dcf9614e4..e6f2f7a2f 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -1107,7 +1107,7 @@ void print_common_train_usage(int /*argc*/, char ** /*argv*/, const struct train
     fprintf(stderr, " --sample-start STR Sets the starting point for samples after the specified pattern. If empty use every token position as sample start. (default '%s')\n", params->sample_start.c_str());
     fprintf(stderr, " --include-sample-start Include the sample start in the samples. (default off)\n");
     fprintf(stderr, " --escape process sample start escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
-    fprintf(stderr, " --overlapping-samples Samples my overlap, will include sample-start of second and following samples. When off, samples will end at begin of next sample. (default off)\n");
+    fprintf(stderr, " --overlapping-samples Samples may overlap, will include sample-start of second and following samples. When off, samples will end at begin of next sample. (default off)\n");
     fprintf(stderr, " --fill-with-next-samples Samples shorter than context length will be followed by the next (shuffled) samples. (default off)\n");
     fprintf(stderr, " --separate-with-eos When fill-with-next-samples, insert end-of-sequence token between samples.%s\n", params->separate_with_eos ? " (default)" : "");
     fprintf(stderr, " --separate-with-bos When fill-with-next-samples, insert begin-of-sequence token between samples.%s\n", params->separate_with_bos ? " (default)" : "");
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index e0520f64c..eaca42fc1 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -3,15 +3,9 @@
 #include "llama.h"
 #include "common.h"
 #include "train.h"
-#include
 #include
-#include
-#include
 #include
-#include
 #include
-#include
-#include
 #include
 #include
 
diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
index 2e6159928..a70750a22 100644
--- a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
+++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj
@@ -9,7 +9,6 @@
 /* Begin PBXBuildFile section */
         542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
         5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
-        542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
         542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
         542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
         542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
@@ -24,8 +23,26 @@
         8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
         8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
         8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
+        F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; };
+        F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
 /* End PBXBuildFile section */
 
+/* Begin PBXBuildRule section */
+        F1FE20DB2B465C2100B45541 /* PBXBuildRule */ = {
+            isa = PBXBuildRule;
+            compilerSpec = com.apple.compilers.proxy.script;
+            fileType = sourcecode.metal;
+            inputFiles = (
+            );
+            isEditable = 1;
+            outputFiles = (
+                "${DERIVED_FILES_DIR}/ggml-metal.air",
+                "${DERIVED_FILES_DIR}/ggml.metallib",
+            );
+            script = "# metal\nxcrun metal -c \"${INPUT_FILE_PATH}\" -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\nxcrun metallib -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE%-metal}.metallib\" \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\n";
+        };
+/* End PBXBuildRule section */
+
 /* Begin PBXFileReference section */
         542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = ""; };
         542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = ""; };
@@ -52,6 +69,7 @@
         8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = ""; };
         8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = ""; };
         8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = ""; };
+        F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoadCustomButton.swift; sourceTree = ""; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -166,6 +184,7 @@
             children = (
                 7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */,
                 8A1C83782AC328BD0096AF73 /* ContentView.swift */,
+                F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */,
             );
             path = UI;
             sourceTree = "";
@@ -190,6 +209,7 @@
                 8A1C83712AC328BD0096AF73 /* Resources */,
             );
             buildRules = (
+                F1FE20DB2B465C2100B45541 /* PBXBuildRule */,
             );
             dependencies = (
             );
@@ -241,7 +261,7 @@
             isa = PBXResourcesBuildPhase;
             buildActionMask = 2147483647;
             files = (
-                542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
+                F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */,
                 8A3F84242AC4C891005E2EE8 /* models in Resources */,
                 8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
                 8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
@@ -257,6 +277,7 @@
             files = (
                 542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
                 549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
+                F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */,
                 542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
                 8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
                 542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,
diff --git a/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
index 147e0c63b..7c81ea256 100644
--- a/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
+++ b/examples/llama.swiftui/llama.swiftui/UI/ContentView.swift
@@ -103,6 +103,8 @@ struct ContentView: View {
                         ContentView.cleanupModelCaches()
                         llamaState.cacheCleared = true
                     }
+
+                    LoadCustomButton(llamaState: llamaState)
                 }
                 .padding(.top, 4)
                 .font(.system(size: 12))
diff --git a/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift b/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift
new file mode 100644
index 000000000..4315dbe4f
--- /dev/null
+++ b/examples/llama.swiftui/llama.swiftui/UI/LoadCustomButton.swift
@@ -0,0 +1,44 @@
+import SwiftUI
+import UniformTypeIdentifiers
+
+struct LoadCustomButton: View {
+    @ObservedObject private var llamaState: LlamaState
+    @State private var showFileImporter = false
+
+    init(llamaState: LlamaState) {
+        self.llamaState = llamaState
+    }
+
+    var body: some View {
+        VStack {
+            Button(action: {
+                showFileImporter = true
+            }) {
+                Text("Load Custom Model")
+            }
+        }
+        .fileImporter(
+            isPresented: $showFileImporter,
+            allowedContentTypes: [UTType(filenameExtension: "gguf", conformingTo: .data)!],
+            allowsMultipleSelection: false
+        ) { result in
+            switch result {
+            case .success(let files):
+                files.forEach { file in
+                    let gotAccess = file.startAccessingSecurityScopedResource()
+                    if !gotAccess { return }
+
+                    do {
+                        try llamaState.loadModel(modelUrl: file.absoluteURL)
+                    } catch let err {
+                        print("Error: \(err.localizedDescription)")
+                    }
+
+                    file.stopAccessingSecurityScopedResource()
+                }
+            case .failure(let error):
+                print(error)
+            }
+        }
+    }
+}
diff --git a/examples/server/README.md b/examples/server/README.md
index 718a7e064..243e66991 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -168,6 +168,12 @@ node index.js
 
     `image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be reference in `prompt`. You can determine the place of the image in the prompt as in the following: `USER:[img-12]Describe the image in detail.\nASSISTANT:`. In this case, `[img-12]` will be replaced by the embeddings of the image with id `12` in the following `image_data` array: `{..., "image_data": [{"data": "", "id": 12}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.
 
+    `slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
+
+    `cache_prompt`: Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+
+    `system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
+
     *Result JSON:*
 
     Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
@@ -198,12 +204,6 @@ node index.js
 
     `truncated`: Boolean indicating if the context size was exceeded during generation, i.e. the number of tokens provided in the prompt (`tokens_evaluated`) plus tokens generated (`tokens predicted`) exceeded the context size (`n_ctx`)
 
-    `slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
-
-    `cache_prompt`: Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
-
-    `system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
-
 - **POST** `/tokenize`: Tokenize a given text.
 
     *Options:*
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 418d3fb4c..2062f96f1 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1266,7 +1266,7 @@ struct llama_server_context
         {
             std::vector probs_output = {};
             const std::vector to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
-            size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
+            size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
             size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
             if (probs_pos < probs_stop_pos)
             {
@@ -1326,7 +1326,7 @@ struct llama_server_context
             {
                 probs = std::vector(
                     slot.generated_token_probs.begin(),
-                    slot.generated_token_probs.begin() + slot.sent_token_probs_index);
+                    slot.generated_token_probs.end());
             }
             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
         }
diff --git a/ggml-impl.h b/ggml-impl.h
index 1f5610a86..2faced080 100644
--- a/ggml-impl.h
+++ b/ggml-impl.h
@@ -5,6 +5,7 @@
 // GGML internal header
 
 #include
+#include // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
 #include
 #include
 #include // memcpy
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 44412cb94..b79de7a7d 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -392,15 +392,21 @@ struct test_case {
         struct callback_userdata {
             bool ok;
             double max_err;
+            ggml_backend_t backend1;
+            ggml_backend_t backend2;
         };
 
         callback_userdata ud {
             true,
             max_nmse_err(),
+            backend1,
+            backend2
         };
 
         auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
             callback_userdata * ud = (callback_userdata *) user_data;
+            const char * bn1 = ggml_backend_name(ud->backend1);
+            const char * bn2 = ggml_backend_name(ud->backend2);
 
             if (t1->op == GGML_OP_NONE) {
                 // sentinels must be unchanged
@@ -422,7 +428,7 @@ struct test_case {
             for (size_t i = 0; i < f1.size(); i++) {
                 // check for nans
                 if (std::isnan(f1[i]) || std::isnan(f2[i])) {
-                    printf("[%s] NaN at index %zu (%f %f) ", ggml_op_desc(t1), i, f1[i], f2[i]);
+                    printf("[%s] NaN at index %zu (%s=%f %s=%f) ", ggml_op_desc(t1), i, bn1, f1[i], bn2, f2[i]);
                     ud->ok = false;
                     return true;
                 }
@@ -430,12 +436,12 @@ struct test_case {
                 if (isinf_or_max(f1[i]) || isinf_or_max(f2[i])) {
                     if (isinf_or_max(f1[i]) && isinf_or_max(f2[i])) {
                         if (std::signbit(f1[i]) != std::signbit(f2[i])) {
-                            printf("[%s] inf sign mismatch: %f %f ", ggml_op_desc(t1), f1[i], f2[i]);
+                            printf("[%s] inf sign mismatch: %s=%f %s=%f ", ggml_op_desc(t1), bn1, f1[i], bn2, f2[i]);
                             ud->ok = false;
                             return true;
                         }
                     } else {
-                        printf("[%s] inf mismatch: %f %f ", ggml_op_desc(t1), f1[i], f2[i]);
+                        printf("[%s] inf mismatch: %s=%f %s=%f ", ggml_op_desc(t1), bn1, f1[i], bn2, f2[i]);
                        ud->ok = false;
                        return true;
                    }
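Note on the server README changes above: `slot_id` and `cache_prompt` are plain JSON fields on the `/completion` request body. The Swift sketch below is illustrative only and is not part of the patch; the host and port (`127.0.0.1:8080`, the server's usual default), the `CompletionRequest` struct, and the `requestCompletion` helper are assumptions, not existing code in the repository.

import Foundation

// Hypothetical sketch: POST a /completion request that exercises the slot_id and
// cache_prompt options documented in examples/server/README.md. Names and port are assumed.
struct CompletionRequest: Codable {
    let prompt: String
    let n_predict: Int
    let slot_id: Int       // -1 = let the server pick an idle slot
    let cache_prompt: Bool // reuse the cached prompt prefix when only the tail changes
}

func requestCompletion() async throws -> Data {
    let body = CompletionRequest(
        prompt: "Building a website can be done in 10 simple steps:",
        n_predict: 64,
        slot_id: -1,
        cache_prompt: true
    )
    var request = URLRequest(url: URL(string: "http://127.0.0.1:8080/completion")!)
    request.httpMethod = "POST"
    request.setValue("application/json", forHTTPHeaderField: "Content-Type")
    request.httpBody = try JSONEncoder().encode(body)
    let (data, _) = try await URLSession.shared.data(for: request)
    return data // raw JSON; result fields such as `content` and `stop` are described in the README
}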