Merge branch 'master' into concedo_experimental
# Conflicts: # Package.swift
This commit is contained in:
commit
c9fdd42da2
9 changed files with 88 additions and 20 deletions
|
@ -1107,7 +1107,7 @@ void print_common_train_usage(int /*argc*/, char ** /*argv*/, const struct train
|
|||
fprintf(stderr, " --sample-start STR Sets the starting point for samples after the specified pattern. If empty use every token position as sample start. (default '%s')\n", params->sample_start.c_str());
|
||||
fprintf(stderr, " --include-sample-start Include the sample start in the samples. (default off)\n");
|
||||
fprintf(stderr, " --escape process sample start escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\)\n");
|
||||
fprintf(stderr, " --overlapping-samples Samples my overlap, will include sample-start of second and following samples. When off, samples will end at begin of next sample. (default off)\n");
|
||||
fprintf(stderr, " --overlapping-samples Samples may overlap, will include sample-start of second and following samples. When off, samples will end at begin of next sample. (default off)\n");
|
||||
fprintf(stderr, " --fill-with-next-samples Samples shorter than context length will be followed by the next (shuffled) samples. (default off)\n");
|
||||
fprintf(stderr, " --separate-with-eos When fill-with-next-samples, insert end-of-sequence token between samples.%s\n", params->separate_with_eos ? " (default)" : "");
|
||||
fprintf(stderr, " --separate-with-bos When fill-with-next-samples, insert begin-of-sequence token between samples.%s\n", params->separate_with_bos ? " (default)" : "");
|
||||
|
|
|
@ -3,15 +3,9 @@
|
|||
#include "llama.h"
|
||||
#include "common.h"
|
||||
#include "train.h"
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <climits>
|
||||
#include <cstring>
|
||||
#include <cstdarg>
|
||||
#include <ctime>
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
/* Begin PBXBuildFile section */
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 542376072B0D9BFB008E6A1C /* ggml-quants.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
5423760B2B0D9C4B008E6A1C /* ggml-backend.c in Sources */ = {isa = PBXBuildFile; fileRef = 5423760A2B0D9C4B008E6A1C /* ggml-backend.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09B2AC8723900A8AEE9 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL -DGGML_USE_K_QUANTS -O3"; }; };
|
||||
542EA0A02AC8725700A8AEE9 /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 542EA09F2AC8725700A8AEE9 /* ggml-alloc.c */; settings = {COMPILER_FLAGS = "-O3"; }; };
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 542EA0A12AC8729100A8AEE9 /* llama.cpp */; settings = {COMPILER_FLAGS = "-DGGML_USE_K_QUANTS -DGGML_USE_METAL -O3"; }; };
|
||||
|
@ -24,8 +23,26 @@
|
|||
8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; };
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; };
|
||||
8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; };
|
||||
F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; };
|
||||
F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXBuildRule section */
|
||||
F1FE20DB2B465C2100B45541 /* PBXBuildRule */ = {
|
||||
isa = PBXBuildRule;
|
||||
compilerSpec = com.apple.compilers.proxy.script;
|
||||
fileType = sourcecode.metal;
|
||||
inputFiles = (
|
||||
);
|
||||
isEditable = 1;
|
||||
outputFiles = (
|
||||
"${DERIVED_FILES_DIR}/ggml-metal.air",
|
||||
"${DERIVED_FILES_DIR}/ggml.metallib",
|
||||
);
|
||||
script = "# metal\nxcrun metal -c \"${INPUT_FILE_PATH}\" -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\nxcrun metallib -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE%-metal}.metallib\" \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\n";
|
||||
};
|
||||
/* End PBXBuildRule section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = "<group>"; };
|
||||
542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = "<group>"; };
|
||||
|
@ -52,6 +69,7 @@
|
|||
8A3F84232AC4C891005E2EE8 /* models */ = {isa = PBXFileReference; lastKnownFileType = folder; name = models; path = llama.swiftui/Resources/models; sourceTree = "<group>"; };
|
||||
8A907F322AC7134E006146EA /* LibLlama.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LibLlama.swift; sourceTree = "<group>"; };
|
||||
8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LlamaState.swift; sourceTree = "<group>"; };
|
||||
F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LoadCustomButton.swift; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
|
@ -166,6 +184,7 @@
|
|||
children = (
|
||||
7FA3D2B22B2EA2F600543F92 /* DownloadButton.swift */,
|
||||
8A1C83782AC328BD0096AF73 /* ContentView.swift */,
|
||||
F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */,
|
||||
);
|
||||
path = UI;
|
||||
sourceTree = "<group>";
|
||||
|
@ -190,6 +209,7 @@
|
|||
8A1C83712AC328BD0096AF73 /* Resources */,
|
||||
);
|
||||
buildRules = (
|
||||
F1FE20DB2B465C2100B45541 /* PBXBuildRule */,
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
|
@ -241,7 +261,7 @@
|
|||
isa = PBXResourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
542378792ACE3F3500834A7B /* ggml-metal.metal in Resources */,
|
||||
F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */,
|
||||
8A3F84242AC4C891005E2EE8 /* models in Resources */,
|
||||
8A1C837E2AC328BE0096AF73 /* Preview Assets.xcassets in Resources */,
|
||||
8A1C837B2AC328BE0096AF73 /* Assets.xcassets in Resources */,
|
||||
|
@ -257,6 +277,7 @@
|
|||
files = (
|
||||
542376082B0D9BFB008E6A1C /* ggml-quants.c in Sources */,
|
||||
549479CD2AC9E42A00E0F78B /* ggml-metal.m in Sources */,
|
||||
F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */,
|
||||
542EA09D2AC8723900A8AEE9 /* ggml.c in Sources */,
|
||||
8A907F332AC7138A006146EA /* LibLlama.swift in Sources */,
|
||||
542EA0A32AC8729100A8AEE9 /* llama.cpp in Sources */,
|
||||
|
|
|
@ -103,6 +103,8 @@ struct ContentView: View {
|
|||
ContentView.cleanupModelCaches()
|
||||
llamaState.cacheCleared = true
|
||||
}
|
||||
|
||||
LoadCustomButton(llamaState: llamaState)
|
||||
}
|
||||
.padding(.top, 4)
|
||||
.font(.system(size: 12))
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
import SwiftUI
|
||||
import UniformTypeIdentifiers
|
||||
|
||||
struct LoadCustomButton: View {
|
||||
@ObservedObject private var llamaState: LlamaState
|
||||
@State private var showFileImporter = false
|
||||
|
||||
init(llamaState: LlamaState) {
|
||||
self.llamaState = llamaState
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack {
|
||||
Button(action: {
|
||||
showFileImporter = true
|
||||
}) {
|
||||
Text("Load Custom Model")
|
||||
}
|
||||
}
|
||||
.fileImporter(
|
||||
isPresented: $showFileImporter,
|
||||
allowedContentTypes: [UTType(filenameExtension: "gguf", conformingTo: .data)!],
|
||||
allowsMultipleSelection: false
|
||||
) { result in
|
||||
switch result {
|
||||
case .success(let files):
|
||||
files.forEach { file in
|
||||
let gotAccess = file.startAccessingSecurityScopedResource()
|
||||
if !gotAccess { return }
|
||||
|
||||
do {
|
||||
try llamaState.loadModel(modelUrl: file.absoluteURL)
|
||||
} catch let err {
|
||||
print("Error: \(err.localizedDescription)")
|
||||
}
|
||||
|
||||
file.stopAccessingSecurityScopedResource()
|
||||
}
|
||||
case .failure(let error):
|
||||
print(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -168,6 +168,12 @@ node index.js
|
|||
|
||||
`image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be reference in `prompt`. You can determine the place of the image in the prompt as in the following: `USER:[img-12]Describe the image in detail.\nASSISTANT:`. In this case, `[img-12]` will be replaced by the embeddings of the image with id `12` in the following `image_data` array: `{..., "image_data": [{"data": "<BASE64_STRING>", "id": 12}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.
|
||||
|
||||
`slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
|
||||
|
||||
`cache_prompt`: Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
|
||||
|
||||
`system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
|
||||
|
||||
*Result JSON:*
|
||||
|
||||
Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
|
||||
|
@ -198,12 +204,6 @@ node index.js
|
|||
|
||||
`truncated`: Boolean indicating if the context size was exceeded during generation, i.e. the number of tokens provided in the prompt (`tokens_evaluated`) plus tokens generated (`tokens predicted`) exceeded the context size (`n_ctx`)
|
||||
|
||||
`slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
|
||||
|
||||
`cache_prompt`: Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
|
||||
|
||||
`system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
|
||||
|
||||
- **POST** `/tokenize`: Tokenize a given text.
|
||||
|
||||
*Options:*
|
||||
|
|
|
@ -1266,7 +1266,7 @@ struct llama_server_context
|
|||
{
|
||||
std::vector<completion_token_output> probs_output = {};
|
||||
const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
|
||||
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
|
||||
size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
|
||||
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
|
||||
if (probs_pos < probs_stop_pos)
|
||||
{
|
||||
|
@ -1326,7 +1326,7 @@ struct llama_server_context
|
|||
{
|
||||
probs = std::vector<completion_token_output>(
|
||||
slot.generated_token_probs.begin(),
|
||||
slot.generated_token_probs.begin() + slot.sent_token_probs_index);
|
||||
slot.generated_token_probs.end());
|
||||
}
|
||||
res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
// GGML internal header
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h> // load `stdlib.h` before other headers to work around MinGW bug: https://sourceforge.net/p/mingw-w64/bugs/192/
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h> // memcpy
|
||||
|
|
|
@ -392,15 +392,21 @@ struct test_case {
|
|||
struct callback_userdata {
|
||||
bool ok;
|
||||
double max_err;
|
||||
ggml_backend_t backend1;
|
||||
ggml_backend_t backend2;
|
||||
};
|
||||
|
||||
callback_userdata ud {
|
||||
true,
|
||||
max_nmse_err(),
|
||||
backend1,
|
||||
backend2
|
||||
};
|
||||
|
||||
auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
|
||||
callback_userdata * ud = (callback_userdata *) user_data;
|
||||
const char * bn1 = ggml_backend_name(ud->backend1);
|
||||
const char * bn2 = ggml_backend_name(ud->backend2);
|
||||
|
||||
if (t1->op == GGML_OP_NONE) {
|
||||
// sentinels must be unchanged
|
||||
|
@ -422,7 +428,7 @@ struct test_case {
|
|||
for (size_t i = 0; i < f1.size(); i++) {
|
||||
// check for nans
|
||||
if (std::isnan(f1[i]) || std::isnan(f2[i])) {
|
||||
printf("[%s] NaN at index %zu (%f %f) ", ggml_op_desc(t1), i, f1[i], f2[i]);
|
||||
printf("[%s] NaN at index %zu (%s=%f %s=%f) ", ggml_op_desc(t1), i, bn1, f1[i], bn2, f2[i]);
|
||||
ud->ok = false;
|
||||
return true;
|
||||
}
|
||||
|
@ -430,12 +436,12 @@ struct test_case {
|
|||
if (isinf_or_max(f1[i]) || isinf_or_max(f2[i])) {
|
||||
if (isinf_or_max(f1[i]) && isinf_or_max(f2[i])) {
|
||||
if (std::signbit(f1[i]) != std::signbit(f2[i])) {
|
||||
printf("[%s] inf sign mismatch: %f %f ", ggml_op_desc(t1), f1[i], f2[i]);
|
||||
printf("[%s] inf sign mismatch: %s=%f %s=%f ", ggml_op_desc(t1), bn1, f1[i], bn2, f2[i]);
|
||||
ud->ok = false;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
printf("[%s] inf mismatch: %f %f ", ggml_op_desc(t1), f1[i], f2[i]);
|
||||
printf("[%s] inf mismatch: %s=%f %s=%f ", ggml_op_desc(t1), bn1, f1[i], bn2, f2[i]);
|
||||
ud->ok = false;
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue