attempting to add stanford changes

2024-04-29 18:02:43 -05:00 · 2024-04-29 18:02:43 -05:00 · 0ea4873895
commit 0ea4873895
parent b8c1476e44
5 changed files with 123 additions and 0 deletions
--- a/Package.swift
+++ b/Package.swift
@ -11,6 +11,11 @@ var sources = [
    "ggml-alloc.c",
    "ggml-backend.c",
    "ggml-quants.c",
+    "common/common.cpp",
+    "common/grammar-parser.cpp",
+    "common/sampling.cpp",
+    "common/log.cpp",
+    "tokenize.cpp"
 ]

 var resources: [Resource] = []
--- a/tokenize.cpp
+++ b/tokenize.cpp
@ -0,0 +1,37 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#include "tokenize.h"
+
+
+/// Tokenizes `text` via the given `llama_context`.
+///
+/// Thin forwarding wrapper: every argument is handed unchanged to the
+/// `llama_tokenize` overload that accepts a context, and that call's
+/// result is returned to the caller.
+std::vector<llama_token> llama_tokenize_with_context(
+     const struct llama_context * ctx,
+     const std::string & text,
+     bool add_bos,
+     bool special) {
+    auto tokens = llama_tokenize(ctx, text, add_bos, special);
+    return tokens;
+}
+
+/// Tokenize a `char` array via a given `llama_context`.
+///
+/// @param ctx      Context forwarded unchanged to `llama_tokenize`.
+/// @param text     NUL-terminated character array; a null pointer is treated as empty text.
+/// @param add_bos  Forwarded unchanged to `llama_tokenize`.
+/// @param special  Forwarded unchanged to `llama_tokenize`.
+/// @return The token vector produced by `llama_tokenize`.
+std::vector<llama_token> llama_tokenize_with_context_from_char_array(
+     const struct llama_context * ctx,
+     const char* text,
+     bool add_bos,
+     bool special) {
+    // Building a std::string from a null pointer is undefined behavior, so map null to "".
+    const std::string input = (text != nullptr) ? std::string(text) : std::string();
+    return llama_tokenize(ctx, input, add_bos, special);
+}
+
+/// Tokenizes `text` via the given `llama_model`.
+///
+/// Thin forwarding wrapper: every argument is handed unchanged to the
+/// `llama_tokenize` overload that accepts a model, and that call's
+/// result is returned to the caller.
+std::vector<llama_token> llama_tokenize_with_model(
+     const struct llama_model * model,
+     const std::string & text,
+     bool add_bos,
+     bool special) {
+    auto tokens = llama_tokenize(model, text, add_bos, special);
+    return tokens;
+}
--- a/tokenize.h
+++ b/tokenize.h
@ -0,0 +1,38 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#ifndef tokenize_hpp
+#define tokenize_hpp
+
+#include <vector>
+#include <string>
+#include "common/common.h"
+
+
+/// Tokenize a `String` via a given `llama_context`.
+///
+/// The definition forwards all arguments unchanged to `llama_tokenize`.
+///
+/// @param ctx      Context passed through to `llama_tokenize`.
+/// @param text     Text to tokenize.
+/// @param add_bos  Passed through to `llama_tokenize` (presumably controls a
+///                 beginning-of-sequence token; confirm against `common/common.h`).
+/// @param special  Passed through to `llama_tokenize`; defaults to `false`.
+/// @return The token vector returned by `llama_tokenize`.
+std::vector<llama_token> llama_tokenize_with_context(
+     const struct llama_context * ctx,
+     const std::string & text,
+     bool add_bos,
+     bool special = false);
+
+/// Tokenize a `char` array via a given `llama_context`.
+///
+/// The definition converts `text` to a `std::string` and forwards it, together
+/// with the remaining arguments, to `llama_tokenize`.
+///
+/// @param ctx      Context passed through to `llama_tokenize`.
+/// @param text     NUL-terminated character array to tokenize.
+/// @param add_bos  Passed through to `llama_tokenize` (presumably controls a
+///                 beginning-of-sequence token; confirm against `common/common.h`).
+/// @param special  Passed through to `llama_tokenize`; defaults to `false`.
+/// @return The token vector returned by `llama_tokenize`.
+std::vector<llama_token> llama_tokenize_with_context_from_char_array(
+     const struct llama_context * ctx,
+     const char* text,
+     bool add_bos,
+     bool special = false);
+
+/// Tokenize a `String` via a given `llama_model`.
+///
+/// The definition forwards all arguments unchanged to `llama_tokenize`.
+///
+/// @param model    Model passed through to `llama_tokenize`.
+/// @param text     Text to tokenize.
+/// @param add_bos  Passed through to `llama_tokenize` (presumably controls a
+///                 beginning-of-sequence token; confirm against `common/common.h`).
+/// @param special  Passed through to `llama_tokenize`; defaults to `false`.
+/// @return The token vector returned by `llama_tokenize`.
+std::vector<llama_token> llama_tokenize_with_model(
+     const struct llama_model * model,
+     const std::string & text,
+     bool add_bos,
+     bool special = false);
+
+#endif
--- a/vector.cpp
+++ b/vector.cpp
@ -0,0 +1,21 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#include "vector.h"
+
+
+/// Builds a `vector` of `llama_seq_id`s holding exactly one element with value `0`.
+///
+/// Note that the result is not empty: it is a one-element vector, intended to
+/// serve as a sequence-id buffer for batch processing.
+const std::vector<llama_seq_id> getLlamaSeqIdVector() {
+    return { 0 };
+}
+
+/// Exposes the contiguous `int` storage of a C++ `vector` as a raw array pointer.
+///
+/// The pointer is borrowed from `vec` (it is `vec.data()`), so it stays usable
+/// only while `vec` is alive and its storage has not been reallocated.
+const int* vectorToIntArray(const std::vector<int>& vec) {
+    const int* const storage = vec.data();
+    return storage;
+}
--- a/vector.h
+++ b/vector.h
@ -0,0 +1,22 @@
+//
+// This source file is part of the Stanford Spezi open source project
+//
+// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
+//
+// SPDX-License-Identifier: MIT
+//
+
+#ifndef vector_hpp
+#define vector_hpp
+
+#include <vector>
+#include "common.h"
+
+
+/// Create a `vector` of `llama_seq_id`s that serves as a buffer for batch processing.
+///
+/// The definition returns a vector containing a single element with value `0`
+/// (the vector is not empty).
+const std::vector<llama_seq_id> getLlamaSeqIdVector();
+
+/// Get `array` representation of C++ `vector`.
+///
+/// The definition returns `vec.data()`: the pointer refers to storage owned by
+/// `vec`, so it must not be used after `vec` is destroyed or reallocated.
+const int* vectorToIntArray(const std::vector<int>& vec);
+
+#endif