Attempting to add Stanford changes

This commit is contained in:
Steven Prichard 2024-04-29 18:02:43 -05:00
parent b8c1476e44
commit 0ea4873895
5 changed files with 123 additions and 0 deletions

View file

@ -11,6 +11,11 @@ var sources = [
"ggml-alloc.c",
"ggml-backend.c",
"ggml-quants.c",
"common/common.cpp",
"common/grammar-parser.cpp",
"common/sampling.cpp",
"common/log.cpp",
"tokenize.cpp"
]
var resources: [Resource] = []

37
tokenize.cpp Normal file
View file

@ -0,0 +1,37 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#include "tokenize.h"
/// Tokenize a `String` via a given `llama_context`.
///
/// Thin forwarding wrapper around llama.cpp's `llama_tokenize` common helper,
/// exposed with an unambiguous name for interop callers.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced.
/// @return The token sequence produced by `llama_tokenize`.
std::vector<llama_token> llama_tokenize_with_context(
    const struct llama_context * ctx,
    const std::string & text,
    bool add_bos,
    bool special) {
    auto tokens = llama_tokenize(ctx, text, add_bos, special);
    return tokens;
}
/// Tokenize a `char` array via a given `llama_context`.
///
/// This overload exists as a C-string interop entry point, so it must be
/// defensive about its input.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text Null-terminated C string to tokenize; may be `nullptr`.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced.
/// @return The token sequence, or an empty vector when `text` is `nullptr`.
std::vector<llama_token> llama_tokenize_with_context_from_char_array(
    const struct llama_context * ctx,
    const char* text,
    bool add_bos,
    bool special) {
    // Guard the interop boundary: constructing std::string from a null
    // pointer is undefined behavior.
    if (text == nullptr) {
        return {};
    }
    return llama_tokenize(ctx, std::string(text), add_bos, special);
}
/// Tokenize a `String` via a given `llama_model`.
///
/// Model-based counterpart of `llama_tokenize_with_context`; forwards
/// directly to llama.cpp's `llama_tokenize` common helper.
///
/// @param model The `llama_model` whose vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced.
/// @return The token sequence produced by `llama_tokenize`.
std::vector<llama_token> llama_tokenize_with_model(
    const struct llama_model * model,
    const std::string & text,
    bool add_bos,
    bool special) {
    auto result = llama_tokenize(model, text, add_bos, special);
    return result;
}

38
tokenize.h Normal file
View file

@ -0,0 +1,38 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#ifndef tokenize_hpp
#define tokenize_hpp
#include <vector>
#include <string>
#include "common/common.h"
/// Tokenize a `String` via a given `llama_context`.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced (defaults to `false`).
/// @return The resulting token sequence.
std::vector<llama_token> llama_tokenize_with_context(
const struct llama_context * ctx,
const std::string & text,
bool add_bos,
bool special = false);
/// Tokenize a `char` array via a given `llama_context`.
///
/// C-string variant of `llama_tokenize_with_context`, intended for interop callers.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text Null-terminated C string to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced (defaults to `false`).
/// @return The resulting token sequence.
std::vector<llama_token> llama_tokenize_with_context_from_char_array(
const struct llama_context * ctx,
const char* text,
bool add_bos,
bool special = false);
/// Tokenize a `String` via a given `llama_model`.
///
/// @param model The `llama_model` whose vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced (defaults to `false`).
/// @return The resulting token sequence.
std::vector<llama_token> llama_tokenize_with_model(
const struct llama_model * model,
const std::string & text,
bool add_bos,
bool special = false);
#endif

21
vector.cpp Normal file
View file

@ -0,0 +1,21 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#include "vector.h"
/// Create a `vector` of `llama_seq_id`s that serves as a buffer for batch
/// processing.
///
/// @return A vector holding the single sequence id `0`.
/// NOTE(review): the `const` on the return type matches the declaration in
/// vector.h and must stay in sync with it.
const std::vector<llama_seq_id> getLlamaSeqIdVector() {
    return std::vector<llama_seq_id>{ 0 };
}
/// Get a raw `array` (pointer) view of a C++ `vector`'s contiguous storage.
///
/// @param vec The vector to expose; must outlive any use of the returned
///            pointer.
/// @return Pointer to the vector's first element (may be null/invalid to
///         dereference for an empty vector).
/// NOTE(review): the pointer dangles if `vec` is a temporary or is later
/// resized — callers must keep the vector alive and unmodified.
const int* vectorToIntArray(const std::vector<int>& vec) {
    const int* buffer = vec.data();
    return buffer;
}

22
vector.h Normal file
View file

@ -0,0 +1,22 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#ifndef vector_hpp
#define vector_hpp
#include <vector>
#include "common.h"
/// Create an empty `vector` of `llama_seq_id`s that serve as a buffer for batch processing.
///
/// @return A vector holding the single sequence id `0` (see vector.cpp).
const std::vector<llama_seq_id> getLlamaSeqIdVector();
/// Get `array` representation of C++ `vector`.
///
/// @param vec The vector to expose; must outlive any use of the returned pointer.
/// @return Pointer to the vector's contiguous storage (`vec.data()`).
const int* vectorToIntArray(const std::vector<int>& vec);
#endif