diff --git a/Package.swift b/Package.swift index 183e64757..022778417 100644 --- a/Package.swift +++ b/Package.swift @@ -11,6 +11,11 @@ var sources = [ "ggml-alloc.c", "ggml-backend.c", "ggml-quants.c", + "common/common.cpp", + "common/grammar-parser.cpp", + "common/sampling.cpp", + "common/log.cpp", + "tokenize.cpp" ] var resources: [Resource] = [] diff --git a/tokenize.cpp b/tokenize.cpp new file mode 100644 index 000000000..89ccf5a18 --- /dev/null +++ b/tokenize.cpp @@ -0,0 +1,37 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +#include "tokenize.h" + + +/// Tokenize a `String` via a given `llama_context`. +std::vector<llama_token> llama_tokenize_with_context( + const struct llama_context * ctx, + const std::string & text, + bool add_bos, + bool special) { + return llama_tokenize(ctx, text, add_bos, special); +} + +/// Tokenize a `char` array via a given `llama_context`. +std::vector<llama_token> llama_tokenize_with_context_from_char_array( + const struct llama_context * ctx, + const char* text, + bool add_bos, + bool special) { + return llama_tokenize(ctx, std::string(text), add_bos, special); +} + +/// Tokenize a `String` via a given `llama_model`. 
+std::vector<llama_token> llama_tokenize_with_model( + const struct llama_model * model, + const std::string & text, + bool add_bos, + bool special) { + return llama_tokenize(model, text, add_bos, special); +} diff --git a/tokenize.h b/tokenize.h new file mode 100644 index 000000000..6ece84f6c --- /dev/null +++ b/tokenize.h @@ -0,0 +1,38 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +#ifndef tokenize_hpp +#define tokenize_hpp + +#include <string> +#include <vector> +#include "common/common.h" + + +/// Tokenize a `String` via a given `llama_context`. +std::vector<llama_token> llama_tokenize_with_context( + const struct llama_context * ctx, + const std::string & text, + bool add_bos, + bool special = false); + +/// Tokenize a `char` array via a given `llama_context`. +std::vector<llama_token> llama_tokenize_with_context_from_char_array( + const struct llama_context * ctx, + const char* text, + bool add_bos, + bool special = false); + +/// Tokenize a `String` via a given `llama_model`. +std::vector<llama_token> llama_tokenize_with_model( + const struct llama_model * model, + const std::string & text, + bool add_bos, + bool special = false); + +#endif diff --git a/vector.cpp b/vector.cpp new file mode 100644 index 000000000..598749b57 --- /dev/null +++ b/vector.cpp @@ -0,0 +1,21 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +#include "vector.h" + + +/// Create an empty `vector` of `llama_seq_id`s that serve as a buffer for batch processing. +const std::vector<llama_seq_id> getLlamaSeqIdVector() { + const std::vector<llama_seq_id> vec = { 0 }; + return vec; +} + +/// Get `array` representation of C++ `vector`. 
+const int* vectorToIntArray(const std::vector<int>& vec) { + return vec.data(); +} diff --git a/vector.h b/vector.h new file mode 100644 index 000000000..f916e5465 --- /dev/null +++ b/vector.h @@ -0,0 +1,22 @@ +// +// This source file is part of the Stanford Spezi open source project +// +// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md) +// +// SPDX-License-Identifier: MIT +// + +#ifndef vector_hpp +#define vector_hpp + +#include <vector> +#include "common.h" + + +/// Create an empty `vector` of `llama_seq_id`s that serve as a buffer for batch processing. +const std::vector<llama_seq_id> getLlamaSeqIdVector(); + +/// Get `array` representation of C++ `vector`. +const int* vectorToIntArray(const std::vector<int>& vec); + +#endif