Attempting to add Stanford changes

This commit is contained in:
Steven Prichard 2024-04-29 18:02:43 -05:00
parent b8c1476e44
commit 0ea4873895
5 changed files with 123 additions and 0 deletions

View file

@ -11,6 +11,11 @@ var sources = [
"ggml-alloc.c",
"ggml-backend.c",
"ggml-quants.c",
"common/common.cpp",
"common/grammar-parser.cpp",
"common/sampling.cpp",
"common/log.cpp",
"tokenize.cpp"
]
var resources: [Resource] = []

37
tokenize.cpp Normal file
View file

@ -0,0 +1,37 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#include "tokenize.h"
/// Tokenize a `String` via a given `llama_context`.
///
/// Thin forwarding wrapper around llama.cpp's `llama_tokenize` common helper,
/// exposed with an unambiguous name for interop callers.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced.
/// @return The token sequence produced by `llama_tokenize`.
std::vector<llama_token> llama_tokenize_with_context(
    const struct llama_context * ctx,
    const std::string & text,
    bool add_bos,
    bool special) {
    auto tokens = llama_tokenize(ctx, text, add_bos, special);
    return tokens;
}
/// Tokenize a `char` array via a given `llama_context`.
///
/// This overload exists as a C-string interop entry point, so it must be
/// defensive about its input.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text Null-terminated C string to tokenize; may be `nullptr`.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced.
/// @return The token sequence, or an empty vector when `text` is `nullptr`.
std::vector<llama_token> llama_tokenize_with_context_from_char_array(
    const struct llama_context * ctx,
    const char* text,
    bool add_bos,
    bool special) {
    // Guard the interop boundary: constructing std::string from a null
    // pointer is undefined behavior.
    if (text == nullptr) {
        return {};
    }
    return llama_tokenize(ctx, std::string(text), add_bos, special);
}
/// Tokenize a `String` via a given `llama_model`.
///
/// Model-based counterpart of `llama_tokenize_with_context`; forwards
/// directly to llama.cpp's `llama_tokenize` common helper.
///
/// @param model The `llama_model` whose vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced.
/// @return The token sequence produced by `llama_tokenize`.
std::vector<llama_token> llama_tokenize_with_model(
    const struct llama_model * model,
    const std::string & text,
    bool add_bos,
    bool special) {
    auto result = llama_tokenize(model, text, add_bos, special);
    return result;
}

38
tokenize.h Normal file
View file

@ -0,0 +1,38 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#ifndef tokenize_hpp
#define tokenize_hpp
#include <vector>
#include <string>
#include "common/common.h"
/// Tokenize a `String` via a given `llama_context`.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced (defaults to `false`).
/// @return The resulting token sequence.
std::vector<llama_token> llama_tokenize_with_context(
const struct llama_context * ctx,
const std::string & text,
bool add_bos,
bool special = false);
/// Tokenize a `char` array via a given `llama_context`.
///
/// C-string variant of `llama_tokenize_with_context`, intended for interop callers.
///
/// @param ctx The `llama_context` whose model vocabulary is used.
/// @param text Null-terminated C string to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced (defaults to `false`).
/// @return The resulting token sequence.
std::vector<llama_token> llama_tokenize_with_context_from_char_array(
const struct llama_context * ctx,
const char* text,
bool add_bos,
bool special = false);
/// Tokenize a `String` via a given `llama_model`.
///
/// @param model The `llama_model` whose vocabulary is used.
/// @param text The UTF-8 text to tokenize.
/// @param add_bos Whether a beginning-of-sequence token is prepended.
/// @param special Whether special/control tokens may be produced (defaults to `false`).
/// @return The resulting token sequence.
std::vector<llama_token> llama_tokenize_with_model(
const struct llama_model * model,
const std::string & text,
bool add_bos,
bool special = false);
#endif

21
vector.cpp Normal file
View file

@ -0,0 +1,21 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#include "vector.h"
/// Create a `vector` of `llama_seq_id`s that serves as a buffer for batch
/// processing.
///
/// @return A vector holding the single sequence id `0`.
/// NOTE(review): the `const` on the return type matches the declaration in
/// vector.h and must stay in sync with it.
const std::vector<llama_seq_id> getLlamaSeqIdVector() {
    return std::vector<llama_seq_id>{ 0 };
}
/// Get a raw `array` (pointer) view of a C++ `vector`'s contiguous storage.
///
/// @param vec The vector to expose; must outlive any use of the returned
///            pointer.
/// @return Pointer to the vector's first element (may be null/invalid to
///         dereference for an empty vector).
/// NOTE(review): the pointer dangles if `vec` is a temporary or is later
/// resized — callers must keep the vector alive and unmodified.
const int* vectorToIntArray(const std::vector<int>& vec) {
    const int* buffer = vec.data();
    return buffer;
}

22
vector.h Normal file
View file

@ -0,0 +1,22 @@
//
// This source file is part of the Stanford Spezi open source project
//
// SPDX-FileCopyrightText: 2022 Stanford University and the project authors (see CONTRIBUTORS.md)
//
// SPDX-License-Identifier: MIT
//
#ifndef vector_hpp
#define vector_hpp
#include <vector>
#include "common.h"
/// Create an empty `vector` of `llama_seq_id`s that serve as a buffer for batch processing.
///
/// @return A vector holding the single sequence id `0` (see vector.cpp).
const std::vector<llama_seq_id> getLlamaSeqIdVector();
/// Get `array` representation of C++ `vector`.
///
/// @param vec The vector to expose; must outlive any use of the returned pointer.
/// @return Pointer to the vector's contiguous storage (`vec.data()`).
const int* vectorToIntArray(const std::vector<int>& vec);
#endif