common : add comments
This commit is contained in:
parent
9668aa115c
commit
1e7a033f10
2 changed files with 7 additions and 1 deletions
|
@@ -116,15 +116,21 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
|
||||||
// Vocab utils
|
// Vocab utils
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// tokenizes a string into a vector of tokens
|
||||||
|
// should work similar to Python's `tokenizer.encode`
|
||||||
std::vector<llama_token> llama_tokenize(
|
std::vector<llama_token> llama_tokenize(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
const std::string & text,
|
const std::string & text,
|
||||||
bool add_bos);
|
bool add_bos);
|
||||||
|
|
||||||
|
// converts a token into a piece (the text fragment it represents)
|
||||||
|
// should work similar to Python's `tokenizer.id_to_piece`
|
||||||
std::string llama_token_to_piece(
|
std::string llama_token_to_piece(
|
||||||
const struct llama_context * ctx,
|
const struct llama_context * ctx,
|
||||||
llama_token token);
|
llama_token token);
|
||||||
|
|
||||||
|
// detokenizes a vector of tokens into a string
|
||||||
|
// should work similar to Python's `tokenizer.decode`
|
||||||
// removes the leading space from the first non-BOS token
|
// removes the leading space from the first non-BOS token
|
||||||
std::string llama_detokenize(
|
std::string llama_detokenize(
|
||||||
llama_context * ctx,
|
llama_context * ctx,
|
||||||
|
|
2
llama.h
2
llama.h
|
@@ -384,7 +384,7 @@ extern "C" {
|
||||||
// Token Id -> Piece.
|
// Token Id -> Piece.
|
||||||
// Uses the vocabulary in the provided context.
|
// Uses the vocabulary in the provided context.
|
||||||
// Does not write null terminator to the buffer.
|
// Does not write null terminator to the buffer.
|
||||||
// Use code is responsible to remove the leading whitespace of the first non-BOS token.
|
// User code is responsible for removing the leading whitespace of the first non-BOS token when decoding multiple tokens.
|
||||||
LLAMA_API int llama_token_to_piece(
|
LLAMA_API int llama_token_to_piece(
|
||||||
const struct llama_context * ctx,
|
const struct llama_context * ctx,
|
||||||
llama_token token,
|
llama_token token,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue