llama : fix strncpy warning + note token_to_str does not write null
commit 5b94b14d5d
parent a49931300a
6 changed files with 21 additions and 24 deletions
@@ -261,7 +261,6 @@ for part_name in part_names:
     for name in model_part.keys():
         data = model_part[name]

         old_dtype = data.dtype

         # we don't need these
@@ -266,9 +266,6 @@ int main(int argc, char ** argv) {
         params.interactive = true;
     }

-    // determine newline token
-    auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
-
     if (params.verbose_prompt) {
         fprintf(stderr, "\n");
         fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
@@ -778,8 +775,7 @@ int main(int argc, char ** argv) {
     if (grammar != NULL) {
         llama_grammar_free(grammar);

-        std::vector<const llama_grammar_element *> grammar_rules(
-            parsed_grammar.c_rules());
+        std::vector<const llama_grammar_element *> grammar_rules( parsed_grammar.c_rules());
         grammar = llama_grammar_init(
             grammar_rules.data(), grammar_rules.size(),
             parsed_grammar.symbol_ids.at("root"));
@@ -68,10 +68,10 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std:
 }

 // usage:
-// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
+// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
 //
 void usage(const char * executable) {
-    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n\n", executable);
+    fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
     fprintf(stderr, " --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
     fprintf(stderr, " --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
     fprintf(stderr, "\nAllowed quantization types:\n");
@@ -118,8 +118,8 @@ int main(int argc, char ** argv) {
         if (pos != std::string::npos) {
             fpath = fname_inp.substr(0, pos + 1);
         }
-        // export as [inp path]/ggml-model-[ftype].bin
-        fname_out = fpath + "ggml-model-" + ftype_str + ".bin";
+        // export as [inp path]/ggml-model-[ftype].gguf
+        fname_out = fpath + "ggml-model-" + ftype_str + ".gguf";
         arg_idx++;
     }
     else {
@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx = params.n_ctx;
-    lparams.n_gqa = params.n_gqa;
     lparams.seed = params.seed;
     lparams.f16_kv = params.memory_f16;
     lparams.use_mmap = params.use_mmap;
llama.cpp (14 changes)
@@ -4774,7 +4774,8 @@ float * llama_get_embeddings(struct llama_context * ctx) {
     return ctx->embedding.data();
 }

-int llama_token_to_str_with_model(const struct llama_model * model, llama_token token, char * str, int length) {
+// does not write null-terminator to str
+int llama_token_to_str_with_model(const struct llama_model * model, llama_token token, char * buf, int length) {
     if (0 <= token && token < llama_n_vocab_from_model(model)) {
         if (llama_is_normal_token(model->vocab, token)) {
             std::string result = model->vocab.id_to_token[token].tok;
@@ -4784,13 +4785,15 @@ int llama_token_to_str_with_model(const struct llama_model * model, llama_token
             if (length < (int) result.length()) {
                 return -result.length();
             }
-            strncpy(str, result.c_str(), result.length());
+            memcpy(buf, result.c_str(), result.length());
             return result.length();
         } else if (llama_is_unknown_token(model->vocab, token)) { // NOLINT
             if (length < 3) {
                 return -3;
             }
-            strncpy(str, "\xe2\x96\x85", 4);
+            buf[0] = '\xe2';
+            buf[1] = '\x96';
+            buf[2] = '\x85';
             return 3;
         } else if (llama_is_control_token(model->vocab, token)) {
             ;
@@ -4798,8 +4801,7 @@ int llama_token_to_str_with_model(const struct llama_model * model, llama_token
             if (length < 1) {
                 return -1;
             }
-            str[0] = llama_byte_to_char(model->vocab, token);
-            str[1] = 0x00;
+            buf[0] = llama_byte_to_char(model->vocab, token);
             return 1;
         }
     }
@@ -4830,7 +4832,7 @@ int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token,
         if (length < (int) result.length()) {
             return -result.length();
         }
-        strncpy(str, result.c_str(), result.length());
+        memcpy(str, result.c_str(), result.length());
         return result.length();
     }
     return 0;
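On the "strncpy warning" half of the title: copying exactly result.length() bytes with strncpy leaves the destination without a terminating NUL, which newer GCC versions flag with -Wstringop-truncation. Since these functions deliberately never null-terminate, memcpy copies the same bytes while making that intent explicit. A stand-alone sketch of the pattern, not taken from the repo (the helper name is made up for illustration):

#include <cstring>
#include <string>

// copies src into dst without a null terminator; returns bytes written,
// or the negative required size when the buffer is too small
static int copy_no_null(char * dst, int capacity, const std::string & src) {
    if (capacity < (int) src.length()) {
        return -((int) src.length());
    }
    // strncpy(dst, src.c_str(), src.length());   // GCC -Wstringop-truncation: no NUL is written
    memcpy(dst, src.c_str(), src.length());       // same copy, intent is explicit
    return (int) src.length();
}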
llama.h (7 changes)
@@ -355,22 +355,23 @@ extern "C" {
     LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);

     // Token Id -> String. Uses the vocabulary in the provided context
+    // Does not write null terminator to the buffer
     LLAMA_API int llama_token_to_str(
             const struct llama_context * ctx,
             llama_token token,
-            char * str,
+            char * buf,
             int length);

     LLAMA_API int llama_token_to_str_bpe(
             const struct llama_context * ctx,
             llama_token token,
-            char * str,
+            char * buf,
             int length);

     LLAMA_API int llama_token_to_str_with_model(
             const struct llama_model * model,
             llama_token token,
-            char * str,
+            char * buf,
             int length);
     // Special tokens
     LLAMA_API llama_token llama_token_bos(void); // beginning-of-sentence
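Because the returned count is the only way to know how many bytes are valid (a negative value reports the required size, and no NUL is ever written), callers should build strings from the return value. A minimal caller-side sketch against the declarations above; the wrapper name is hypothetical and not part of this patch:

#include <string>
#include <vector>
#include "llama.h"

// hypothetical convenience wrapper around the length-based contract
static std::string token_to_string(const struct llama_context * ctx, llama_token token) {
    std::vector<char> buf(8);
    int n = llama_token_to_str(ctx, token, buf.data(), (int) buf.size());
    if (n < 0) {
        buf.resize(-n);                                      // negative return = required size
        n = llama_token_to_str(ctx, token, buf.data(), (int) buf.size());
    }
    return std::string(buf.data(), n > 0 ? n : 0);           // take exactly n bytes; no NUL expected
}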