diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 99660455a..a74c9349f 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -124,8 +124,9 @@ static void server_log(const char *level, const char *function, int line,
 static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
 {
     std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
-    // if first bit is 1, meaning it's a partial character
-    if (out.size() > 0 && (out[0] & 0x80) == 0x80)
+    // A lone byte with the high bit set is a partial (multi-byte) character and
+    // is hex-formatted below; a longer string is already a complete token.
+    if (out.size() == 1 && (out[0] & 0x80) == 0x80)
     {
         std::stringstream ss;
         ss << std::hex << (out[0] & 0xff);