Fix detokenization of non-special added-tokens
parent daab3d7f45
commit d2f8c9d51b
7 changed files with 21 additions and 15 deletions
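Summary: llama_token_to_piece previously routed added tokens that are not marked special (the USER_DEFINED token type) into an empty else branch, so they produced no output during detokenization. The llama.cpp hunks below copy the raw text of such tokens into the output buffer, for both the SPM and BPE vocabularies, using the same length-check protocol as normal tokens. The CMake hunks are build cleanup in the same commit: the per-example ../../common include path moves into a single include_directories(... ../common) in examples/CMakeLists.txt, and llava and server stop linking the common library. A sketch of the caller-side buffer protocol follows the diff.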
examples/CMakeLists.txt
@@ -8,7 +8,7 @@ find_package(Threads REQUIRED)
 
 # examples
 
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR} ../common)
 
 if (EMSCRIPTEN)
 else()
examples/benchmark/CMakeLists.txt
@@ -2,7 +2,6 @@ set(TARGET benchmark)
 add_executable(${TARGET} benchmark-matmult.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
   add_dependencies(${TARGET} BUILD_INFO)
examples/llava/CMakeLists.txt
@@ -13,7 +13,7 @@ endif()
 set(TARGET llava)
 add_executable(${TARGET} llava.cpp)
 install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama clip ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE llama clip ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
   add_dependencies(${TARGET} BUILD_INFO)
examples/quantize-stats/CMakeLists.txt
@@ -2,5 +2,4 @@ set(TARGET quantize-stats)
 add_executable(${TARGET} quantize-stats.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
examples/quantize/CMakeLists.txt
@@ -2,7 +2,6 @@ set(TARGET quantize)
 add_executable(${TARGET} quantize.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if(TARGET BUILD_INFO)
   add_dependencies(${TARGET} BUILD_INFO)
examples/server/CMakeLists.txt
@@ -1,12 +1,11 @@
 set(TARGET server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 add_executable(${TARGET} server.cpp json.hpp httplib.h)
 install(TARGETS ${TARGET} RUNTIME)
 target_compile_definitions(${TARGET} PRIVATE
     SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
 )
-target_link_libraries(${TARGET} PRIVATE common llama clip ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE llama clip ${CMAKE_THREAD_LIBS_INIT})
 if (WIN32)
     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
 endif()
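The remaining file is llama.cpp. For orientation: the llama_is_*_token helpers in the hunks below classify each vocabulary entry by its token type, and non-special added tokens carry the USER_DEFINED type that this commit starts handling. A paraphrased sketch of the enum as it stood in llama.h around this time; the exact names and values are an assumption here, not part of this diff, so consult the header:

// Token types exposed in llama.h (values mirror the GGUF token_type
// metadata); llama_is_normal_token, llama_is_user_defined_token, etc.
// test against these. Paraphrased from memory, not from this diff.
enum llama_token_type {
    LLAMA_TOKEN_TYPE_UNDEFINED    = 0,
    LLAMA_TOKEN_TYPE_NORMAL       = 1,
    LLAMA_TOKEN_TYPE_UNKNOWN      = 2,
    LLAMA_TOKEN_TYPE_CONTROL      = 3,
    LLAMA_TOKEN_TYPE_USER_DEFINED = 4,
    LLAMA_TOKEN_TYPE_UNUSED       = 5,
    LLAMA_TOKEN_TYPE_BYTE         = 6,
};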
llama.cpp (26 changes)
@@ -9750,6 +9750,8 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
     if (0 <= token && token < llama_n_vocab(model)) {
         switch (llama_vocab_get_type(model->vocab)) {
             case LLAMA_VOCAB_TYPE_SPM: {
+                // NOTE: we accept all unsupported token types,
+                // suppressing them like CONTROL tokens.
                 if (llama_is_normal_token(model->vocab, token)) {
                     std::string result = model->vocab.id_to_token[token].text;
                     llama_unescape_whitespace(result);
@@ -9758,6 +9760,13 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
                     }
                     memcpy(buf, result.c_str(), result.length());
                     return result.length();
+                } else if (llama_is_user_defined_token(model->vocab, token)) {
+                    std::string result = model->vocab.id_to_token[token].text;
+                    if (length < (int) result.length()) {
+                        return -result.length();
+                    }
+                    memcpy(buf, result.c_str(), result.length());
+                    return result.length();
                 } else if (llama_is_unknown_token(model->vocab, token)) { // NOLINT
                     if (length < 3) {
                         return -3;
@@ -9772,14 +9781,12 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
                     }
                     buf[0] = llama_token_to_byte(model->vocab, token);
                     return 1;
-                } else {
-                    // TODO: for now we accept all unsupported token types,
-                    // suppressing them like CONTROL tokens.
-                    // GGML_ASSERT(false);
                 }
                 break;
             }
             case LLAMA_VOCAB_TYPE_BPE: {
+                // NOTE: we accept all unsupported token types,
+                // suppressing them like CONTROL tokens.
                 if (llama_is_normal_token(model->vocab, token)) {
                     std::string result = model->vocab.id_to_token[token].text;
                     result = llama_decode_text(result);
@@ -9788,12 +9795,15 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
                     }
                     memcpy(buf, result.c_str(), result.length());
                     return result.length();
+                } else if (llama_is_user_defined_token(model->vocab, token)) {
+                    std::string result = model->vocab.id_to_token[token].text;
+                    if (length < (int) result.length()) {
+                        return -result.length();
+                    }
+                    memcpy(buf, result.c_str(), result.length());
+                    return result.length();
                 } else if (llama_is_control_token(model->vocab, token)) {
                     ;
-                } else {
-                    // TODO: for now we accept all unsupported token types,
-                    // suppressing them like CONTROL tokens.
-                    // GGML_ASSERT(false);
                 }
                 break;
             }
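For context on the buffer protocol used throughout: llama_token_to_piece returns the number of bytes written, or a negative value whose magnitude is the required buffer size (see the "return -result.length()" lines above). A minimal caller sketch, not part of this commit; the helper name token_to_piece is illustrative:

#include <algorithm>
#include <string>
#include <vector>

#include "llama.h"

// Hypothetical helper (not from this commit): detokenize a single token id
// using the two-call pattern implied by the negative return values above.
static std::string token_to_piece(const llama_model * model, llama_token token) {
    std::vector<char> buf(8, 0);
    int n = llama_token_to_piece(model, token, buf.data(), (int) buf.size());
    if (n < 0) {
        buf.resize(-n); // -n is the byte count the piece requires
        n = llama_token_to_piece(model, token, buf.data(), (int) buf.size());
    }
    // with this fix, a non-special added token yields its raw text here
    // instead of an empty piece
    return std::string(buf.data(), std::max(n, 0));
}

Calling such a helper over each generated token id now reproduces non-special added tokens verbatim instead of eliding them.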