convert-new.py : output gguf (#2635)

* convert-new.py : output gguf (WIP)
* convert-new.py : add gguf key-value pairs
* llama : add hparams.ctx_train + no longer print ftype
* convert-new.py : minor fixes
* convert-new.py : vocab-only option should work now
* llama : fix tokenizer to use llama_char_to_byte
* tests : add new ggml-vocab-llama.gguf
* convert-new.py : tensor name mapping
* convert-new.py : add map for skipping tensor serialization
* convert-new.py : convert script now works
* gguf.py : pick some of the refactoring from #2644
* convert-new.py : minor fixes
2023-08-17 17:19:52 +03:00 · 2023-08-17 17:19:52 +03:00 · e0429d38e4
commit e0429d38e4
parent d6fd53afd6
9 changed files with 526 additions and 327 deletions
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -26,10 +26,10 @@ llama_build_and_test_executable(test-quantize-fns.cpp)
 llama_build_and_test_executable(test-quantize-perf.cpp)
 llama_build_and_test_executable(test-sampling.cpp)
 llama_build_executable(test-tokenizer-0.cpp)
-llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
+llama_test_executable(test-tokenizer-0.llama test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
 llama_build_executable(test-tokenizer-1.cpp)
-llama_test_executable(test-tokenizer-1.llama test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.bin)
-llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.bin)
+llama_test_executable(test-tokenizer-1.llama  test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+#llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
 llama_build_and_test_executable(test-grammar-parser.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../examples/grammar-parser.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -89,6 +89,8 @@ int main(int argc, char **argv) {
        return 2;
    }

+    bool success = true;
+
    for (const auto & test_kv : k_tests()) {
        std::vector<llama_token> res = llama_tokenize(ctx, test_kv.first, true);
        fprintf(stderr, "%s : '%s' tokenized to '%s'\n",
@@ -103,7 +105,8 @@ int main(int argc, char **argv) {
        }

        if (!correct) {
-            fprintf(stderr, "%s : failed test: '%s'\n", __func__, test_kv.first.c_str());
+            fprintf(stderr, "%s : failed test:    '%s'\n", __func__, test_kv.first.c_str());
+            fprintf(stderr, "%s : detokenized to: '%s'\n", __func__, unescape_whitespace(ctx, test_kv.second).c_str());
            fprintf(stderr, "%s : expected tokens: ", __func__);
            for (const auto & t : test_kv.second) {
                fprintf(stderr, "%6d, ", t);
@@ -115,9 +118,7 @@ int main(int argc, char **argv) {
            }
            fprintf(stderr, "\n");

-            llama_free_model(model);
-            llama_free(ctx);
-            return 3;
+            success = false;
        }
    }

@@ -126,5 +127,5 @@ int main(int argc, char **argv) {

    llama_backend_free();

-    return 0;
+    return success ? 0 : 3;
 }