Add tokenizer test + revert to C++11 (#355)

* Add test-tokenizer-0 to do a few tokenizations - feel free to expand
* Added option to convert-pth-to-ggml.py script to dump just the vocabulary
* Added ./models/ggml-vocab.bin containing just LLaMA vocab data (used for tests)
* Added utility to load vocabulary file from previous point (temporary implementation)
* Avoid using std::string_view and drop back to C++11 (hope I didn't break something)
* Rename gpt_vocab -> llama_vocab
* All CMake binaries go into ./bin/ now
2023-03-21 17:29:41 +02:00 · 2023-03-21 17:29:41 +02:00 · eb34620aec
commit eb34620aec
parent 2e664f1ff4
11 changed files with 249 additions and 148 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,11 +1,37 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason
 project("llama.cpp" C CXX)

+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
 endif()

+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+    set(LLAMA_STANDALONE ON)
+
+    # configure project version
+    # TODO
+else()
+    set(LLAMA_STANDALONE OFF)
+endif()
+
+if (EMSCRIPTEN)
+    set(BUILD_SHARED_LIBS_DEFAULT OFF)
+
+    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+else()
+    if (MINGW)
+        set(BUILD_SHARED_LIBS_DEFAULT OFF)
+    else()
+        set(BUILD_SHARED_LIBS_DEFAULT ON)
+    endif()
+endif()
+
+
 #
 # Option list
 #
@@ -34,6 +60,9 @@ option(LLAMA_FMA                    "llama: enable FMA"
 option(LLAMA_ACCELERATE             "llama: enable Accelerate framework"                    ON)
 option(LLAMA_OPENBLAS               "llama: use OpenBLAS"                                   OFF)

+option(LLAMA_BUILD_TESTS            "llama: build tests"    ${LLAMA_STANDALONE})
+option(LLAMA_BUILD_EXAMPLES         "llama: build examples" ${LLAMA_STANDALONE})
+
 #
 # Compile flags
 #
@@ -187,17 +216,19 @@ add_executable(llama main.cpp)

 add_executable(quantize quantize.cpp)

-add_library(ggml OBJECT
-            ggml.c
-            ggml.h)
-
 add_library(utils OBJECT
            utils.cpp
            utils.h)

+target_include_directories(utils PUBLIC .)
+target_compile_features(utils PUBLIC cxx_std_11) # don't bump
+
+add_library(ggml OBJECT
+            ggml.c
+            ggml.h)
+
 target_include_directories(ggml PUBLIC .)
-target_compile_features(ggml PUBLIC c_std_11)
-target_compile_features(utils PUBLIC cxx_std_17)
+target_compile_features(ggml PUBLIC c_std_11) # don't bump

 #
 # Linking
@@ -206,3 +237,16 @@ target_compile_features(utils PUBLIC cxx_std_17)
 target_link_libraries(ggml PRIVATE Threads::Threads ${LLAMA_EXTRA_LIBS})
 target_link_libraries(llama PRIVATE ggml utils)
 target_link_libraries(quantize PRIVATE ggml utils)
+
+#
+# programs, examples and tests
+#
+
+if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+    enable_testing()
+    add_subdirectory(tests)
+endif ()
+
+#if (LLAMA_BUILD_EXAMPLES)
+#    add_subdirectory(examples)
+#endif()