From 0f1b21cb90ac6b84a9af70cafb8e13b5389e3b32 Mon Sep 17 00:00:00 2001 From: Bernat Vadell Date: Mon, 20 Mar 2023 18:05:20 +0100 Subject: [PATCH 1/5] Docker - Fix publish docker image in GitHub Registry (#235) * fix publish permission * try to fix docker pipeline using as password github_token & username repository_owner --- .github/workflows/docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index bc9aff7b7..d1a43caa6 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -40,7 +40,7 @@ jobs: uses: docker/login-action@v2 with: registry: ghcr.io - username: ${{ github.actor }} + username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - name: Build and push Docker image (versioned) From a791a68b613b162c88a83f5f0225223bc167c762 Mon Sep 17 00:00:00 2001 From: Mack Straight Date: Mon, 20 Mar 2023 12:26:01 -0700 Subject: [PATCH 2/5] move file magic/version to header, print expected version (#319) --- main.cpp | 10 +++++----- quantize.cpp | 10 +++++----- utils.h | 8 ++++++++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/main.cpp b/main.cpp index 159033373..3321818d3 100644 --- a/main.cpp +++ b/main.cpp @@ -106,12 +106,12 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); - if (magic == 0x67676d6c) { + if (magic == FILE_MAGIC_UNVERSIONED) { fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", __func__, fname.c_str()); return false; } - if (magic != 0x67676d66) { + if (magic != FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } @@ -119,9 +119,9 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab uint32_t format_version; fin.read((char *) &format_version, sizeof(format_version)); - if (format_version != 
1) { - fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n", - __func__, fname.c_str(), format_version); + if (format_version != FILE_VERSION) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", + __func__, fname.c_str(), format_version, FILE_VERSION); return false; } } diff --git a/quantize.cpp b/quantize.cpp index 166e9163a..07db33a3c 100644 --- a/quantize.cpp +++ b/quantize.cpp @@ -64,12 +64,12 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna { uint32_t magic; finp.read((char *) &magic, sizeof(magic)); - if (magic == 0x67676d6c) { + if (magic == FILE_MAGIC_UNVERSIONED) { fprintf(stderr, "%s: invalid model file '%s' (too old, regenerate your model files!)\n", __func__, fname_inp.c_str()); return false; } - if (magic != 0x67676d66) { + if (magic != FILE_MAGIC) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname_inp.c_str()); return false; } @@ -79,9 +79,9 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna uint32_t format_version; finp.read((char *) &format_version, sizeof(format_version)); - if (format_version != 1) { - fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ")\n", - __func__, fname_inp.c_str(), format_version); + if (format_version != FILE_VERSION) { + fprintf(stderr, "%s: invalid model file '%s' (unsupported format version %" PRIu32 ", expected %d)\n", + __func__, fname_inp.c_str(), format_version, FILE_VERSION); return false; } diff --git a/utils.h b/utils.h index b3a0f4724..65fe02ba1 100644 --- a/utils.h +++ b/utils.h @@ -48,6 +48,14 @@ void gpt_print_usage(int argc, char ** argv, const gpt_params & params); std::string gpt_random_prompt(std::mt19937 & rng); +// +// Model file parsing +// + +#define FILE_MAGIC_UNVERSIONED 0x67676d6c // pre-versioned files +#define FILE_MAGIC 0x67676d66 // 'ggmf' in hex +#define FILE_VERSION 1 + // // 
Vocab utils // From 6b6d5b5024faaf82019d08cde5e8a9d69c6ca316 Mon Sep 17 00:00:00 2001 From: Qingyou Meng Date: Tue, 21 Mar 2023 03:33:10 +0800 Subject: [PATCH 3/5] Fixed tokenizer.model not found error when model dir is symlink (#325) --- convert-pth-to-ggml.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py index 42f537769..108eb1fcc 100644 --- a/convert-pth-to-ggml.py +++ b/convert-pth-to-ggml.py @@ -17,6 +17,7 @@ # and vocabulary. # import argparse +import os import sys import json import struct @@ -44,8 +45,14 @@ def get_n_parts(dim): def load_hparams_and_tokenizer(dir_model): + # `dir_model` is something like `models/7B` or `models/7B/`. + # "tokenizer.model" is expected under model's parent dir. + # When `dir_model` is a symlink, f"{dir_model}/../tokenizer.model" would not be found. + # Let's use the model's parent dir directly. + model_parent_dir = os.path.dirname(os.path.normpath(dir_model)) + fname_hparams = f"{dir_model}/params.json" - fname_tokenizer = f"{dir_model}/../tokenizer.model" + fname_tokenizer = f"{model_parent_dir}/tokenizer.model" with open(fname_hparams, "r") as f: hparams = json.load(f) From bd4b46d6ba504b99c936f43fc014529adffb6048 Mon Sep 17 00:00:00 2001 From: Ben Siraphob Date: Mon, 20 Mar 2023 16:44:30 -0500 Subject: [PATCH 4/5] Nix flake: set meta.mainProgram to llama --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index dae4ff60f..da4bd7ba3 100644 --- a/flake.nix +++ b/flake.nix @@ -34,6 +34,7 @@ cat ${./convert-pth-to-ggml.py} >> $out/bin/convert-pth-to-ggml chmod +x $out/bin/convert-pth-to-ggml ''; + meta.mainProgram = "llama"; }; devShells.default = pkgs.mkShell { packages = with pkgs; [ From 8cf9f34eddc124d4ab28f4d2fe8e99d574510bde Mon Sep 17 00:00:00 2001 From: nusu-github <29514220+nusu-github@users.noreply.github.com> Date: Tue, 21 Mar 2023 09:37:16 +0900 Subject: [PATCH 5/5] Adding missing features of CMakeLists.txt 
& Refactoring (#131) * Functionality addition CMakeLists.txt Refactoring: 1. Simplify more options that are negation of negation. LLAMA_NO_ACCELERATE -> LLAMA_ACCELERATE 2. Changed to an optional expression instead of forcing to enable AVX2 in MSVC. 3. Make CMAKE_CXX_STANDARD, which is different from Makefile, the same. 4. Use add_compile_options instead of adding options to CMAKE_C_FLAGS. 5. Make utils use target_link_libraries instead of directly referencing code. Added features: 1. Added some options. LLAMA_STATIC_LINK,LLAMA_NATIVE,LLAMA_LTO,LLAMA_GPROF,LLAMA_OPENBLAS * Fix Accelerate link in CMake * Windows build Fix * C++11 to C++17 * Reflects C/C++ standard individually * Change the version to 3.12 --------- Co-authored-by: Georgi Gerganov --- CMakeLists.txt | 247 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 162 insertions(+), 85 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38e7266dc..7f46513c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,131 +1,208 @@ -cmake_minimum_required(VERSION 3.8) -project("llama.cpp") - -set(CMAKE_CXX_STANDARD 20) -set(CMAKE_CXX_STANDARD_REQUIRED true) -set(CMAKE_C_STANDARD 11) -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) +cmake_minimum_required(VERSION 3.12) +project("llama.cpp" C CXX) if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() -option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) -option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +# +# Option list +# -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) +# general +option(LLAMA_STATIC "llama: static link libraries" 
OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) -if (APPLE) - option(LLAMA_NO_ACCELERATE "llama: disable Accelerate framework" OFF) - option(LLAMA_NO_AVX "llama: disable AVX" OFF) - option(LLAMA_NO_AVX2 "llama: disable AVX2" OFF) - option(LLAMA_NO_FMA "llama: disable FMA" OFF) -endif() +# debug +option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) +option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) + +# sanitizers +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) + +# instruction set specific +option(LLAMA_AVX "llama: enable AVX" ON) +option(LLAMA_AVX2 "llama: enable AVX2" ON) +option(LLAMA_FMA "llama: enable FMA" ON) + +# 3rd party libs +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) + +# +# Compile flags +# + +set(CMAKE_CXX_STANDARD_REQUIRED true) +set(CMAKE_C_STANDARD_REQUIRED true) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) if (NOT MSVC) if (LLAMA_SANITIZE_THREAD) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread") + add_compile_options(-fsanitize=thread) endif() if (LLAMA_SANITIZE_ADDRESS) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) endif() if (LLAMA_SANITIZE_UNDEFINED) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined") + add_compile_options(-fsanitize=undefined) endif() endif() -if (APPLE AND 
NOT LLAMA_NO_ACCELERATE) +if (APPLE AND LLAMA_ACCELERATE) find_library(ACCELERATE_FRAMEWORK Accelerate) if (ACCELERATE_FRAMEWORK) message(STATUS "Accelerate framework found") - set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) - set(LLAMA_EXTRA_FLAGS ${LLAMA_EXTRA_FLAGS} -DGGML_USE_ACCELERATE) + add_compile_definitions(GGML_USE_ACCELERATE) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) else() message(WARNING "Accelerate framework not found") endif() endif() +if (LLAMA_OPENBLAS) + if (LLAMA_STATIC) + set(BLA_STATIC ON) + endif() + + set(BLA_VENDOR OpenBLAS) + find_package(BLAS) + if (BLAS_FOUND) + message(STATUS "OpenBLAS found") + + add_compile_definitions(GGML_USE_OPENBLAS) + add_link_options(${BLAS_LIBRARIES}) + else() + message(WARNING "OpenBLAS not found") + endif() +endif() if (LLAMA_ALL_WARNINGS) if (NOT MSVC) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \ - -Wall \ - -Wextra \ - -Wpedantic \ - -Wshadow \ - -Wcast-qual \ - -Wstrict-prototypes \ - -Wpointer-arith \ - -Wno-unused-function \ - ") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ - -Wall \ - -Wextra \ - -Wpedantic \ - -Wcast-qual \ - ") + set(c_flags + -Wall + -Wextra + -Wpedantic + -Wshadow + -Wcast-qual + -Wstrict-prototypes + -Wpointer-arith + -Wno-unused-function + ) + set(cxx_flags + -Wall + -Wextra + -Wpedantic + -Wcast-qual + ) else() # todo : msvc endif() + + add_compile_options( + "$<$<COMPILE_LANGUAGE:C>:${c_flags}>" + "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>" + ) + endif() -message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") - -if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") - message(STATUS "ARM detected") -else() - message(STATUS "x86 detected") - if (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /arch:AVX2") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /arch:AVX2") +if (LLAMA_LTO) + include(CheckIPOSupported) + check_ipo_supported(RESULT result OUTPUT output) + if (result) + 
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) else() - if(NOT LLAMA_NO_AVX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx") - endif() - if(NOT LLAMA_NO_AVX2) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") - endif() - if(NOT LLAMA_NO_FMA) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma") - endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mf16c") + message(WARNING "IPO is not supported: ${output}") endif() endif() -# if (LLAMA_PERF) -# set(LLAMA_EXTRA_FLAGS ${LLAMA_EXTRA_FLAGS} -DGGML_PERF) -# endif() +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +if (NOT MSVC) + if (LLAMA_STATIC) + add_link_options(-static) + if (MINGW) + add_link_options(-static-libgcc -static-libstdc++) + endif() + endif() + if (LLAMA_GPROF) + add_compile_options(-pg) + endif() + if (LLAMA_NATIVE) + add_compile_options(-march=native) + endif() +endif() -add_executable(llama - main.cpp - utils.cpp - utils.h) +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + message(STATUS "ARM detected") + if (MSVC) + # TODO: arm msvc? 
+ else() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + add_compile_options(-mcpu=native) + endif() + # TODO: armv6,7,8 version specific flags + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") + message(STATUS "x86 detected") + if (MSVC) + if (LLAMA_AVX2) + add_compile_options(/arch:AVX2) + elseif (LLAMA_AVX) + add_compile_options(/arch:AVX) + endif() + else() + add_compile_options(-mf16c) + if (LLAMA_FMA) + add_compile_options(-mfma) + endif() + if (LLAMA_AVX) + add_compile_options(-mavx) + endif() + if (LLAMA_AVX2) + add_compile_options(-mavx2) + endif() + endif() +else() + # TODO: support PowerPC + message(STATUS "Unknown architecture") +endif() -add_executable(quantize - quantize.cpp - utils.cpp - utils.h) -add_library(ggml - ggml.c - ggml.h) +# +# Build library +# -target_compile_definitions(ggml PUBLIC ${LLAMA_EXTRA_FLAGS}) -target_compile_definitions(llama PUBLIC ${LLAMA_EXTRA_FLAGS}) -target_compile_definitions(quantize PUBLIC ${LLAMA_EXTRA_FLAGS}) +add_executable(llama main.cpp) + +add_executable(quantize quantize.cpp) + +add_library(ggml OBJECT + ggml.c + ggml.h) + +add_library(utils OBJECT + utils.cpp + utils.h) -target_link_libraries(ggml PRIVATE ${LLAMA_EXTRA_LIBS}) target_include_directories(ggml PUBLIC .) -target_link_libraries(quantize PRIVATE ggml) -target_link_libraries(llama PRIVATE ggml) -target_link_libraries(ggml PRIVATE Threads::Threads) +target_compile_features(ggml PUBLIC c_std_11) +target_compile_features(utils PUBLIC cxx_std_17) + +# +# Linking +# + +target_link_libraries(ggml PRIVATE Threads::Threads ${LLAMA_EXTRA_LIBS}) +target_link_libraries(llama PRIVATE ggml utils) +target_link_libraries(quantize PRIVATE ggml utils)