diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index fb7982655..c91af4cbd 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -514,7 +514,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
 
     // mmap consistently increases speed Linux, and also increases speed on Windows with
     // hot cache. It may cause a slowdown on macOS, possibly related to free memory.
-#if defined(__linux__) || defined(_WIN32)
+#if (defined(__linux__) && !defined(__s390x__)) || defined(_WIN32)
     constexpr bool use_mmap = true;
 #else
     constexpr bool use_mmap = false;