ggml : drop support for QK_K=64 (#7473)

* ggml : drop support for QK_K=64

ggml-ci

* opencl : restore QK_K=256 define
This commit is contained in:
Georgi Gerganov 2024-05-23 10:00:21 +03:00 committed by GitHub
parent 1b1e27cb49
commit e84b71c2c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 26 additions and 4049 deletions

View file

@ -124,7 +124,6 @@ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
option(LLAMA_RPC "llama: use RPC" OFF)
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
option(LLAMA_SYCL "llama: use SYCL" OFF)
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
@ -384,10 +383,6 @@ if (LLAMA_LLAMAFILE)
set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
endif()
if (LLAMA_QKK_64)
add_compile_definitions(GGML_QKK_64)
endif()
if (LLAMA_CUBLAS)
message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead")
set(LLAMA_CUDA ON)