diff --git a/CMakeLists.txt b/CMakeLists.txt index ce3a75d09..af2dc8771 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -486,7 +486,7 @@ if (LLAMA_SYCL) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") - set(GGML_HEADERS_SYCL ggml-cuda.h ggml.h ggml-sycl.hpp) + set(GGML_HEADERS_SYCL ggml.h ggml-sycl.h) set(GGML_SOURCES_SYCL ggml-sycl.cpp) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_sycl_lapack mkl_sycl_dft mkl_sycl_sparse mkl_sycl_vm mkl_sycl_rng mkl_sycl_stats mkl_sycl_data_fitting mkl_intel_ilp64 mkl_tbb_thread) diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index bf36355d4..890620c92 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -16,7 +16,7 @@ #include #include #include -#include "ggml-sycl.hpp" +#include "ggml-sycl.h" #include "ggml.h" #include "ggml-backend-impl.h" diff --git a/ggml-sycl.hpp b/ggml-sycl.h similarity index 87% rename from ggml-sycl.hpp rename to ggml-sycl.h index 6fc54bc8a..9530c54c2 100644 --- a/ggml-sycl.hpp +++ b/ggml-sycl.h @@ -10,10 +10,8 @@ extern "C" { #define GGML_SYCL_MAX_DEVICES 16 #define GGML_SYCL_NAME "SYCL" -// Always success. To check if SYCL is actually loaded, use `ggml_sycl_loaded`. GGML_API void ggml_init_sycl(void); -// Returns `true` if there are available SYCL devices and cublas loads successfully; otherwise, it returns `false`. GGML_API bool ggml_sycl_loaded(void); GGML_API void * ggml_sycl_host_malloc(size_t size); @@ -41,7 +39,6 @@ GGML_API bool ggml_sycl_compute_forward(struct ggml_compute_params * params, s GGML_API int ggml_sycl_get_device_count(void); GGML_API void ggml_sycl_get_device_description(int device, char * description, size_t description_size); -// backend API GGML_API ggml_backend_t ggml_backend_sycl_init(int device); GGML_API bool ggml_backend_is_sycl(ggml_backend_t backend); @@ -49,7 +46,6 @@ GGML_API int ggml_backend_sycl_get_device(ggml_backend_t backend); GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device); -// pinned host buffer for use with CPU backend for faster copies between CPU and GPU GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void); int get_main_device(); diff --git a/ggml.c b/ggml.c index c4bec0287..d59af30b3 100644 --- a/ggml.c +++ b/ggml.c @@ -249,7 +249,7 @@ inline static void * ggml_aligned_malloc(size_t size) { #elif defined(GGML_USE_CLBLAST) #include "ggml-opencl.h" #elif defined(GGML_USE_SYCL) -#include "ggml-sycl.hpp" +#include "ggml-sycl.h" #endif // floating point type used to accumulate sums diff --git a/llama.cpp b/llama.cpp index e1e7a56a2..61bafc9d2 100644 --- a/llama.cpp +++ b/llama.cpp @@ -12,7 +12,7 @@ #elif defined(GGML_USE_CLBLAST) # include "ggml-opencl.h" #elif defined(GGML_USE_SYCL) -# include "ggml-sycl.hpp" +# include "ggml-sycl.h" #endif #ifdef GGML_USE_METAL diff --git a/llama.h b/llama.h index b083111fa..07253e115 100644 --- a/llama.h +++ b/llama.h @@ -7,7 +7,7 @@ #include "ggml-cuda.h" #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES #elif defined(GGML_USE_SYCL) -#include "ggml-sycl.hpp" +#include "ggml-sycl.h" #define LLAMA_MAX_DEVICES GGML_SYCL_MAX_DEVICES #else #define LLAMA_MAX_DEVICES 1