diff --git a/CMakeLists.txt b/CMakeLists.txt
index ce3a75d09..af2dc8771 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -486,7 +486,7 @@ if (LLAMA_SYCL)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
 
-        set(GGML_HEADERS_SYCL ggml-cuda.h ggml.h ggml-sycl.hpp)
+        set(GGML_HEADERS_SYCL ggml.h ggml-sycl.h)
         set(GGML_SOURCES_SYCL ggml-sycl.cpp)
 
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} sycl OpenCL mkl_core pthread m dl  mkl_sycl_blas mkl_sycl_lapack mkl_sycl_dft mkl_sycl_sparse mkl_sycl_vm mkl_sycl_rng mkl_sycl_stats mkl_sycl_data_fitting mkl_intel_ilp64 mkl_tbb_thread)
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index bf36355d4..890620c92 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -16,7 +16,7 @@
 #include <dpct/dpct.hpp>
 #include <dpct/blas_utils.hpp>
 #include <dpct/lib_common_utils.hpp>
-#include "ggml-sycl.hpp"
+#include "ggml-sycl.h"
 #include "ggml.h"
 #include "ggml-backend-impl.h"
 
diff --git a/ggml-sycl.hpp b/ggml-sycl.h
similarity index 87%
rename from ggml-sycl.hpp
rename to ggml-sycl.h
index 6fc54bc8a..9530c54c2 100644
--- a/ggml-sycl.hpp
+++ b/ggml-sycl.h
@@ -10,10 +10,8 @@ extern "C" {
 #define GGML_SYCL_MAX_DEVICES       16
 #define GGML_SYCL_NAME "SYCL"
 
-// Always success. To check if SYCL is actually loaded, use `ggml_sycl_loaded`.
 GGML_API void   ggml_init_sycl(void);
 
-// Returns `true` if there are available SYCL devices and cublas loads successfully; otherwise, it returns `false`.
 GGML_API bool   ggml_sycl_loaded(void);
 
 GGML_API void * ggml_sycl_host_malloc(size_t size);
@@ -41,7 +39,6 @@ GGML_API bool   ggml_sycl_compute_forward(struct ggml_compute_params * params, s
 GGML_API int    ggml_sycl_get_device_count(void);
 GGML_API void   ggml_sycl_get_device_description(int device, char * description, size_t description_size);
 
-// backend API
 GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
 
 GGML_API bool ggml_backend_is_sycl(ggml_backend_t backend);
@@ -49,7 +46,6 @@ GGML_API int  ggml_backend_sycl_get_device(ggml_backend_t backend);
 
 GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
 
-// pinned host buffer for use with CPU backend for faster copies between CPU and GPU
 GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
 
 int get_main_device();
diff --git a/ggml.c b/ggml.c
index c4bec0287..d59af30b3 100644
--- a/ggml.c
+++ b/ggml.c
@@ -249,7 +249,7 @@ inline static void * ggml_aligned_malloc(size_t size) {
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
 #elif defined(GGML_USE_SYCL)
-#include "ggml-sycl.hpp"
+#include "ggml-sycl.h"
 #endif
 
 // floating point type used to accumulate sums
diff --git a/llama.cpp b/llama.cpp
index e1e7a56a2..61bafc9d2 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12,7 +12,7 @@
 #elif defined(GGML_USE_CLBLAST)
 #  include "ggml-opencl.h"
 #elif defined(GGML_USE_SYCL)
-#  include "ggml-sycl.hpp"
+#  include "ggml-sycl.h"
 #endif
 
 #ifdef GGML_USE_METAL
diff --git a/llama.h b/llama.h
index b083111fa..07253e115 100644
--- a/llama.h
+++ b/llama.h
@@ -7,7 +7,7 @@
 #include "ggml-cuda.h"
 #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
 #elif defined(GGML_USE_SYCL)
-#include "ggml-sycl.hpp"
+#include "ggml-sycl.h"
 #define LLAMA_MAX_DEVICES GGML_SYCL_MAX_DEVICES
 #else
 #define LLAMA_MAX_DEVICES 1