fix debug link error. fix windows crash

This commit is contained in:
luoyu-intel 2024-06-19 07:43:51 +00:00
parent 079dd3f592
commit 4488134edf
5 changed files with 817 additions and 820 deletions

View file

@ -665,6 +665,7 @@ if (LLAMA_SYCL)
#todo: AOT #todo: AOT
find_package(IntelSYCL REQUIRED) find_package(IntelSYCL REQUIRED)
find_package(MKL REQUIRED)
message(STATUS "SYCL found") message(STATUS "SYCL found")
@ -679,11 +680,9 @@ if (LLAMA_SYCL)
endif() endif()
add_compile_options(-I./) #include DPCT add_compile_options(-I./) #include DPCT
add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
endif() endif()
@ -693,8 +692,10 @@ if (LLAMA_SYCL)
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp") list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
if (WIN32) if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
else() else()
add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "INTEL") if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")

View file

@ -19,7 +19,6 @@
"cacheVariables": { "cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON", "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"CMAKE_CXX_COMPILER": "icx", "CMAKE_CXX_COMPILER": "icx",
"CMAKE_C_COMPILER": "icx",
"LLAMA_SYCL": "ON", "LLAMA_SYCL": "ON",
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
} }

View file

@ -4911,7 +4911,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX); GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX); GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
GGML_TENSOR_BINARY_OP_LOCALS; GGML_TENSOR_BINARY_OP_LOCALS01;
SYCL_CHECK(ggml_sycl_set_device(ctx.device)); SYCL_CHECK(ggml_sycl_set_device(ctx.device));
queue_ptr main_stream = ctx.stream(); queue_ptr main_stream = ctx.stream();

View file

@ -220,7 +220,8 @@ namespace dpct
// a. and b. // a. and b.
i++; i++;
minor = std::stoi(&(ver[i])); minor = std::stoi(&(ver[i]));
} else { }
else {
// c. // c.
minor = 0; minor = 0;
} }
@ -594,13 +595,13 @@ namespace dpct
typedef std::mutex mutex_type; typedef std::mutex mutex_type;
public: public:
device_ext() : sycl::device(), _ctx(*this) {} device_ext() : sycl::device() {}
~device_ext() ~device_ext()
{ {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
clear_queues(); clear_queues();
} }
device_ext(const sycl::device &base) : sycl::device(base), _ctx(*this) device_ext(const sycl::device& base) : sycl::device(base)
{ {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
init_queues(); init_queues();
@ -709,9 +710,9 @@ namespace dpct
init_queues(); init_queues();
} }
sycl::queue &in_order_queue() { return *_q_in_order; } sycl::queue& in_order_queue() { return _q_in_order; }
sycl::queue &out_of_order_queue() { return *_q_out_of_order; } sycl::queue& out_of_order_queue() { return _q_out_of_order; }
sycl::queue& default_queue() sycl::queue& default_queue()
{ {
@ -721,85 +722,75 @@ namespace dpct
void queues_wait_and_throw() void queues_wait_and_throw()
{ {
std::unique_lock<mutex_type> lock(m_mutex); std::unique_lock<mutex_type> lock(m_mutex);
std::vector<std::shared_ptr<sycl::queue>> current_queues(
_queues);
lock.unlock(); lock.unlock();
for (const auto &q : current_queues) for (auto& q : _queues)
{ {
q->wait_and_throw(); q.wait_and_throw();
} }
// Guard the destruct of current_queues to make sure the ref count is safe. // Guard the destruct of current_queues to make sure the ref count is safe.
lock.lock(); lock.lock();
} }
sycl::queue *create_queue(bool enable_exception_handler = false) sycl::queue create_queue(bool enable_exception_handler = false)
{ {
return create_in_order_queue(enable_exception_handler); return create_in_order_queue(enable_exception_handler);
} }
sycl::queue *create_queue(sycl::context context, sycl::device device, sycl::queue create_queue(sycl::device device,
bool enable_exception_handler = false) { bool enable_exception_handler = false) {
return create_in_order_queue(context, device, enable_exception_handler); return create_in_order_queue(device, enable_exception_handler);
} }
sycl::queue *create_in_order_queue(bool enable_exception_handler = false) { sycl::queue create_in_order_queue(bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(enable_exception_handler, return create_queue_impl(enable_exception_handler,
sycl::property::queue::in_order()); sycl::property::queue::in_order());
} }
sycl::queue *create_in_order_queue(sycl::context context, sycl::device device, sycl::queue create_in_order_queue(sycl::device device,
bool enable_exception_handler = false) { bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(context, device, enable_exception_handler, return create_queue_impl(device, enable_exception_handler,
sycl::property::queue::in_order()); sycl::property::queue::in_order());
} }
sycl::queue *create_out_of_order_queue(bool enable_exception_handler = false) { sycl::queue create_out_of_order_queue(bool enable_exception_handler = false) {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return create_queue_impl(enable_exception_handler); return create_queue_impl(enable_exception_handler);
} }
void destroy_queue(sycl::queue *&queue) void destroy_queue(sycl::queue queue)
{ {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
_queues.erase(std::remove_if(_queues.begin(), _queues.end(), _queues.clear();
[=](const std::shared_ptr<sycl::queue> &q) -> bool
{
return q.get() == queue;
}),
_queues.end());
queue = nullptr;
} }
void set_saved_queue(sycl::queue *q) void set_saved_queue(sycl::queue q)
{ {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
_saved_queue = q; _saved_queue = q;
} }
sycl::queue *get_saved_queue() const sycl::queue get_saved_queue() const
{ {
std::lock_guard<mutex_type> lock(m_mutex); std::lock_guard<mutex_type> lock(m_mutex);
return _saved_queue; return _saved_queue;
} }
sycl::context get_context() const { return _ctx; }
private: private:
void clear_queues() void clear_queues()
{ {
_queues.clear(); _queues.clear();
_q_in_order = _q_out_of_order = _saved_queue = nullptr;
} }
void init_queues() void init_queues()
{ {
_q_in_order = create_queue_impl(true, sycl::property::queue::in_order()); _q_in_order = create_queue_impl(true, sycl::property::queue::in_order());
_q_out_of_order = create_queue_impl(true); _q_out_of_order = create_queue_impl(true);
_saved_queue = &default_queue(); _saved_queue = default_queue();
} }
/// Caller should acquire resource \p m_mutex before calling this function. /// Caller should acquire resource \p m_mutex before calling this function.
template <class... Properties> template <class... Properties>
sycl::queue *create_queue_impl(bool enable_exception_handler, sycl::queue create_queue_impl(bool enable_exception_handler,
Properties... properties) Properties... properties)
{ {
sycl::async_handler eh = {}; sycl::async_handler eh = {};
@ -807,44 +798,44 @@ namespace dpct
{ {
eh = exception_handler; eh = exception_handler;
} }
_queues.push_back(std::make_shared<sycl::queue>( auto q = sycl::queue(
_ctx, *this, eh, *this, eh,
sycl::property_list( sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED #ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(), sycl::property::queue::enable_profiling(),
#endif #endif
properties...))); properties...));
_queues.push_back(q);
return _queues.back().get(); return _queues.back();
} }
template <class... Properties> template <class... Properties>
sycl::queue *create_queue_impl(sycl::context context, sycl::device device, sycl::queue create_queue_impl(sycl::device device,
bool enable_exception_handler, bool enable_exception_handler,
Properties... properties) { Properties... properties) {
sycl::async_handler eh = {}; sycl::async_handler eh = {};
if (enable_exception_handler) { if (enable_exception_handler) {
eh = exception_handler; eh = exception_handler;
} }
_queues.push_back(std::make_shared<sycl::queue>( _queues.push_back(sycl::queue(
context, device, eh, device, eh,
sycl::property_list( sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED #ifdef DPCT_PROFILING_ENABLED
sycl::property::queue::enable_profiling(), sycl::property::queue::enable_profiling(),
#endif #endif
properties...))); properties...)));
return _queues.back().get(); return _queues.back();
} }
void get_version(int& major, int& minor) const void get_version(int& major, int& minor) const
{ {
detail::get_version(*this, major, minor); detail::get_version(*this, major, minor);
} }
sycl::queue *_q_in_order, *_q_out_of_order; sycl::queue _q_in_order, _q_out_of_order;
sycl::queue *_saved_queue; sycl::queue _saved_queue;
sycl::context _ctx; std::vector<sycl::queue> _queues;
std::vector<std::shared_ptr<sycl::queue>> _queues;
mutable mutex_type m_mutex; mutable mutex_type m_mutex;
}; };

6
ggml.h
View file

@ -312,6 +312,12 @@
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb) GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif