fix debug link error. fix windows crash
This commit is contained in:
parent
079dd3f592
commit
4488134edf
5 changed files with 817 additions and 820 deletions
|
@ -665,6 +665,7 @@ if (LLAMA_SYCL)
|
||||||
#todo: AOT
|
#todo: AOT
|
||||||
|
|
||||||
find_package(IntelSYCL REQUIRED)
|
find_package(IntelSYCL REQUIRED)
|
||||||
|
find_package(MKL REQUIRED)
|
||||||
|
|
||||||
message(STATUS "SYCL found")
|
message(STATUS "SYCL found")
|
||||||
|
|
||||||
|
@ -679,11 +680,9 @@ if (LLAMA_SYCL)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_compile_options(-I./) #include DPCT
|
add_compile_options(-I./) #include DPCT
|
||||||
add_compile_options(-I/${SYCL_INCLUDE_DIR})
|
|
||||||
|
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
|
|
||||||
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
|
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
|
||||||
endif()
|
endif()
|
||||||
|
@ -693,8 +692,10 @@ if (LLAMA_SYCL)
|
||||||
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
|
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
|
||||||
else()
|
else()
|
||||||
|
add_compile_options(-I/${SYCL_INCLUDE_DIR})
|
||||||
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
|
||||||
if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
|
if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
|
||||||
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
|
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
"cacheVariables": {
|
"cacheVariables": {
|
||||||
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
|
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
|
||||||
"CMAKE_CXX_COMPILER": "icx",
|
"CMAKE_CXX_COMPILER": "icx",
|
||||||
"CMAKE_C_COMPILER": "icx",
|
|
||||||
"LLAMA_SYCL": "ON",
|
"LLAMA_SYCL": "ON",
|
||||||
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
|
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
|
||||||
}
|
}
|
||||||
|
|
|
@ -4911,7 +4911,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
|
||||||
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
|
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
|
||||||
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
|
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
|
||||||
|
|
||||||
GGML_TENSOR_BINARY_OP_LOCALS;
|
GGML_TENSOR_BINARY_OP_LOCALS01;
|
||||||
|
|
||||||
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
||||||
queue_ptr main_stream = ctx.stream();
|
queue_ptr main_stream = ctx.stream();
|
||||||
|
|
|
@ -220,7 +220,8 @@ namespace dpct
|
||||||
// a. and b.
|
// a. and b.
|
||||||
i++;
|
i++;
|
||||||
minor = std::stoi(&(ver[i]));
|
minor = std::stoi(&(ver[i]));
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
// c.
|
// c.
|
||||||
minor = 0;
|
minor = 0;
|
||||||
}
|
}
|
||||||
|
@ -594,13 +595,13 @@ namespace dpct
|
||||||
typedef std::mutex mutex_type;
|
typedef std::mutex mutex_type;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
device_ext() : sycl::device(), _ctx(*this) {}
|
device_ext() : sycl::device() {}
|
||||||
~device_ext()
|
~device_ext()
|
||||||
{
|
{
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
clear_queues();
|
clear_queues();
|
||||||
}
|
}
|
||||||
device_ext(const sycl::device &base) : sycl::device(base), _ctx(*this)
|
device_ext(const sycl::device& base) : sycl::device(base)
|
||||||
{
|
{
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
init_queues();
|
init_queues();
|
||||||
|
@ -709,9 +710,9 @@ namespace dpct
|
||||||
init_queues();
|
init_queues();
|
||||||
}
|
}
|
||||||
|
|
||||||
sycl::queue &in_order_queue() { return *_q_in_order; }
|
sycl::queue& in_order_queue() { return _q_in_order; }
|
||||||
|
|
||||||
sycl::queue &out_of_order_queue() { return *_q_out_of_order; }
|
sycl::queue& out_of_order_queue() { return _q_out_of_order; }
|
||||||
|
|
||||||
sycl::queue& default_queue()
|
sycl::queue& default_queue()
|
||||||
{
|
{
|
||||||
|
@ -721,85 +722,75 @@ namespace dpct
|
||||||
void queues_wait_and_throw()
|
void queues_wait_and_throw()
|
||||||
{
|
{
|
||||||
std::unique_lock<mutex_type> lock(m_mutex);
|
std::unique_lock<mutex_type> lock(m_mutex);
|
||||||
std::vector<std::shared_ptr<sycl::queue>> current_queues(
|
|
||||||
_queues);
|
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
for (const auto &q : current_queues)
|
for (auto& q : _queues)
|
||||||
{
|
{
|
||||||
q->wait_and_throw();
|
q.wait_and_throw();
|
||||||
}
|
}
|
||||||
// Guard the destruct of current_queues to make sure the ref count is safe.
|
// Guard the destruct of current_queues to make sure the ref count is safe.
|
||||||
lock.lock();
|
lock.lock();
|
||||||
}
|
}
|
||||||
|
|
||||||
sycl::queue *create_queue(bool enable_exception_handler = false)
|
sycl::queue create_queue(bool enable_exception_handler = false)
|
||||||
{
|
{
|
||||||
return create_in_order_queue(enable_exception_handler);
|
return create_in_order_queue(enable_exception_handler);
|
||||||
}
|
}
|
||||||
|
|
||||||
sycl::queue *create_queue(sycl::context context, sycl::device device,
|
sycl::queue create_queue(sycl::device device,
|
||||||
bool enable_exception_handler = false) {
|
bool enable_exception_handler = false) {
|
||||||
return create_in_order_queue(context, device, enable_exception_handler);
|
return create_in_order_queue(device, enable_exception_handler);
|
||||||
}
|
}
|
||||||
|
|
||||||
sycl::queue *create_in_order_queue(bool enable_exception_handler = false) {
|
sycl::queue create_in_order_queue(bool enable_exception_handler = false) {
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
return create_queue_impl(enable_exception_handler,
|
return create_queue_impl(enable_exception_handler,
|
||||||
sycl::property::queue::in_order());
|
sycl::property::queue::in_order());
|
||||||
}
|
}
|
||||||
|
|
||||||
sycl::queue *create_in_order_queue(sycl::context context, sycl::device device,
|
sycl::queue create_in_order_queue(sycl::device device,
|
||||||
bool enable_exception_handler = false) {
|
bool enable_exception_handler = false) {
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
return create_queue_impl(context, device, enable_exception_handler,
|
return create_queue_impl(device, enable_exception_handler,
|
||||||
sycl::property::queue::in_order());
|
sycl::property::queue::in_order());
|
||||||
}
|
}
|
||||||
|
|
||||||
sycl::queue *create_out_of_order_queue(bool enable_exception_handler = false) {
|
sycl::queue create_out_of_order_queue(bool enable_exception_handler = false) {
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
return create_queue_impl(enable_exception_handler);
|
return create_queue_impl(enable_exception_handler);
|
||||||
}
|
}
|
||||||
|
|
||||||
void destroy_queue(sycl::queue *&queue)
|
void destroy_queue(sycl::queue queue)
|
||||||
{
|
{
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
_queues.erase(std::remove_if(_queues.begin(), _queues.end(),
|
_queues.clear();
|
||||||
[=](const std::shared_ptr<sycl::queue> &q) -> bool
|
|
||||||
{
|
|
||||||
return q.get() == queue;
|
|
||||||
}),
|
|
||||||
_queues.end());
|
|
||||||
queue = nullptr;
|
|
||||||
}
|
}
|
||||||
void set_saved_queue(sycl::queue *q)
|
void set_saved_queue(sycl::queue q)
|
||||||
{
|
{
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
_saved_queue = q;
|
_saved_queue = q;
|
||||||
}
|
}
|
||||||
sycl::queue *get_saved_queue() const
|
sycl::queue get_saved_queue() const
|
||||||
{
|
{
|
||||||
std::lock_guard<mutex_type> lock(m_mutex);
|
std::lock_guard<mutex_type> lock(m_mutex);
|
||||||
return _saved_queue;
|
return _saved_queue;
|
||||||
}
|
}
|
||||||
sycl::context get_context() const { return _ctx; }
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void clear_queues()
|
void clear_queues()
|
||||||
{
|
{
|
||||||
_queues.clear();
|
_queues.clear();
|
||||||
_q_in_order = _q_out_of_order = _saved_queue = nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void init_queues()
|
void init_queues()
|
||||||
{
|
{
|
||||||
_q_in_order = create_queue_impl(true, sycl::property::queue::in_order());
|
_q_in_order = create_queue_impl(true, sycl::property::queue::in_order());
|
||||||
_q_out_of_order = create_queue_impl(true);
|
_q_out_of_order = create_queue_impl(true);
|
||||||
_saved_queue = &default_queue();
|
_saved_queue = default_queue();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Caller should acquire resource \p m_mutex before calling this function.
|
/// Caller should acquire resource \p m_mutex before calling this function.
|
||||||
template <class... Properties>
|
template <class... Properties>
|
||||||
sycl::queue *create_queue_impl(bool enable_exception_handler,
|
sycl::queue create_queue_impl(bool enable_exception_handler,
|
||||||
Properties... properties)
|
Properties... properties)
|
||||||
{
|
{
|
||||||
sycl::async_handler eh = {};
|
sycl::async_handler eh = {};
|
||||||
|
@ -807,44 +798,44 @@ namespace dpct
|
||||||
{
|
{
|
||||||
eh = exception_handler;
|
eh = exception_handler;
|
||||||
}
|
}
|
||||||
_queues.push_back(std::make_shared<sycl::queue>(
|
auto q = sycl::queue(
|
||||||
_ctx, *this, eh,
|
*this, eh,
|
||||||
sycl::property_list(
|
sycl::property_list(
|
||||||
#ifdef DPCT_PROFILING_ENABLED
|
#ifdef DPCT_PROFILING_ENABLED
|
||||||
sycl::property::queue::enable_profiling(),
|
sycl::property::queue::enable_profiling(),
|
||||||
#endif
|
#endif
|
||||||
properties...)));
|
properties...));
|
||||||
|
_queues.push_back(q);
|
||||||
|
|
||||||
return _queues.back().get();
|
return _queues.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class... Properties>
|
template <class... Properties>
|
||||||
sycl::queue *create_queue_impl(sycl::context context, sycl::device device,
|
sycl::queue create_queue_impl(sycl::device device,
|
||||||
bool enable_exception_handler,
|
bool enable_exception_handler,
|
||||||
Properties... properties) {
|
Properties... properties) {
|
||||||
sycl::async_handler eh = {};
|
sycl::async_handler eh = {};
|
||||||
if (enable_exception_handler) {
|
if (enable_exception_handler) {
|
||||||
eh = exception_handler;
|
eh = exception_handler;
|
||||||
}
|
}
|
||||||
_queues.push_back(std::make_shared<sycl::queue>(
|
_queues.push_back(sycl::queue(
|
||||||
context, device, eh,
|
device, eh,
|
||||||
sycl::property_list(
|
sycl::property_list(
|
||||||
#ifdef DPCT_PROFILING_ENABLED
|
#ifdef DPCT_PROFILING_ENABLED
|
||||||
sycl::property::queue::enable_profiling(),
|
sycl::property::queue::enable_profiling(),
|
||||||
#endif
|
#endif
|
||||||
properties...)));
|
properties...)));
|
||||||
|
|
||||||
return _queues.back().get();
|
return _queues.back();
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_version(int& major, int& minor) const
|
void get_version(int& major, int& minor) const
|
||||||
{
|
{
|
||||||
detail::get_version(*this, major, minor);
|
detail::get_version(*this, major, minor);
|
||||||
}
|
}
|
||||||
sycl::queue *_q_in_order, *_q_out_of_order;
|
sycl::queue _q_in_order, _q_out_of_order;
|
||||||
sycl::queue *_saved_queue;
|
sycl::queue _saved_queue;
|
||||||
sycl::context _ctx;
|
std::vector<sycl::queue> _queues;
|
||||||
std::vector<std::shared_ptr<sycl::queue>> _queues;
|
|
||||||
mutable mutex_type m_mutex;
|
mutable mutex_type m_mutex;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
6
ggml.h
6
ggml.h
|
@ -312,6 +312,12 @@
|
||||||
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
|
||||||
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
|
||||||
|
|
||||||
|
#define GGML_TENSOR_BINARY_OP_LOCALS01 \
|
||||||
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
|
||||||
|
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
|
||||||
|
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
|
||||||
|
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue