replace global variables with context [2/2]

parent c29eccc4df
commit d0186d381c

8 changed files with 217 additions and 277 deletions
CMakeLists.txt

@@ -684,7 +684,8 @@ if (LLAMA_SYCL)
     endif()

     set(GGML_HEADERS_SYCL ggml-sycl.h)
-    set(GGML_SOURCES_SYCL ggml-sycl.cpp)
+    file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp")
+    list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")

     if (WIN32)
         set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
ggml-sycl.cpp (457 changes)

File diff suppressed because it is too large
ggml-sycl.h

@@ -8,6 +8,7 @@

 #include "ggml.h"
 #include "ggml-backend.h"
+#include "ggml-sycl/presets.hpp"

 #ifdef __cplusplus
 extern "C" {
@@ -30,13 +31,6 @@ GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
 GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
 GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
 GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
-
-// TODO: these are temporary
-// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
-GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
-GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
-GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();

 // SYCL doesn't support registering host memory, keep here for reference
 // GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
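Note: for orientation, here is a minimal sketch of the query API that survives this hunk, assuming a build with the SYCL backend enabled and this header on the include path.

    // Sketch: enumerate SYCL devices with the functions kept above.
    #include <cstdio>
    #include "ggml-sycl.h"

    int main() {
        int n = ggml_backend_sycl_get_device_count();
        for (int i = 0; i < n; ++i) {
            char desc[256];
            ggml_sycl_get_device_description(i, desc, sizeof(desc));
            size_t free_mem = 0, total_mem = 0;
            ggml_backend_sycl_get_device_memory(i, &free_mem, &total_mem);
            printf("device %d: %s, %zu/%zu bytes free\n", i, desc, free_mem, total_mem);
        }
        return 0;
    }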
ggml-sycl/common.cpp

@@ -12,10 +12,6 @@

 #include "common.hpp"

-int get_main_device() {
-    return g_main_device;
-}
-
 int get_current_device_id() {
     return dpct::dev_mgr::instance().current_device_id();
 }
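Note: the deleted accessor simply returned the file-scope global g_main_device. Under the context approach the same state travels with a backend object instead. A hypothetical sketch of that pattern follows; the struct and member names are illustrative, not the real ones.

    // Illustrative only: state that used to live in the global
    // g_main_device moves into a context passed to each function.
    struct ggml_sycl_context_sketch {
        int main_device; // formerly the global g_main_device
    };

    static int get_main_device(const ggml_sycl_context_sketch & ctx) {
        return ctx.main_device;
    }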
ggml-sycl/common.hpp

@@ -136,6 +136,7 @@ typedef sycl::float2 dfloat2;

 static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};

 static int g_all_sycl_device_count = -1;
+static bool g_ggml_backend_sycl_buffer_type_initialized = false;

 static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode =
@@ -145,8 +146,6 @@ static void* g_scratch_buffer = nullptr;
 static size_t g_scratch_size = 0; // disabled by default
 static size_t g_scratch_offset = 0;

-int get_main_device();
-
 [[noreturn]] static inline void bad_arch(const sycl::stream& stream_ct1) {
     stream_ct1 << "ERROR: ggml-sycl was compiled without support for the "
                   "current GPU architecture.\n";
ggml-sycl/dpct/helper.hpp

@@ -2866,7 +2866,7 @@ namespace dpct
     /// when usm is used and dimension is greater than 1.
     template <size_t D = Dimension>
     typename std::enable_if<D != 1, dpct_accessor_t>::type
-    get_access(sycl::handler &cgh) {
+    get_access([[maybe_unused]] sycl::handler &cgh) {
         return dpct_accessor_t((T *)_device_ptr, _range);
     }

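Note: the only change here is the standard C++17 [[maybe_unused]] attribute. This accessor wraps a raw USM pointer and never touches cgh, so the attribute suppresses the unused-parameter warning. A self-contained example of the attribute:

    // Compiles warning-free even under -Wunused-parameter:
    // [[maybe_unused]] marks 'cgh' as intentionally unreferenced.
    template <typename Handler>
    int get_access_like([[maybe_unused]] Handler & cgh) {
        return 42;
    }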
ggml-sycl/presets.hpp

@@ -13,9 +13,9 @@
 #ifndef GGML_SYCL_PRESETS_HPP
 #define GGML_SYCL_PRESETS_HPP

-#define GGML_SYCL_MAX_DEVICES 48
 #define GGML_SYCL_MAX_STREAMS 8
 #define GGML_SYCL_MAX_BUFFERS 256
+#define GGML_SYCL_MAX_DEVICES 48
 #define GGML_SYCL_NAME "SYCL"

 #define WARP_SIZE 32
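Note: presets like these typically bound fixed-size per-device tables elsewhere in the backend. An assumed usage sketch, not code from this diff; the queue alias and table are illustrative only.

    // Assumed usage: MAX_* presets bound static table sizes.
    #define GGML_SYCL_MAX_DEVICES 48
    #define GGML_SYCL_MAX_STREAMS 8

    typedef void * queue_ptr_sketch; // stand-in for a SYCL queue handle

    // One queue slot per (device, stream) pair, sized by the presets.
    static queue_ptr_sketch g_streams_sketch[GGML_SYCL_MAX_DEVICES][GGML_SYCL_MAX_STREAMS] = {{nullptr}};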
llama.cpp (13 changes)
@@ -6612,16 +6612,6 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
     }
 #endif

-#ifdef GGML_USE_SYCL
-    if (params.split_mode == LLAMA_SPLIT_MODE_NONE) {
-        ggml_backend_sycl_set_single_device_mode(params.main_gpu);
-        //SYCL use device index (0, 1, 2) directly, uer input device id, then convert to device index.
-        params.main_gpu = ggml_backend_sycl_get_device_index(params.main_gpu);
-    } else {
-        ggml_backend_sycl_set_mul_device_mode();
-    }
-#endif
-
     if (!llm_load_tensors(
         ml, model, params.n_gpu_layers, params.split_mode, params.main_gpu, params.tensor_split, params.use_mlock,
         params.progress_callback, params.progress_callback_user_data
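Note: the deleted block translated a user-facing device id into the dense index the SYCL backend uses internally (via the ggml_backend_sycl_get_device_index declaration removed above). A hedged sketch of what such a translation looks like; the id table is hypothetical.

    // Illustrative id -> index lookup; 'usable_ids' is a hypothetical
    // list of user-visible device ids for the GPUs the backend accepts.
    #include <vector>

    static int device_index_from_id(const std::vector<int> & usable_ids, int device_id) {
        for (int i = 0; i < (int) usable_ids.size(); ++i) {
            if (usable_ids[i] == device_id) {
                return i; // dense index (0, 1, 2, ...) used internally
            }
        }
        return -1; // unknown id
    }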
@@ -16223,8 +16213,7 @@ struct llama_context * llama_new_context_with_model(
         if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
             ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
             if (backend == nullptr) {
-                int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
-                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
+                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu);
                 llama_free(ctx);
                 return nullptr;
             }