support main device is non-zero
This commit is contained in:
parent
3a9d2c54ba
commit
65f895d41b
4 changed files with 46 additions and 5 deletions
|
@ -474,6 +474,7 @@ if (LLAMA_SYCL)
|
||||||
|
|
||||||
if (_sycl_support)
|
if (_sycl_support)
|
||||||
add_compile_definitions(GGML_USE_CUBLAS)
|
add_compile_definitions(GGML_USE_CUBLAS)
|
||||||
|
add_compile_definitions(GGML_USE_SYCL)
|
||||||
#add_compile_definitions(GGML_SYCL_F16)
|
#add_compile_definitions(GGML_SYCL_F16)
|
||||||
#add_compile_options(-std=c++17 -O3 -fsycl)
|
#add_compile_options(-std=c++17 -O3 -fsycl)
|
||||||
add_compile_options(-I./)
|
add_compile_options(-I./)
|
||||||
|
|
|
@ -387,8 +387,8 @@ catch (sycl::exception const &exc) {
|
||||||
|
|
||||||
static int g_device_count = -1;
|
static int g_device_count = -1;
|
||||||
static int g_all_sycl_device_count = -1;
|
static int g_all_sycl_device_count = -1;
|
||||||
static int g_main_device = 0;
|
static int g_main_device = -1;
|
||||||
static int g_main_device_index = 0;
|
static int g_main_device_index = -1;
|
||||||
|
|
||||||
static float g_tensor_split[GGML_CUDA_MAX_DEVICES] = {0};
|
static float g_tensor_split[GGML_CUDA_MAX_DEVICES] = {0};
|
||||||
|
|
||||||
|
@ -413,6 +413,10 @@ static size_t g_scratch_offset = 0;
|
||||||
|
|
||||||
static dpct::queue_ptr g_cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr};
|
static dpct::queue_ptr g_cublas_handles[GGML_CUDA_MAX_DEVICES] = {nullptr};
|
||||||
|
|
||||||
|
int get_main_device(){
|
||||||
|
return g_main_device;
|
||||||
|
}
|
||||||
|
|
||||||
[[noreturn]]
|
[[noreturn]]
|
||||||
static void bad_arch(const sycl::stream &stream_ct1) {
|
static void bad_arch(const sycl::stream &stream_ct1) {
|
||||||
stream_ct1 << "ERROR: ggml-cuda was compiled without support for the "
|
stream_ct1 << "ERROR: ggml-cuda was compiled without support for the "
|
||||||
|
@ -6388,7 +6392,10 @@ static void mul_mat_vec_q6_K_q8_1_cuda(const void *vx, const void *vy,
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_device_index_by_id(int id){
|
int get_device_index_by_id(int id){
|
||||||
return g_sycl_device_id2index[id].index;
|
int res = g_sycl_device_id2index[id].index;
|
||||||
|
GGML_SYCL_DEBUG("zjy get_device_index_by_id id=%d device_index=%d\n", id, res);
|
||||||
|
GGML_ASSERT(res>=0);
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
int get_current_device_index(){
|
int get_current_device_index(){
|
||||||
|
@ -8057,6 +8064,7 @@ static void *ggml_cuda_pool_malloc_leg(size_t size, size_t *actual_size) try {
|
||||||
int id;
|
int id;
|
||||||
SYCL_CHECK(
|
SYCL_CHECK(
|
||||||
CHECK_TRY_ERROR(id = get_current_device_index()));
|
CHECK_TRY_ERROR(id = get_current_device_index()));
|
||||||
|
GGML_SYCL_DEBUG("zjy ggml_cuda_pool_malloc_leg index %d\n", id);
|
||||||
#ifdef DEBUG_CUDA_MALLOC
|
#ifdef DEBUG_CUDA_MALLOC
|
||||||
int nnz = 0;
|
int nnz = 0;
|
||||||
size_t max_size = 0;
|
size_t max_size = 0;
|
||||||
|
@ -8080,6 +8088,7 @@ static void *ggml_cuda_pool_malloc_leg(size_t size, size_t *actual_size) try {
|
||||||
*actual_size = b.size;
|
*actual_size = b.size;
|
||||||
b.ptr = nullptr;
|
b.ptr = nullptr;
|
||||||
b.size = 0;
|
b.size = 0;
|
||||||
|
GGML_SYCL_DEBUG("zjy ggml_cuda_pool_malloc_leg return 1 %p\n", ptr);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8092,6 +8101,7 @@ static void *ggml_cuda_pool_malloc_leg(size_t size, size_t *actual_size) try {
|
||||||
*actual_size = b.size;
|
*actual_size = b.size;
|
||||||
b.ptr = nullptr;
|
b.ptr = nullptr;
|
||||||
b.size = 0;
|
b.size = 0;
|
||||||
|
GGML_SYCL_DEBUG("zjy ggml_cuda_pool_malloc_leg return 2 %p\n", ptr);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
void * ptr;
|
void * ptr;
|
||||||
|
@ -8327,10 +8337,19 @@ void ggml_init_cublas() try {
|
||||||
fprintf(stderr, "%s: CUDA_USE_TENSOR_CORES: no\n", __func__);
|
fprintf(stderr, "%s: CUDA_USE_TENSOR_CORES: no\n", __func__);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
for (int id = 0; id < GGML_CUDA_MAX_DEVICES; ++id) {
|
||||||
|
g_sycl_device_id2index[id].index = -1;
|
||||||
|
g_device_caps[id].vmm = 0;
|
||||||
|
g_device_caps[id].device_id = -1;
|
||||||
|
g_device_caps[id].cc = 0;
|
||||||
|
g_tensor_split[id] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int device_inx = -1;
|
int device_inx = -1;
|
||||||
for (int id = 0; id < g_all_sycl_device_count; ++id) {
|
for (int id = 0; id < g_all_sycl_device_count; ++id) {
|
||||||
if(id!=user_device_number) continue;
|
if(id!=user_device_number) continue;
|
||||||
|
|
||||||
device_inx++;
|
device_inx++;
|
||||||
int device_vmm = 0;
|
int device_vmm = 0;
|
||||||
|
|
||||||
|
@ -8400,8 +8419,15 @@ void ggml_init_cublas() try {
|
||||||
|
|
||||||
//zjy hardcode, force set to 1 device
|
//zjy hardcode, force set to 1 device
|
||||||
g_device_count = 1;
|
g_device_count = 1;
|
||||||
|
ggml_cuda_set_main_device(user_device_number);
|
||||||
ggml_cuda_set_device(user_device_number);
|
ggml_cuda_set_device(user_device_number);
|
||||||
fprintf(stderr, "Using Device %d\n", user_device_number);
|
fprintf(stderr, "Using Device %d\n", user_device_number);
|
||||||
|
|
||||||
|
// for (int id = 0; id < g_all_sycl_device_count; ++id) {
|
||||||
|
// GGML_SYCL_DEBUG("zjy id=%d g_device_caps[%d].device_id=%d g_sycl_device_id2index[%d].index=%d ", id, id,
|
||||||
|
// g_device_caps[id].device_id, id, g_sycl_device_id2index[id].index);
|
||||||
|
// }
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
g_cublas_loaded = true;
|
g_cublas_loaded = true;
|
||||||
}
|
}
|
||||||
|
@ -11220,7 +11246,7 @@ void ggml_cuda_set_main_device(const int main_device) try {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_main_device != main_device && g_device_count > 1) {
|
if (g_main_device != main_device && g_device_count >= 1) {
|
||||||
g_main_device = main_device;
|
g_main_device = main_device;
|
||||||
g_main_device_index = get_device_index_by_id(g_main_device);
|
g_main_device_index = get_device_index_by_id(g_main_device);
|
||||||
dpct::device_info prop;
|
dpct::device_info prop;
|
||||||
|
|
|
@ -14,4 +14,6 @@
|
||||||
} \
|
} \
|
||||||
}()
|
}()
|
||||||
|
|
||||||
#define DEBUG_CUDA_MALLOC
|
#define DEBUG_CUDA_MALLOC
|
||||||
|
|
||||||
|
int get_main_device();
|
12
llama.cpp
12
llama.cpp
|
@ -13,6 +13,10 @@
|
||||||
# include "ggml-opencl.h"
|
# include "ggml-opencl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef GGML_USE_SYCL
|
||||||
|
# include "ggml-sycl.hpp"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
# include "ggml-metal.h"
|
# include "ggml-metal.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -9785,6 +9789,14 @@ struct llama_model * llama_load_model_from_file(
|
||||||
struct llama_model_params params) {
|
struct llama_model_params params) {
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
|
#ifdef GGML_USE_SYCL
|
||||||
|
int main_device = get_main_device();
|
||||||
|
if(main_device>=0) params.main_gpu = main_device;
|
||||||
|
else {
|
||||||
|
LLAMA_LOG_ERROR("%s: missed to init GPU device\n", __func__);
|
||||||
|
std::exit(1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
llama_model * model = new llama_model;
|
llama_model * model = new llama_model;
|
||||||
|
|
||||||
unsigned cur_percentage = 0;
|
unsigned cur_percentage = 0;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue