call set_single/mul_gpu_mode in init, order the devices

parent ad2ed8fa11
commit 555c4976df
4 changed files with 154 additions and 42 deletions
examples/sycl/run-llama2.sh

@@ -9,18 +9,28 @@ source /opt/intel/oneapi/setvars.sh
 if [ $# -gt 0 ]; then
     GGML_SYCL_DEVICE=$1
+    GGML_SYCL_SINGLE_GPU=1
 else
     GGML_SYCL_DEVICE=0
 fi
-echo "use $GGML_SYCL_DEVICE as main GPU"
 
 #export GGML_SYCL_DEBUG=1
 
 #ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer.
 
-#use all GPUs with same max compute units
+if [ $GGML_SYCL_SINGLE_GPU -eq 1 ]; then
+    echo "use $GGML_SYCL_DEVICE as main GPU"
+    #use single GPU only
+    ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
+else
+    #use multiple GPUs with same max compute units
     ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
+fi
 
 #use main GPU only
 #ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
+
+#use multiple GPUs with same max compute units
+#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
ggml-sycl.cpp (143 changes)
@@ -16,6 +16,7 @@
 #include <cinttypes>
 #include <cstddef>
 #include <cstdint>
+#include <cstdlib>
 #include <float.h>
 #include <limits>
 #include <stdint.h>
@@ -24,10 +25,9 @@
 #include <cmath>
 #include <iostream>
 #include <fstream>
-
-
 #include <stdio.h>
 #include <stdlib.h>
+#include <regex>
 
 #include <sycl/sycl.hpp>
 #include <sycl/half_type.hpp>
@@ -937,17 +937,41 @@ namespace dpct
 
     private:
         mutable std::recursive_mutex m_mutex;
+        static bool compare_dev(sycl::device &device1, sycl::device &device2)
+        {
+            dpct::device_info prop1;
+            dpct::get_device_info(prop1, device1);
+
+            dpct::device_info prop2;
+            dpct::get_device_info(prop2, device2);
+            return prop1.get_max_compute_units() > prop2.get_max_compute_units();
+        }
         dev_mgr()
         {
            sycl::device default_device =
                sycl::device(sycl::default_selector_v);
            _devs.push_back(std::make_shared<device_ext>(default_device));
 
-           std::vector<sycl::device> sycl_all_devs =
-               sycl::device::get_devices(sycl::info::device_type::all);
+           std::vector<sycl::device> sycl_all_devs;
            // Collect other devices except for the default device.
            if (default_device.is_cpu())
                _cpu_device = 0;
+
+           auto Platforms = sycl::platform::get_platforms();
+           // Keep track of the number of devices per backend
+           std::map<sycl::backend, size_t> DeviceNums;
+
+           while (!Platforms.empty()) {
+               auto Platform = Platforms.back();
+               Platforms.pop_back();
+               auto Devices = Platform.get_devices();
+
+               std::sort(Devices.begin(), Devices.end(), compare_dev);
+               for (const auto &Device : Devices) {
+                   sycl_all_devs.push_back(Device);
+               }
+           }
+
            for (auto &dev : sycl_all_devs)
            {
                if (dev == default_device)
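Taken on its own, the ordering this hunk introduces is easy to check: compare_dev returns true when the first device has more compute units, so std::sort places the strongest device of each platform first. Below is a minimal standalone sketch of that ordering; fake_device and its sample values are illustrative stand-ins for sycl::device and dpct::device_info, not the real API.

// Standalone sketch of the per-platform ordering compare_dev establishes:
// strongest device (most compute units) first. fake_device is a stand-in
// for sycl::device; the names and values are made up.
#include <algorithm>
#include <cstdio>
#include <vector>

struct fake_device { const char *name; int max_compute_units; };

static bool compare_dev(const fake_device &a, const fake_device &b) {
    return a.max_compute_units > b.max_compute_units; // descending, as in the hunk
}

int main() {
    std::vector<fake_device> devs = {{"iGPU", 96}, {"dGPU-0", 512}, {"dGPU-1", 448}};
    std::sort(devs.begin(), devs.end(), compare_dev);
    for (const auto &d : devs)
        std::printf("%s (%d CUs)\n", d.name, d.max_compute_units);
    // prints: dGPU-0 (512 CUs), dGPU-1 (448 CUs), iGPU (96 CUs)
}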
@@ -3194,6 +3218,11 @@ static int g_work_group_size = 0;
 #define GGML_SYCL_MMV_Y 1
 #endif
 
+enum ggml_sycl_backend_gpu_mode {
+    SYCL_UNSET_GPU_MODE = -1,
+    SYCL_SINGLE_GPU_MODE = 0,
+    SYCL_Mul_GPU_MODE
+};
+
 static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");
 
@@ -3673,6 +3702,8 @@ static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {};
 
 static float g_tensor_split[GGML_SYCL_MAX_DEVICES] = {0};
 
+static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode = SYCL_UNSET_GPU_MODE;
+
 struct sycl_device_capabilities {
     int cc;   // compute capability
     bool vmm; // virtual memory support
@@ -13183,17 +13214,45 @@ bool ggml_sycl_loaded(void) {
     return g_sycl_loaded;
 }
 
-void print_device_detail(int id) {
+std::string getDeviceTypeName(const sycl::device &Device) {
+    auto DeviceType = Device.get_info<sycl::info::device::device_type>();
+    switch (DeviceType) {
+    case sycl::info::device_type::cpu:
+        return "cpu";
+    case sycl::info::device_type::gpu:
+        return "gpu";
+    case sycl::info::device_type::host:
+        return "host";
+    case sycl::info::device_type::accelerator:
+        return "acc";
+    default:
+        return "unknown";
+    }
+}
+
+bool replace_str(std::string str, const std::string from, const std::string to) {
+    size_t start_pos = str.find(from);
+    if(start_pos == std::string::npos) {
+        return false;
+    }
+    str.replace(start_pos, from.length(), to);
+    return true;
+}
+
+void print_device_detail(int id, sycl::device &device, std::string device_type) {
+
     dpct::device_info prop;
     SYCL_CHECK(CHECK_TRY_ERROR(
-        dpct::get_device_info(prop, dpct::dev_mgr::instance().get_device(id))));
-    sycl::device cur_device = dpct::dev_mgr::instance().get_device(id);
+        dpct::get_device_info(prop, device)));
+
     std::string version;
     version += std::to_string(prop.get_major_version());
    version += ".";
     version += std::to_string(prop.get_minor_version());
 
-    fprintf(stderr, "|%2d|%45s|%18s|%17d|%14d|%13d|%15lu|\n", id,
+    device_type = std::regex_replace(device_type, std::regex("ext_oneapi_"), "");
+
+    fprintf(stderr, "|%2d|%18s|%45s|%10s|%11d|%8d|%7d|%15lu|\n", id, device_type.c_str(),
             prop.get_name(), version.c_str(), prop.get_max_compute_units(),
             prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
             prop.get_global_mem_size());
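The new print_device_detail receives a pre-built device-type tag and only strips the verbose "ext_oneapi_" backend prefix before printing. Here is a small sketch of that string handling in isolation; the backend name is hard-coded as an assumption, whereas the real code obtains it by streaming a sycl::backend value.

// Sketch of the tag cleanup done in print_device_detail:
// "[ext_oneapi_level_zero:gpu:0]" becomes "[level_zero:gpu:0]".
#include <cstdio>
#include <regex>
#include <sstream>
#include <string>

int main() {
    std::string backend = "ext_oneapi_level_zero"; // stand-in for sycl::backend output
    int type_id = 0;                               // per-backend device counter

    std::stringstream device_type;
    device_type << "[" << backend << ":" << "gpu" << ":" << type_id << "]";

    std::string tag =
        std::regex_replace(device_type.str(), std::regex("ext_oneapi_"), "");
    std::printf("%s\n", tag.c_str()); // [level_zero:gpu:0]
}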
@@ -13201,16 +13260,31 @@ void print_device_detail(int id) {
 
 void ggml_backend_sycl_print_sycl_devices() {
     int device_count = dpct::dev_mgr::instance().device_count();
+    std::map<sycl::backend, size_t> DeviceNums;
     fprintf(stderr, "found %d SYCL devices:\n", device_count);
-    fprintf(stderr, "|ID|                                         Name|compute capability|Max compute units|Max work group|Max sub group|Global mem size|\n");
-    fprintf(stderr, "|--|---------------------------------------------|------------------|-----------------|--------------|-------------|---------------|\n");
+    fprintf(stderr, "|  |                  |                                             |compute   |Max compute|Max work|Max sub|               |\n");
+    fprintf(stderr, "|ID|       Device Type|                                         Name|capability|units      |group   |group  |Global mem size|\n");
+    fprintf(stderr, "|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|\n");
     for (int id = 0; id < device_count; ++id) {
-        print_device_detail(id);
+        sycl::device device = dpct::dev_mgr::instance().get_device(id);
+        sycl::backend backend = device.get_backend();
+        int type_id=DeviceNums[backend]++;
+        std::stringstream device_type;
+        device_type << "[" << backend << ":" << getDeviceTypeName(device) << ":" << std::to_string(type_id) << "]";
+        print_device_detail(id, device, device_type.str());
     }
 }
 
 void print_gpu_device_list() {
-    fprintf(stderr, "detect %d SYCL GPUs: [%s] with top Max compute units:%d\n",
+    GGML_ASSERT(g_sycl_gpu_mgr);
+
+    char* hint=NULL;
+    if (g_ggml_sycl_backend_gpu_mode == SYCL_SINGLE_GPU_MODE) {
+        hint = "use %d SYCL GPUs: [%s] with Max compute units:%d\n";
+    } else {
+        hint = "detect %d SYCL GPUs: [%s] with top Max compute units:%d\n";
+    }
+    fprintf(stderr, hint,
             g_sycl_gpu_mgr->get_gpu_count(),
             g_sycl_gpu_mgr->gpus_list.c_str(),
             g_sycl_gpu_mgr->max_compute_units);
@@ -13266,19 +13340,23 @@ void ggml_init_sycl() try {
     }
     GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
     ggml_backend_sycl_print_sycl_devices();
-    if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
-    print_gpu_device_list();
     initialized = true;
     g_sycl_loaded = true;
 }
+}
+catch (sycl::exception const &exc) {
+    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
+              << ", line:" << __LINE__ << std::endl;
+    std::exit(1);
+}
 
-    g_device_count = g_sycl_gpu_mgr->get_gpu_count();
+void ggml_init_by_gpus(int device_count) try {
+    g_device_count = device_count;
     g_work_group_size = g_sycl_gpu_mgr->work_group_size;
 
     int64_t total_vram = 0;
 
+    print_gpu_device_list();
+
     for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
         g_device_caps[id].vmm = 0;
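The split of ggml_init_sycl() introduces ggml_init_by_gpus() as a function-try-block, the same error-handling shape used for the other SYCL entry points in this file. A minimal standalone illustration of the pattern follows; std::runtime_error stands in for sycl::exception so the sketch compiles without SYCL.

// Function-try-block: the whole function body is covered by the catch
// clause, mirroring the "void f(...) try { ... } catch (...) { ... }"
// shape in the hunk above.
#include <cstdlib>
#include <iostream>
#include <stdexcept>

void init_by_gpus_sketch(int device_count) try {
    if (device_count <= 0) throw std::runtime_error("no SYCL GPUs found");
    std::cout << "initializing " << device_count << " device(s)\n";
}
catch (std::exception const &exc) {
    std::cerr << exc.what() << " Exception caught at file:" << __FILE__
              << ", line:" << __LINE__ << std::endl;
    std::exit(1);
}

int main() { init_by_gpus_sketch(2); }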
@@ -17486,22 +17564,43 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
     return g_sycl_gpu_mgr->get_index(device_id);
 }
 
-GGML_API GGML_CALL void ggml_backend_sycl_set_single_device(int main_gpu_id) {
+GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
+    return g_sycl_gpu_mgr->gpus[device_index];
+}
+
+GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
     GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
-    printf("ggml_backend_sycl_set_single_device: use single device: %d\n", main_gpu_id);
+    fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
     if (g_sycl_gpu_mgr) {
         delete g_sycl_gpu_mgr;
     }
     g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);
-    ggml_init_sycl();
+    g_ggml_sycl_backend_gpu_mode = SYCL_SINGLE_GPU_MODE;
+    ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
     g_ggml_backend_sycl_buffer_type_initialized = false;
 }
 
+GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
+    if (g_ggml_sycl_backend_gpu_mode == SYCL_Mul_GPU_MODE) {
+        return;
+    }
+
+    fprintf(stderr, "ggml_backend_sycl_set_mul_device_mode: true\n");
+
+    if (g_sycl_gpu_mgr) {
+        delete g_sycl_gpu_mgr;
+    }
+    g_sycl_gpu_mgr = new sycl_gpu_mgr();
+    ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
+    g_ggml_backend_sycl_buffer_type_initialized = false;
+
+    g_ggml_sycl_backend_gpu_mode = SYCL_Mul_GPU_MODE;
+}
+
 extern "C" int ggml_backend_sycl_reg_devices();
 
 int ggml_backend_sycl_reg_devices() {
-    if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
-    g_device_count = g_sycl_gpu_mgr->get_gpu_count();
+    ggml_backend_sycl_set_mul_device_mode();
     assert(g_device_count>0);
     for (int i = 0; i < g_device_count; i++) {
         int id = g_sycl_gpu_mgr->gpus[i];
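The pair of getters added here makes the two numbering schemes explicit: a device *id* is the position in the global SYCL enumeration, while a device *index* is the position inside the manager's filtered, ordered gpus list. A hypothetical minimal mirror of that mapping is sketched below; gpu_mgr_sketch is illustrative, not the real sycl_gpu_mgr.

// index -> id is an array lookup (ggml_backend_sycl_get_device_id);
// id -> index is a search (ggml_backend_sycl_get_device_index).
#include <cassert>
#include <vector>

struct gpu_mgr_sketch {
    std::vector<int> gpus; // device ids kept by the manager, ordered by compute units
    int get_device_id(int index) const { return gpus[index]; }
    int get_index(int id) const {
        for (int i = 0; i < (int)gpus.size(); ++i)
            if (gpus[i] == id) return i;
        return -1; // id not managed
    }
};

int main() {
    gpu_mgr_sketch mgr{{2, 0, 3}}; // e.g. ids of the GPUs with top compute units
    assert(mgr.get_device_id(0) == 2);
    assert(mgr.get_index(3) == 2);
    return 0;
}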
ggml-sycl.h

@@ -16,6 +16,7 @@ extern "C" {
 #define GGML_SYCL_MAX_DEVICES 16
 #define GGML_SYCL_NAME "SYCL"
 
+
 GGML_API void ggml_init_sycl(void);
 GGML_API bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
@@ -28,8 +29,9 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
 GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
 GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
 GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
-GGML_API GGML_CALL void ggml_backend_sycl_set_single_device(int main_gpu);
+GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
+GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
+GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
 #ifdef __cplusplus
 }
 #endif
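Putting the new header entry points together: a caller is expected to pick the GPU mode once, before creating any backends, and the llama.cpp hunk below does exactly this. A condensed usage sketch of that calling convention; select_sycl_gpu_mode and its parameters are illustrative names, only the ggml_backend_sycl_* calls come from the header above.

// Hedged usage sketch of the new API from ggml-sycl.h: single-GPU mode for
// split-mode "none", multi-GPU mode otherwise. After single-GPU mode is set,
// ggml-sycl expects a device *index*, so the user-supplied id is converted.
#include "ggml-sycl.h"

int select_sycl_gpu_mode(bool split_none, int main_gpu_id) {
    if (split_none) {
        ggml_backend_sycl_set_single_device_mode(main_gpu_id);
        return ggml_backend_sycl_get_device_index(main_gpu_id); // index used from now on
    }
    ggml_backend_sycl_set_mul_device_mode();
    return main_gpu_id; // unchanged in multi-GPU (layer split) mode
}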
llama.cpp (25 changes)
@@ -3750,14 +3750,6 @@ static bool llm_load_tensors(
     model.main_gpu = main_gpu;
     model.n_gpu_layers = n_gpu_layers;
 
-#ifdef GGML_USE_SYCL
-    if (split_mode == LLAMA_SPLIT_MODE_NONE) {
-        ggml_backend_sycl_set_single_device(main_gpu);
-        //SYCL use device index (0, 1, 2), instead if device id.
-        main_gpu = ggml_backend_sycl_get_device_index(main_gpu);
-    }
-#endif
-
     const int64_t n_layer = hparams.n_layer;
     const int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
@@ -4756,6 +4748,16 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
     }
 #endif
 
+#ifdef GGML_USE_SYCL
+    if (params.split_mode == LLAMA_SPLIT_MODE_NONE) {
+        ggml_backend_sycl_set_single_device_mode(params.main_gpu);
+        //SYCL uses device index (0, 1, 2) directly; the user inputs a device id, which is converted to a device index here.
+        params.main_gpu = ggml_backend_sycl_get_device_index(params.main_gpu);
+    } else {
+        ggml_backend_sycl_set_mul_device_mode();
+    }
+#endif
+
     if (!llm_load_tensors(
         ml, model, params.n_gpu_layers, params.split_mode, params.main_gpu, params.tensor_split, params.use_mlock,
         params.progress_callback, params.progress_callback_user_data
@@ -12258,17 +12260,16 @@ struct llama_context * llama_new_context_with_model(
     if (model->n_gpu_layers > 0) {
         // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
         if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
-            int main_gpu_index = ggml_backend_sycl_get_device_index(model->main_gpu);
-            ggml_backend_t backend = ggml_backend_sycl_init(main_gpu_index);
+            ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
             if (backend == nullptr) {
-                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, model->main_gpu, main_gpu_index);
+                int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
+                LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
                 llama_free(ctx);
                 return nullptr;
             }
             ctx->backends.push_back(backend);
         } else {
             // LLAMA_SPLIT_LAYER requires a backend for each GPU
 
             for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
                 ggml_backend_t backend = ggml_backend_sycl_init(i);
                 if (backend == nullptr) {