Better CUDA synchronization logic (#2057)
This commit is contained in:
parent
befb3a3562
commit
0bc2cdfc87
2 changed files with 46 additions and 21 deletions
|
@ -8,10 +8,6 @@ extern "C" {
|
|||
|
||||
#define GGML_CUDA_MAX_DEVICES 16
|
||||
|
||||
struct ggml_tensor_extra_gpu {
|
||||
void * data_device[GGML_CUDA_MAX_DEVICES]; // 1 pointer for each device for split tensors
|
||||
};
|
||||
|
||||
void ggml_init_cublas(void);
|
||||
void ggml_cuda_set_tensor_split(const float * tensor_split);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue