llama : refactor model loader with backend registry (#10026)

Diego Devesa 2024-10-30 02:01:23 +01:00 committed by GitHub
parent 8f275a7c45
commit c5b0f4b5d9
18 changed files with 1903 additions and 2019 deletions

@@ -28,7 +28,7 @@ GGML_API bool ggml_backend_is_cuda(ggml_backend_t backend);
 GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_buffer_type(int device);
 // split tensor buffer that splits matrices by rows across multiple devices
-GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(const float * tensor_split);
+GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_split_buffer_type(int main_device, const float * tensor_split);
 // pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
 GGML_API ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type(void);
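Note for callers: the split buffer type now takes the main device index explicitly instead of reading it from global state. A minimal sketch of an updated call site, assuming two visible CUDA devices and an illustrative even split (the device count and split values are placeholders, not from this commit):

    // before: ggml_backend_cuda_split_buffer_type(tensor_split);
    // after:  pass the index of the device that holds the non-split tensors
    const float tensor_split[2] = { 0.5f, 0.5f };   // row-split proportions per device
    const int   main_device     = 0;                // primary device for intermediate results
    ggml_backend_buffer_type_t split_buft =
        ggml_backend_cuda_split_buffer_type(main_device, tensor_split);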