GPU weights not in RAM, direct loading with cuFile
parent 977e74d70e
commit ffe9652bc1
3 changed files with 5 additions and 2 deletions
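The hunks below only export ggml's CUDA buffer pool and its declarations; the cuFile read path named in the commit message is not visible in this excerpt. For orientation, here is a rough sketch of what a cuFile-based direct load looks like. All names are illustrative and not taken from this commit; error handling is omitted.

    // Sketch only: read `size` bytes at `offset` from `fname` straight into
    // VRAM via NVIDIA GPUDirect Storage (cuFile). Link with -lcufile.
    #include <cuda_runtime.h>
    #include <cufile.h>
    #include <fcntl.h>
    #include <unistd.h>

    static void * load_tensor_direct(const char * fname, off_t offset, size_t size) {
        cuFileDriverOpen();
        int fd = open(fname, O_RDONLY | O_DIRECT); // O_DIRECT may need _GNU_SOURCE

        CUfileDescr_t descr = {};
        descr.handle.fd = fd;
        descr.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD;
        CUfileHandle_t fh;
        cuFileHandleRegister(&fh, &descr);

        void * dev_ptr = NULL;
        cudaMalloc(&dev_ptr, size);
        cuFileBufRegister(dev_ptr, size, 0);

        // DMA from disk into the device buffer; no staging copy in host RAM.
        cuFileRead(fh, dev_ptr, size, offset, 0);

        cuFileBufDeregister(dev_ptr);
        cuFileHandleDeregister(fh);
        close(fd);
        return dev_ptr;
    }

The point of the O_DIRECT plus cuFileRead pairing is that the file contents reach VRAM without ever occupying a host RAM buffer, which is what the commit title refers to.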
@@ -383,7 +383,7 @@ struct cuda_buffer {
 static cuda_buffer g_cuda_buffer_pool[MAX_CUDA_BUFFERS];
 static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT;
 
-static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
+void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
     for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
@@ -402,7 +402,7 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
     return ptr;
 }
 
-static void ggml_cuda_pool_free(void * ptr, size_t size) {
+void ggml_cuda_pool_free(void * ptr, size_t size) {
     scoped_spin_lock lock(g_cuda_pool_lock);
 
     for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
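Dropping static from these two functions gives them external linkage, so other translation units (the model loader, for instance) can reuse ggml's VRAM buffer pool instead of calling cudaMalloc directly. A minimal sketch of such a caller, assuming the declarations this commit adds to the header below:

    #include "ggml-cuda.h"

    void example_scratch_use(void) {
        size_t actual_size = 0;
        // The pool may hand back a recycled buffer larger than requested;
        // actual_size reports how much was really reserved.
        void * dev_buf = ggml_cuda_pool_malloc(1 << 20, &actual_size);

        // ... use dev_buf as a device buffer ...

        // Return the buffer to the pool for reuse rather than releasing it.
        ggml_cuda_pool_free(dev_buf, actual_size);
    }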
@@ -14,6 +14,8 @@ void ggml_cuda_mul_mat(const struct ggml_tensor * src0, const struct ggml_tens
 // TODO: export these with GGML_API
 void * ggml_cuda_host_malloc(size_t size);
 void ggml_cuda_host_free(void * ptr);
+void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size);
+void ggml_cuda_pool_free(void * ptr, size_t size);
 
 void ggml_cuda_transform_tensor(struct ggml_tensor * tensor);
 void ggml_cuda_load_data(const char * fname, struct ggml_tensor ** tensors, int num_tensors, const size_t * offsets);
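ggml_cuda_load_data takes the model file name, an array of tensor pointers, and each tensor's byte offset inside that file. A hypothetical call site, where the tensor names and offsets are placeholders rather than values from this commit:

    // Hypothetical: tok_embeddings and output are tensors already created
    // by the model loader; offsets must match the on-disk layout of fname.
    struct ggml_tensor * tensors[2] = { tok_embeddings, output };
    size_t offsets[2] = { 0, 0 }; // byte offset of each tensor's data within fname
    ggml_cuda_load_data(fname, tensors, 2, offsets);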
@@ -10,6 +10,7 @@
 
 #include "ggml.h"
 #ifdef GGML_USE_CUBLAS
+#include <cuda_runtime.h>
 #include "ggml-cuda.h"
 #endif
 
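The final hunk adds the cuda_runtime.h include (this extraction does not show which file the hunk belongs to), letting code guarded by GGML_USE_CUBLAS call the CUDA runtime directly. Illustrative only:

    #ifdef GGML_USE_CUBLAS
        // With cuda_runtime.h in scope, the including file can call the CUDA
        // runtime itself, e.g. copy bytes to a device pointer (illustrative):
        cudaMemcpy(dev_ptr, host_ptr, nbytes, cudaMemcpyHostToDevice);
    #endif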