Hacky patch for lower VRAM + prints
This commit is contained in:
parent
16b9cd1939
commit
597074c3f0
1 changed file with 7 additions and 0 deletions
|
@ -1476,6 +1476,7 @@ static cuda_buffer g_cuda_buffer_pool[GGML_CUDA_MAX_DEVICES][MAX_CUDA_BUFFERS];
|
||||||
static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT;
|
static std::atomic_flag g_cuda_pool_lock = ATOMIC_FLAG_INIT;
|
||||||
|
|
||||||
static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
|
static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
|
||||||
|
static size_t total_size = 0;
|
||||||
scoped_spin_lock lock(g_cuda_pool_lock);
|
scoped_spin_lock lock(g_cuda_pool_lock);
|
||||||
int id;
|
int id;
|
||||||
CUDA_CHECK(cudaGetDevice(&id));
|
CUDA_CHECK(cudaGetDevice(&id));
|
||||||
|
@ -1492,6 +1493,8 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
|
||||||
}
|
}
|
||||||
void * ptr;
|
void * ptr;
|
||||||
CUDA_CHECK(cudaMalloc((void **) &ptr, size));
|
CUDA_CHECK(cudaMalloc((void **) &ptr, size));
|
||||||
|
total_size += size;
|
||||||
|
fprintf(stderr, "%s: size=%.2f, total_size=%.2f\n", __func__, size / 1024.0f / 1024.0f, total_size / 1024.0f / 1024.0f);
|
||||||
*actual_size = size;
|
*actual_size = size;
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
@ -1560,6 +1563,10 @@ void ggml_init_cublas() {
|
||||||
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
|
|
||||||
|
size_t actual_size;
|
||||||
|
void * ptr = ggml_cuda_pool_malloc(813*1024*1024, &actual_size);
|
||||||
|
ggml_cuda_pool_free(ptr, actual_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue