llama_model_loader: minor, use same variable name for consistency, fix spacing in types cast

Author: Pierrick HYMBERT
Date:   2024-03-21 23:26:45 +01:00
Parent: 69bdee939a
Commit: 6df9757ad6


@@ -3287,9 +3287,9 @@ struct llama_model_loader {
         if (size_done >= size_data) {
             // unmap offloaded tensors and metadata
             if (use_mmap) {
-                for (uint32_t file_no = 0; file_no < mappings.size(); file_no++) {
-                    const auto & mmap_used = mmaps_used[file_no];
-                    auto & mapping = mappings.at(file_no);
+                for (uint32_t idx = 0; idx < mappings.size(); idx++) {
+                    const auto & mmap_used = mmaps_used[idx];
+                    auto & mapping = mappings.at(idx);
                     mapping->unmap_fragment(0, mmap_used.first);
                     if (mmap_used.second != 0) {
                         mapping->unmap_fragment(mmap_used.second, mapping->size);
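For context, the loop above trims each memory mapping down to the byte range that is still needed once loading finishes: mmaps_used[idx] holds a {first, last} pair of used offsets, and the unused head and tail of each file mapping are released. Below is a minimal standalone sketch of that pattern; FileMapping and its printing unmap_fragment are illustrative stand-ins, not llama.cpp's llama_mmap.

// Illustrative sketch only: FileMapping stands in for llama.cpp's llama_mmap;
// the real loader releases pages via the OS instead of printing.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

struct FileMapping {
    size_t size = 0;
    void unmap_fragment(size_t first, size_t last) {
        // In the real loader this releases the byte range [first, last) of the mapping.
        std::printf("unmap [%zu, %zu)\n", first, last);
    }
};

int main() {
    // One mapping per model file; mmaps_used[idx] tracks the byte range still in use.
    std::vector<FileMapping> mappings = {{4096}, {8192}};
    std::vector<std::pair<size_t, size_t>> mmaps_used = {{512, 3072}, {0, 0}};

    for (uint32_t idx = 0; idx < mappings.size(); idx++) {
        const auto & mmap_used = mmaps_used[idx];
        auto & mapping = mappings[idx];
        mapping.unmap_fragment(0, mmap_used.first);                 // unused head
        if (mmap_used.second != 0) {
            mapping.unmap_fragment(mmap_used.second, mapping.size); // unused tail
        }
    }
    return 0;
}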
@@ -5143,16 +5143,16 @@ static bool llm_load_tensors(
         // this is important for metal with apple silicon: if the entire model could be mapped to a metal buffer, then we could just use metal for all layers
         // this allows using partial offloading when the model size exceeds the metal buffer size, but not the RAM size
         if (ml.use_mmap && buft == llama_default_buffer_type_cpu(true)) {
-            for (uint32_t file_no = 0; file_no < ml.files.size(); file_no++) {
+            for (uint32_t idx = 0; idx < ml.files.size(); idx++) {
                 void * addr = nullptr;
                 size_t first, last;
-                ml.get_mapping_range(&first, &last, &addr, file_no, ctx);
+                ml.get_mapping_range(&first, &last, &addr, idx, ctx);
                 if (first >= last) {
                     continue;
                 }
                 ggml_backend_buffer_t buf = ggml_backend_cpu_buffer_from_ptr((char *) addr + first, last - first);
                 if (buf != nullptr) {
-                    bufs.emplace(file_no, buf);
+                    bufs.emplace(idx, buf);
 #ifdef GGML_USE_CUBLAS
                     if (n_layer >= n_gpu_layers) {
                         ggml_backend_cuda_register_host_buffer(
@@ -5165,17 +5165,17 @@ static bool llm_load_tensors(
         }
 #ifdef GGML_USE_METAL
         else if (ml.use_mmap && buft == ggml_backend_metal_buffer_type()) {
-            for (uint32_t file_no = 0; file_no < ml.files.size(); file_no++) {
+            for (uint32_t idx = 0; idx < ml.files.size(); idx++) {
                 const size_t max_size = ggml_get_max_tensor_size(ctx);
                 void * addr = nullptr;
                 size_t first, last;
-                ml.get_mapping_range(&first, &last, &addr, file_no, ctx);
+                ml.get_mapping_range(&first, &last, &addr, idx, ctx);
                 if (first >= last) {
                     continue;
                 }
                 ggml_backend_buffer_t buf = ggml_backend_metal_buffer_from_ptr((char *) addr + first, last - first, max_size);
                 if (buf != nullptr) {
-                    bufs.emplace(file_no, buf);
+                    bufs.emplace(idx, buf);
                 }
             }
         }
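The two hunks above share the same per-file flow: ask the loader for the byte range of mapping idx that the current context actually uses, wrap that region of the already-mapped file in a backend buffer (CPU or Metal), and store it in bufs keyed by the file index. Below is a minimal sketch of that flow; Buffer, buffer_from_ptr, and get_mapping_range are placeholder stand-ins for ggml_backend_buffer_t, the *_buffer_from_ptr constructors, and the loader method, not the real ggml/llama.cpp API.

// Illustrative sketch: placeholder types, not the ggml API.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

struct Buffer { void * base; size_t size; };

static Buffer * buffer_from_ptr(void * base, size_t size) {
    // Stand-in for ggml_backend_cpu_buffer_from_ptr / ggml_backend_metal_buffer_from_ptr.
    return new Buffer{base, size};
}

// Stand-in for ml.get_mapping_range: report the byte range of file idx used here.
static void get_mapping_range(size_t * first, size_t * last, void ** addr,
                              uint32_t idx, const std::vector<std::vector<char>> & files) {
    *addr  = (void *) files[idx].data();
    *first = 0;
    *last  = files[idx].size();
}

int main() {
    std::vector<std::vector<char>> files(2, std::vector<char>(1024)); // two "mapped" model files
    std::map<uint32_t, Buffer *> bufs;                                // one backend buffer per file index

    for (uint32_t idx = 0; idx < files.size(); idx++) {
        void * addr = nullptr;
        size_t first, last;
        get_mapping_range(&first, &last, &addr, idx, files);
        if (first >= last) {
            continue; // no tensors of this context live in this file
        }
        Buffer * buf = buffer_from_ptr((char *) addr + first, last - first);
        if (buf != nullptr) {
            bufs.emplace(idx, buf); // keyed by file index, matching the renamed loop variable
        }
    }
    std::printf("created %zu buffers\n", bufs.size());
    for (auto & kv : bufs) delete kv.second;
    return 0;
}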
@@ -5189,8 +5189,8 @@ static bool llm_load_tensors(
                 mlock_buf->init(ggml_backend_buffer_get_base(buf));
                 mlock_buf->grow_to(ggml_backend_buffer_get_size(buf));
             }
-            for (uint32_t file_no = 0; file_no < ml.files.size(); file_no++) {
-                bufs.emplace(file_no, buf);
+            for (uint32_t idx = 0; idx < ml.files.size(); idx++) {
+                bufs.emplace(idx, buf);
             }
         }
     }
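The last hunk is the fallback path where the context is backed by a single allocated buffer rather than per-file mappings; registering that one buffer under every file index keeps later per-file lookups uniform with the mmap case. A small sketch of that idea, with an illustrative Buffer type in place of ggml_backend_buffer_t:

// Illustrative sketch of the fallback: one shared buffer registered under every file index.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <map>

struct Buffer { size_t size; };

int main() {
    const uint32_t n_files = 3;     // stand-in for ml.files.size()
    Buffer shared{1 << 20};         // one buffer allocated for the whole context
    std::map<uint32_t, Buffer *> bufs;

    for (uint32_t idx = 0; idx < n_files; idx++) {
        bufs.emplace(idx, &shared); // same buffer under every file index
    }
    std::printf("%zu entries, all pointing at the same buffer\n", bufs.size());
    return 0;
}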