ggml : use ggml_row_size where possible (#4472)
* ggml : use ggml_row_size where possible
  ggml-ci
* ggml : move ggml_nbytes_split to ggml-cuda.cu
This commit is contained in:
parent
cafcd4f895
commit
6744dbe924
5 changed files with 24 additions and 26 deletions
12
ggml-cuda.cu
12
ggml-cuda.cu
|
@@ -8898,6 +8898,12 @@ static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, gg
|
|||
(void) dst;
|
||||
}
|
||||
|
||||
// Returns a byte count for `nrows_split` rows of `tensor`: the row count
// multiplied by ggml_row_size(tensor->type, tensor->ne[0]).
// NOTE(review): ggml_row_size is defined elsewhere; presumably it yields the
// byte size of one row of ne[0] elements of the given type - confirm.
// NOTE(review): the static_assert pins GGML_MAX_DIMS to 4, although the body
// as shown here reads only ne[0].
static size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return nrows_split*ggml_row_size(tensor->type, tensor->ne[0]);
|
||||
}
|
||||
|
||||
void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
|
||||
const int64_t nrows = ggml_nrows(tensor);
|
||||
|
||||
|
@@ -8947,8 +8953,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) {
|
|||
|
||||
// pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses
|
||||
if (ne0 % MATRIX_ROW_PADDING != 0) {
|
||||
size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
|
||||
* ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
|
||||
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
|
||||
}
|
||||
|
||||
char * buf;
|
||||
|
@@ -9485,8 +9490,7 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t
|
|||
|
||||
if (ggml_is_quantized(tensor->type)) {
|
||||
if (ne0 % MATRIX_ROW_PADDING != 0) {
|
||||
size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING)
|
||||
* ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
|
||||
size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue