metal : relax conditions on fast matrix multiplication kernel (#3168)
* metal : relax conditions on fast matrix multiplication kernel * metal : revert the concurrnecy change because it was wrong * llama : remove experimental stuff
This commit is contained in:
parent
76164fe2e6
commit
a51b687657
4 changed files with 100 additions and 51 deletions
20
ggml.c
20
ggml.c
|
@ -4303,10 +4303,21 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
|
|||
}
|
||||
|
||||
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
|
||||
size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
|
||||
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
|
||||
nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
|
||||
size_t nbytes;
|
||||
size_t blck_size = ggml_blck_size(tensor->type);
|
||||
if (blck_size == 1) {
|
||||
nbytes = ggml_type_size(tensor->type);
|
||||
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
|
||||
nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
|
||||
}
|
||||
}
|
||||
else {
|
||||
nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
|
||||
for (int i = 1; i < GGML_MAX_DIMS; ++i) {
|
||||
nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
|
||||
}
|
||||
}
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
|
@ -18340,7 +18351,8 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|||
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
||||
i,
|
||||
node->ne[0], node->ne[1],
|
||||
ggml_op_name(node->op));
|
||||
ggml_op_name(node->op),
|
||||
ggml_get_name(node));
|
||||
}
|
||||
|
||||
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue