falcon : minor changes (still chasing the Metal problem)
parent a0dc47a501
commit e2d23bed1b
3 changed files with 51 additions and 52 deletions
ggml-metal.m
@@ -992,7 +992,9 @@ void ggml_metal_graph_compute(
                         [encoder setBytes:&nb2 length:sizeof(uint64_t) atIndex:16];
                         [encoder setBytes:&nb3 length:sizeof(uint64_t) atIndex:17];
                         [encoder setBytes:&m0  length:sizeof( float)   atIndex:18];
+
                         const int nth = 32;
+
                         [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                     } break;
                 case GGML_OP_ROPE:
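As a reading aid for the dispatch at the end of this hunk: MTLSizeMake(ne01, ne02, ne03) launches one threadgroup per (i1, i2, i3) row, with nth = 32 threads in each group. The C sketch below models that iteration space on the CPU, assuming the common ggml-metal pattern where a group's threads stride across the ne00 elements of a row; process_element and sketch_dispatch are hypothetical names, not code from this commit.

#include <stdint.h>

// Hypothetical per-element worker - a stand-in for the body of the Metal
// kernel; not part of this commit.
static void process_element(int64_t i3, int64_t i2, int64_t i1, int64_t i0) {
    (void) i3; (void) i2; (void) i1; (void) i0;
}

// CPU-side model of dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03)
// threadsPerThreadgroup:MTLSizeMake(nth, 1, 1): one threadgroup per
// (i1, i2, i3) row, nth threads per group, each thread striding across
// that row's ne00 elements.
static void sketch_dispatch(int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne03) {
    const int nth = 32;                                 // threads per threadgroup
    for (int64_t i3 = 0; i3 < ne03; ++i3) {             // threadgroup grid z
        for (int64_t i2 = 0; i2 < ne02; ++i2) {         // threadgroup grid y
            for (int64_t i1 = 0; i1 < ne01; ++i1) {     // threadgroup grid x
                for (int tpitg = 0; tpitg < nth; ++tpitg) {          // thread index in group
                    for (int64_t i0 = tpitg; i0 < ne00; i0 += nth) { // strided row walk
                        process_element(i3, i2, i1, i0);
                    }
                }
            }
        }
    }
}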
llama.cpp
@@ -2545,26 +2545,23 @@ static struct ggml_cgraph * llm_build_falcon(
             struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
             ggml_set_name(KQV_merged, "KQV_merged");
 
-            cur = ggml_cpy(ctx0,
-                    KQV_merged,
-                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
+            cur = ggml_cpy(ctx0, KQV_merged, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
             ggml_set_name(cur, "KQV_merged_contiguous");
 
-            cur = ggml_cpy(ctx0,
-                    KQV_merged,
-                    ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
-
             cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur);
             ggml_set_name(cur, "result_wo");
         }
 
+        struct ggml_tensor * attn_out = cur;
+
+        // feed forward
+        {
             struct ggml_tensor * inpFF = attn_norm;
-            struct ggml_tensor * attn_out = ggml_cpy(
-                ctx0, cur, ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N));
 
             cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
             cur = ggml_gelu(ctx0, cur);
             cur = ggml_mul_mat(ctx0, model.layers[il].w2, cur);
+        }
 
         cur = ggml_add(ctx0, cur, attn_out);
         cur = ggml_add(ctx0, cur, inpL);
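To make the reshuffle above easier to follow: the attention output is now kept as attn_out = cur directly instead of going through an extra contiguous ggml_cpy, and the feed-forward branch is computed from attn_norm rather than from the attention result, with both branches added back to the residual inpL. Below is a minimal C sketch of that dataflow against the ggml API, assuming the attention projection has already been computed; falcon_block_sketch and its parameter list are hypothetical, only the ggml_mul_mat/ggml_gelu/ggml_add sequence mirrors the hunk.

#include "ggml.h"

// Dataflow sketch of the falcon block after this hunk - Falcon's parallel
// attention/feed-forward layout. The wrapper and its parameters are
// hypothetical; only the ggml_* calls mirror the code above.
static struct ggml_tensor * falcon_block_sketch(
        struct ggml_context * ctx0,
        struct ggml_tensor  * inpL,       // residual stream entering the layer
        struct ggml_tensor  * attn_norm,  // normed input feeding both branches
        struct ggml_tensor  * attn_proj,  // attention output after the wo projection
        struct ggml_tensor  * w2,         // feed-forward down-projection weight
        struct ggml_tensor  * w3) {       // feed-forward up-projection weight
    // attention output carried forward directly - no intermediate ggml_cpy
    struct ggml_tensor * attn_out = attn_proj;

    // feed forward runs off attn_norm, in parallel with attention
    struct ggml_tensor * cur = ggml_mul_mat(ctx0, w3, attn_norm);
    cur = ggml_gelu(ctx0, cur);
    cur = ggml_mul_mat(ctx0, w2, cur);

    // both branches join the residual stream
    cur = ggml_add(ctx0, cur, attn_out);
    cur = ggml_add(ctx0, cur, inpL);
    return cur;
}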