ggml : deprecate ggml_alibi
This commit is contained in:
parent
a0f8a93bf1
commit
0fe2d56001
3 changed files with 26 additions and 5 deletions
|
@@ -809,7 +809,7 @@ static bool ggml_metal_graph_compute(
|
||||||
|
|
||||||
id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
|
id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
|
||||||
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
|
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
|
||||||
id<MTLBuffer> id_src2 = src2 ? ggml_metal_get_buffer(src2, &offs_src2) : nil;
|
//id<MTLBuffer> id_src2 = src2 ? ggml_metal_get_buffer(src2, &offs_src2) : nil;
|
||||||
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;
|
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;
|
||||||
|
|
||||||
//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
|
//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
|
||||||
|
|
5
ggml.h
5
ggml.h
|
@@ -1483,12 +1483,13 @@ extern "C" {
|
||||||
|
|
||||||
// alibi position embedding
|
// alibi position embedding
|
||||||
// in-place, returns view(a)
|
// in-place, returns view(a)
|
||||||
GGML_API struct ggml_tensor * ggml_alibi(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
int n_past,
|
int n_past,
|
||||||
int n_head,
|
int n_head,
|
||||||
float bias_max);
|
float bias_max),
|
||||||
|
"use ggml_soft_max_ext instead");
|
||||||
|
|
||||||
// clamp
|
// clamp
|
||||||
// in-place, returns view(a)
|
// in-place, returns view(a)
|
||||||
|
|
20
llama.cpp
20
llama.cpp
|
@@ -4814,8 +4814,28 @@ static struct ggml_tensor * llm_build_kqv(
|
||||||
ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
|
ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(GGML_USE_VULKAN) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_SYCL)
|
||||||
|
#pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL")
|
||||||
|
#pragma message(" Falling back to ggml_alibi(). Will become and error in Mar 2024")
|
||||||
|
#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488")
|
||||||
|
if (hparams.f_max_alibi_bias > 0.0f) {
|
||||||
|
kq = ggml_scale(ctx, kq, kq_scale);
|
||||||
|
cb(kq, "kq_scaled", il);
|
||||||
|
|
||||||
|
kq = ggml_alibi(ctx, kq, /*n_past*/ 0, n_head, hparams.f_max_alibi_bias);
|
||||||
|
cb(kq, "kq_scaled_alibi", il);
|
||||||
|
|
||||||
|
kq = ggml_add(ctx, kq, kq_mask);
|
||||||
|
cb(kq, "kq_masked", il);
|
||||||
|
|
||||||
|
kq = ggml_soft_max(ctx, kq);
|
||||||
|
cb(kq, "kq_soft_max", il);
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
kq = ggml_soft_max_ext(ctx, kq, kq_mask, kq_scale, hparams.f_max_alibi_bias);
|
kq = ggml_soft_max_ext(ctx, kq, kq_mask, kq_scale, hparams.f_max_alibi_bias);
|
||||||
cb(kq, "kq_soft_max_ext", il);
|
cb(kq, "kq_soft_max_ext", il);
|
||||||
|
}
|
||||||
|
|
||||||
// split cached v into n_head heads
|
// split cached v into n_head heads
|
||||||
struct ggml_tensor * v =
|
struct ggml_tensor * v =
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue