add GGML_ROPE_TYPE_MROPE, GGML_ROPE_TYPE_VISION
This commit is contained in:
parent
201f7043c3
commit
f1fa60f84c
6 changed files with 39 additions and 18 deletions
|
@ -761,7 +761,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
if (ctx->has_qwen2vl_merger) {
|
if (ctx->has_qwen2vl_merger) {
|
||||||
Q = ggml_mrope_ext(
|
Q = ggml_mrope_ext(
|
||||||
ctx0, Q, positions, nullptr,
|
ctx0, Q, positions, nullptr,
|
||||||
d_head/2, mrope_sections, 2 /*LLAMA_ROPE_TYPE_NEOX8*/, 32768, 10000, 1, 0, 1, 32, 1);
|
d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);
|
||||||
}
|
}
|
||||||
Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
|
Q = ggml_scale_inplace(ctx0, Q, 1.0f / sqrt((float)d_head));
|
||||||
Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3));
|
Q = ggml_cont(ctx0, ggml_permute(ctx0, Q, 0, 2, 1, 3));
|
||||||
|
@ -774,7 +774,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
if (ctx->has_qwen2vl_merger) {
|
if (ctx->has_qwen2vl_merger) {
|
||||||
K = ggml_mrope_ext(
|
K = ggml_mrope_ext(
|
||||||
ctx0, K, positions, nullptr,
|
ctx0, K, positions, nullptr,
|
||||||
d_head/2, mrope_sections, 2 /*LLAMA_ROPE_TYPE_NEOX8*/, 32768, 10000, 1, 0, 1, 32, 1);
|
d_head/2, mrope_sections, GGML_ROPE_TYPE_VISION, 32768, 10000, 1, 0, 1, 32, 1);
|
||||||
}
|
}
|
||||||
K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3));
|
K = ggml_cont(ctx0, ggml_permute(ctx0, K, 0, 2, 1, 3));
|
||||||
K = ggml_reshape_3d(ctx0, K, d_head, num_positions, n_head * batch_size);
|
K = ggml_reshape_3d(ctx0, K, d_head, num_positions, n_head * batch_size);
|
||||||
|
@ -1301,8 +1301,12 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||||
idx = get_key_idx(ctx, KEY_USE_GELU);
|
idx = get_key_idx(ctx, KEY_USE_GELU);
|
||||||
new_clip->use_gelu = gguf_get_val_bool(ctx, idx);
|
new_clip->use_gelu = gguf_get_val_bool(ctx, idx);
|
||||||
|
|
||||||
idx = get_key_idx(ctx, KEY_USE_SILU);
|
try {
|
||||||
new_clip->use_silu = gguf_get_val_bool(ctx, idx);
|
idx = get_key_idx(ctx, KEY_USE_SILU);
|
||||||
|
new_clip->use_silu = gguf_get_val_bool(ctx, idx);
|
||||||
|
} catch (std::runtime_error & /*e*/) {
|
||||||
|
new_clip->use_silu = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (verbosity >= 1) {
|
if (verbosity >= 1) {
|
||||||
LOG_INF("%s: text_encoder: %d\n", __func__, new_clip->has_text_encoder);
|
LOG_INF("%s: text_encoder: %d\n", __func__, new_clip->has_text_encoder);
|
||||||
|
|
|
@ -237,7 +237,9 @@
|
||||||
#define GGML_EXIT_SUCCESS 0
|
#define GGML_EXIT_SUCCESS 0
|
||||||
#define GGML_EXIT_ABORTED 1
|
#define GGML_EXIT_ABORTED 1
|
||||||
|
|
||||||
#define GGML_ROPE_TYPE_NEOX 2
|
#define GGML_ROPE_TYPE_NEOX 2
|
||||||
|
#define GGML_ROPE_TYPE_MROPE 4
|
||||||
|
#define GGML_ROPE_TYPE_VISION 12
|
||||||
|
|
||||||
#define GGUF_MAGIC "GGUF"
|
#define GGUF_MAGIC "GGUF"
|
||||||
|
|
||||||
|
|
|
@ -414,11 +414,15 @@ void ggml_cuda_op_rope(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
|
||||||
memcpy(&attn_factor, (int32_t *) dst->op_params + 8, sizeof(float));
|
memcpy(&attn_factor, (int32_t *) dst->op_params + 8, sizeof(float));
|
||||||
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
|
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
|
||||||
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
|
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
|
||||||
memcpy(&sections.v, (int32_t *) dst->op_params + 11, sizeof(int)*4);
|
memcpy(&sections.v, (int32_t *) dst->op_params + 11, sizeof(int)*4);
|
||||||
|
|
||||||
const bool is_mrope = sections.v[0] > 0 || sections.v[1] > 0 || sections.v[2] > 0;
|
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
||||||
const bool is_vision = is_mrope && sections.v[3] > 0;
|
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
|
||||||
const bool is_neox = (mode & GGML_ROPE_TYPE_NEOX) & !(is_mrope || is_vision); // TODO: fix this with new rope type
|
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
|
||||||
|
|
||||||
|
if (is_mrope) {
|
||||||
|
GGML_ASSERT(sections.v[0] > 0 || sections.v[1] > 0 || sections.v[2] > 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_vision) {
|
if (is_vision) {
|
||||||
GGML_ASSERT(n_dims == ne00/2);
|
GGML_ASSERT(n_dims == ne00/2);
|
||||||
|
|
|
@ -11359,8 +11359,12 @@ static void ggml_compute_forward_rope_f32(
|
||||||
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
|
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
|
||||||
|
|
||||||
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
||||||
const bool is_mrope = sections[0] > 0 || sections[1] > 0 || sections[2] > 0;
|
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
|
||||||
const bool is_vision = is_mrope && sections[3] > 0;
|
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
|
||||||
|
|
||||||
|
if (is_mrope) {
|
||||||
|
GGML_ASSERT(sections[0] > 0 || sections[1] > 0 || sections[2] > 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_vision) {
|
if (is_vision) {
|
||||||
GGML_ASSERT(n_dims == ne0/2);
|
GGML_ASSERT(n_dims == ne0/2);
|
||||||
|
@ -11539,8 +11543,12 @@ static void ggml_compute_forward_rope_f16(
|
||||||
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
|
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
|
||||||
|
|
||||||
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
||||||
const bool is_mrope = sections[0] > 0 || sections[1] > 0 || sections[2] > 0;
|
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
|
||||||
const bool is_vision = is_mrope && sections[3] > 0;
|
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
|
||||||
|
|
||||||
|
if (is_mrope) {
|
||||||
|
GGML_ASSERT(sections[0] > 0 || sections[1] > 0 || sections[2] > 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_vision) {
|
if (is_vision) {
|
||||||
GGML_ASSERT(n_dims == ne0/2);
|
GGML_ASSERT(n_dims == ne0/2);
|
||||||
|
@ -11562,7 +11570,6 @@ static void ggml_compute_forward_rope_f16(
|
||||||
|
|
||||||
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
||||||
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
||||||
const int64_t p = pos[i2];
|
|
||||||
|
|
||||||
float * cache = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32)*ith;
|
float * cache = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32)*ith;
|
||||||
if (!is_mrope) {
|
if (!is_mrope) {
|
||||||
|
|
|
@ -107,9 +107,11 @@ extern "C" {
|
||||||
};
|
};
|
||||||
|
|
||||||
enum llama_rope_type {
|
enum llama_rope_type {
|
||||||
LLAMA_ROPE_TYPE_NONE = -1,
|
LLAMA_ROPE_TYPE_NONE = -1,
|
||||||
LLAMA_ROPE_TYPE_NORM = 0,
|
LLAMA_ROPE_TYPE_NORM = 0,
|
||||||
LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
|
LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
|
||||||
|
LLAMA_ROPE_TYPE_MROPE = GGML_ROPE_TYPE_MROPE,
|
||||||
|
LLAMA_ROPE_TYPE_VISION = GGML_ROPE_TYPE_VISION,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
|
enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
|
||||||
|
|
|
@ -20237,7 +20237,6 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
||||||
case LLM_ARCH_BITNET:
|
case LLM_ARCH_BITNET:
|
||||||
case LLM_ARCH_QWEN:
|
case LLM_ARCH_QWEN:
|
||||||
case LLM_ARCH_QWEN2:
|
case LLM_ARCH_QWEN2:
|
||||||
case LLM_ARCH_QWEN2VL:
|
|
||||||
case LLM_ARCH_QWEN2MOE:
|
case LLM_ARCH_QWEN2MOE:
|
||||||
case LLM_ARCH_OLMO2:
|
case LLM_ARCH_OLMO2:
|
||||||
case LLM_ARCH_OLMOE:
|
case LLM_ARCH_OLMOE:
|
||||||
|
@ -20254,6 +20253,9 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
||||||
case LLM_ARCH_MINICPM3:
|
case LLM_ARCH_MINICPM3:
|
||||||
return LLAMA_ROPE_TYPE_NEOX;
|
return LLAMA_ROPE_TYPE_NEOX;
|
||||||
|
|
||||||
|
case LLM_ARCH_QWEN2VL:
|
||||||
|
return LLAMA_ROPE_TYPE_MROPE;
|
||||||
|
|
||||||
// all model arches should be listed explicitly here
|
// all model arches should be listed explicitly here
|
||||||
case LLM_ARCH_UNKNOWN:
|
case LLM_ARCH_UNKNOWN:
|
||||||
GGML_ABORT("unknown architecture");
|
GGML_ABORT("unknown architecture");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue