wip: fix mllama error

Signed-off-by: YiYing He <yiying@secondstate.io>
This commit is contained in:
YiYing He 2025-01-20 15:49:34 +08:00
parent 88c513f4c9
commit c0a71b1330
2 changed files with 10 additions and 5 deletions

View file

@ -242,7 +242,8 @@ void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor *
void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
// TODO: mllama will assert here.
// GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
if (backend->iface.get_tensor_async == NULL) {
ggml_backend_tensor_get(tensor, data, offset, size);
@ -276,7 +277,8 @@ void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, siz
GGML_ASSERT(buf != NULL && "tensor buffer not set");
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
// TODO: mllama will assert here.
// GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
buf->iface.get_tensor(buf, tensor, data, offset, size);
}

View file

@ -1570,16 +1570,19 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
} break;
case LLM_ARCH_MLLAMA:
{
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab+8}, 0);
// TODO: mllama should fix here.
tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab+7}, 0);
// output
{
output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_NOT_REQUIRED);
// TODO: mllama should fix here.
output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab-1}, llama_model_loader::TENSOR_NOT_REQUIRED);
// if output is NULL, init from the input tok embed
if (output == NULL) {
output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, llama_model_loader::TENSOR_DUPLICATED);
// TODO: mllama should fix here.
output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab-1}, llama_model_loader::TENSOR_DUPLICATED);
}
}