Formatting changes
parent e6bf87f785
commit cd4d3df820

3 changed files with 9 additions and 11 deletions
ggml-metal.m

@@ -870,8 +870,8 @@ void ggml_metal_graph_compute(
                     } break;
                 case GGML_OP_MUL:
                     {
-                        GGML_ASSERT(ggml_is_contiguous(src1));
                         GGML_ASSERT(ggml_is_contiguous(src0));
+                        GGML_ASSERT(ggml_is_contiguous(src1));
 
                         // utilize float4
                         GGML_ASSERT(ne00 % 4 == 0);
@@ -952,9 +952,8 @@ void ggml_metal_graph_compute(
 
                         [encoder setComputePipelineState:ctx->pipeline_sqr];
                         [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                         [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
 
-                        //GGML_METAL_LOG_WARN("%s: node %3d, op = %8s dispatching \n", __func__, i, ggml_op_name(dst->op));
                         const int64_t n = ggml_nelements(dst);
                         [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                     } break;
gguf-py/gguf/gguf.py

@@ -85,7 +85,7 @@ class MODEL_ARCH(IntEnum):
     GPTNEOX : int = auto()
     MPT : int = auto()
     STARCODER : int = auto()
-    PERSIMMON  : int = auto()
+    PERSIMMON : int = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -190,7 +190,7 @@ MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
         MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
     },
     MODEL_ARCH.PERSIMMON: {
-        MODEL_TENSOR.TOKEN_EMBD:  "token_embd",
+        MODEL_TENSOR.TOKEN_EMBD: "token_embd",
         MODEL_TENSOR.OUTPUT: "output",
         MODEL_TENSOR.OUTPUT_NORM: "output_norm",
         MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
llama.cpp
@@ -2473,7 +2473,7 @@ static void llm_load_tensors(
                 } break;
             case LLM_ARCH_PERSIMMON:
                 {
-                        model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
-                        model.output_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, GGML_BACKEND_CPU);
-                        model.output_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, GGML_BACKEND_CPU);
-                        model.output = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
+                    model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
+                    model.output_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, GGML_BACKEND_CPU);
+                    model.output_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, GGML_BACKEND_CPU);
+                    model.output = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
@@ -2612,8 +2612,8 @@ static bool llama_model_load(
 }
 
 static struct ggml_cgraph * llm_build_llama(
          llama_context & lctx,
      const llama_batch & batch) {
     const auto & model = lctx.model;
     const auto & hparams = model.hparams;
-    const auto & cparams  = lctx.cparams;
+    const auto & cparams = lctx.cparams;
@@ -4016,9 +4016,8 @@ static struct ggml_cgraph * llm_build_starcoder(
 
 
 static struct ggml_cgraph * llm_build_persimmon(
          llama_context & lctx,
-    const llama_batch & batch
-) {
+    const llama_batch & batch) {
     const auto & model = lctx.model;
     const auto & hparams = model.hparams;
 