Compare commits

...
Sign in to create a new pull request.

3 commits

Author SHA1 Message Date
Georgi Gerganov
cb79f8a2d8
llama : add SKIP_KQ_KQV option 2023-10-22 09:58:29 +03:00
Georgi Gerganov
ed9fde7a1e
ggml : skip nops 2023-10-22 09:55:37 +03:00
Georgi Gerganov
2471d56a2e
llama : profiling the attention compute 2023-10-22 09:22:54 +03:00
2 changed files with 31 additions and 0 deletions

4
ggml.c
View file

@@ -16602,6 +16602,10 @@ static void ggml_compute_forward_cross_entropy_loss_back(
static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
GGML_ASSERT(params);
if (tensor->op == GGML_OP_NONE) {
return;
}
#ifdef GGML_USE_CUBLAS
bool skip_cpu = ggml_cuda_compute_forward(params, tensor);
if (skip_cpu) {

View file

@@ -5815,6 +5815,33 @@ static struct ggml_cgraph * llama_build_graph(
GGML_ASSERT(false);
}
#if 1
for (int i = 0; i < result->n_nodes; ++i) {
struct ggml_tensor * node = result->nodes[i];
if (getenv("SKIP_KQ_ALL")) {
if (
strcmp(node->name, "KQ") == 0 ||
strcmp(node->name, "KQ_scaled") == 0 ||
strcmp(node->name, "KQ_masked") == 0 ||
strcmp(node->name, "KQ_soft_max") == 0 ||
strcmp(node->name, "KQV") == 0 ||
false) {
//printf("skipping %s\n", dst->name);
node->op = GGML_OP_NONE;
}
}
if (getenv("SKIP_KQ_KQV")) {
if (
strcmp(node->name, "KQ") == 0 ||
strcmp(node->name, "KQV") == 0 ||
false) {
//printf("skipping %s\n", dst->name);
node->op = GGML_OP_NONE;
}
}
}
#endif
return result;
}