llama : avoid ggml_cast, use F32 query

This commit is contained in:
Georgi Gerganov 2024-01-25 17:46:07 +02:00
parent 40ea8cd1ac
commit f9ca5dcbe8
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
6 changed files with 44 additions and 17 deletions

4
ggml.h
View file

@ -1633,6 +1633,10 @@ extern "C" {
struct ggml_tensor * mask,
float scale);
GGML_API void ggml_flash_attn_ext_set_prec(
struct ggml_tensor * a,
enum ggml_prec prec);
GGML_API struct ggml_tensor * ggml_flash_attn_back(
struct ggml_context * ctx,
struct ggml_tensor * q,