llama : avoid redundant state copy for Mamba 1 and 2

This commit is contained in:
Francis Couture-Harpin 2024-09-30 15:52:42 -04:00
parent 0e601cafe9
commit 273e7a495a
4 changed files with 142 additions and 119 deletions

View file

@@ -1833,7 +1833,8 @@ extern "C" {
struct ggml_tensor * A,
struct ggml_tensor * B,
struct ggml_tensor * C,
-struct ggml_tensor * D);
+struct ggml_tensor * D,
+struct ggml_tensor * ids);
// partition into non-overlapping windows with padding if needed
// example: