llama : advanced batch splits
This includes equal-sequence-length batch splits, which are useful to simplify recurrent model operators.
* llama : always make recurrent state slots contiguous
* ggml : simplify mamba operators
This commit is contained in:
parent
a38b884c6c
commit
c51daefc32
3 changed files with 1056 additions and 643 deletions
|
@ -1760,10 +1760,8 @@ extern "C" {
|
|||
|
||||
GGML_API struct ggml_tensor * ggml_ssm_conv(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * s,
|
||||
struct ggml_tensor * x,
|
||||
struct ggml_tensor * c,
|
||||
struct ggml_tensor * sq);
|
||||
struct ggml_tensor * sx,
|
||||
struct ggml_tensor * c);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_ssm_scan(
|
||||
struct ggml_context * ctx,
|
||||
|
@ -1772,8 +1770,7 @@ extern "C" {
|
|||
struct ggml_tensor * dt,
|
||||
struct ggml_tensor * A,
|
||||
struct ggml_tensor * B,
|
||||
struct ggml_tensor * C,
|
||||
struct ggml_tensor * sq);
|
||||
struct ggml_tensor * C);
|
||||
|
||||
// partition into non-overlapping windows with padding if needed
|
||||
// example:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue