Merge branch 'gg/flash-attn' of https://github.com/ggerganov/llama.cpp into flash-attn-cuda
commit 09db1a7cf3
24 changed files with 1255 additions and 325 deletions
tests/CMakeLists.txt
@@ -49,6 +49,7 @@ llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp)
 # llama_build_and_test_executable(test-opt.cpp) # SLOW
 llama_build_and_test_executable(test-backend-ops.cpp)
 llama_build_and_test_executable(test-autorelease.cpp)
 
+llama_build_and_test_executable(test-rope.cpp)
 
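
For context on the hunk above: llama_build_and_test_executable is a CMake helper defined earlier in tests/CMakeLists.txt. The following is a minimal sketch of what such a helper typically does, not the repository's exact definition; the function body and the llama/common link targets are assumptions here.

# Sketch of a helper like llama_build_and_test_executable (assumed shape;
# the real definition in llama.cpp may differ). It builds one test source
# into an executable, links it against the project libraries, and registers
# the binary with CTest.
function(llama_build_and_test_executable source)
    get_filename_component(TEST_TARGET ${source} NAME_WE)      # test-rope.cpp -> test-rope
    add_executable(${TEST_TARGET} ${source})
    target_link_libraries(${TEST_TARGET} PRIVATE llama common) # link targets assumed
    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
endfunction()

Under that assumption, the single added line registers the new RoPE test as a CTest target, so after a build it can be run selectively with `ctest -R test-rope`.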