diff --git a/examples/server/tests/features/results.feature b/examples/server/tests/features/results.feature index b3089f15c..e8e1b5414 100644 --- a/examples/server/tests/features/results.feature +++ b/examples/server/tests/features/results.feature @@ -27,7 +27,8 @@ Feature: Results Examples: | n_slots | | 1 | - | 2 | + # FIXME: unified KV cache nondeterminism + # | 2 | Scenario Outline: different results with different seed Given slots @@ -73,14 +74,13 @@ Feature: Results Examples: | n_parallel | temp | | 1 | 0.0 | - | 2 | 0.0 | - | 4 | 0.0 | | 1 | 1.0 | - # FIXME: These tests fail on master. - # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism. + # FIXME: unified KV cache nondeterminism # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227 # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574 # and https://github.com/ggerganov/llama.cpp/pull/7347 . + # | 2 | 0.0 | + # | 4 | 0.0 | # | 2 | 1.0 | # | 4 | 1.0 | @@ -108,12 +108,11 @@ Feature: Results Examples: | n_slots | n_kv | n_predict | n_parallel | | 4 | 1024 | 1 | 1 | - | 4 | 1024 | 1 | 4 | - # FIXME: These tests fail on master. - # Problems: unified KV cache (except for CPU backend with LLAMA_NO_LLAMAFILE=1), SIMD nondeterminism. + # FIXME: unified KV cache nondeterminism # See https://github.com/ggerganov/whisper.cpp/issues/1941#issuecomment-1986923227 # and https://github.com/ggerganov/llama.cpp/pull/6122#discussion_r1531405574 # and https://github.com/ggerganov/llama.cpp/pull/7347 . + # | 4 | 1024 | 1 | 4 | # | 4 | 1024 | 100 | 1 | # This test still fails even the above patches; the first token probabilities are already different. # | 4 | 1024 | 100 | 4 |