llama : do not use KV cache for non-causal models

ggml-ci
2024-03-04 13:31:03 +02:00 · 2024-03-04 13:31:03 +02:00 · eb42596277
commit eb42596277
parent d0347840c1
3 changed files with 109 additions and 39 deletions
--- a/examples/server-embd.py
+++ b/examples/server-embd.py
@@ -13,7 +13,7 @@ async def main():
    model_url = "http://127.0.0.1:6900"
    responses: list[requests.Response] = await asyncio.gather(*[requests_post_async(
        url= f"{model_url}/embedding",
-        json= {"content": str(i)*32}
+        json= {"content": str(0)*32}
    ) for i in range(n)])

    for response in responses: