llama : do not use KV cache for non-causal models
ggml-ci
This commit is contained in:
parent
d0347840c1
commit
eb42596277
3 changed files with 109 additions and 39 deletions
|
@@ -13,7 +13,7 @@ async def main():
|
|||
model_url = "http://127.0.0.1:6900"
|
||||
responses: list[requests.Response] = await asyncio.gather(*[requests_post_async(
|
||||
url= f"{model_url}/embedding",
|
||||
json= {"content": str(i)*32}
|
||||
json= {"content": str(0)*32}
|
||||
) for i in range(n)])
|
||||
|
||||
for response in responses:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue