Detect multi-byte responses and wait

This commit is contained in:
Mug 2023-04-28 12:50:30 +02:00 committed by Don Mahurin
parent 36b3494332
commit 441d30811a

View file

@@ -96,7 +96,7 @@ specified) expect poor results""", file=sys.stderr)
print(file=sys.stderr) print(file=sys.stderr)
print(f"system_info: n_threads = {self.params.n_threads} / {cpu_count()} \ print(f"system_info: n_threads = {self.params.n_threads} / {cpu_count()} \
| {llama_cpp.llama_print_system_info().decode('utf8', errors='ignore')}", file=sys.stderr) | {llama_cpp.llama_print_system_info().decode('utf8')}", file=sys.stderr)
# determine the required inference memory per token: # determine the required inference memory per token:
if (self.params.mem_test): if (self.params.mem_test):