Introduce C-style API (#370)
* Major refactoring - introduce C-style API
* Clean up
* Add <cassert>
* Add <iterator>
* Add <algorithm> ....
* Fix timing reporting and accumulation
* Measure eval time only for single-token calls
* Change llama_tokenize return meaning
This commit is contained in:
parent
da0e9fe90c
commit
f5a77a629b
14 changed files with 1954 additions and 1752 deletions
|
@@ -148,7 +148,7 @@ def main():
|
|||
model = torch.load(fname_model, map_location="cpu")
|
||||
|
||||
with open(fname_out, "wb") as fout:
|
||||
fout.write(struct.pack("i", hparams["vocab_size"]))
|
||||
write_header(fout, hparams, ftype)
|
||||
write_tokens(fout, tokenizer)
|
||||
|
||||
del model
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue