Merge branch 'master' of https://github.com/ggerganov/llama.cpp into ntkv2
Commit bc8395d5c4: 28 changed files with 4088 additions and 3002 deletions.
.gitignore (vendored): 1 change

@@ -10,6 +10,7 @@
 *.gcno
 *.gcda
 *.dot
+*.bat
 *.metallib
 .DS_Store
 .build/
Makefile: 9 changes

@@ -391,9 +391,12 @@ else
 endif #LLAMA_CUDA_NVCC
 ifdef CUDA_DOCKER_ARCH
 	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
+endif # CUDA_DOCKER_ARCH
+ifdef CUDA_NATIVE_ARCH
+	NVCCFLAGS += -arch=$(CUDA_NATIVE_ARCH)
 else
 	NVCCFLAGS += -arch=native
-endif # CUDA_DOCKER_ARCH
+endif # CUDA_NATIVE_ARCH
 ifdef LLAMA_CUDA_FORCE_DMMV
 	NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
 endif # LLAMA_CUDA_FORCE_DMMV

@@ -605,8 +608,8 @@ embedding: examples/embedding/embedding.cpp build-info.h ggml.
 save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
-server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
-	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2)
+server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -Wno-cast-qual
 
 gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@@ -101,7 +101,7 @@ as the main playground for developing new features for the [ggml](https://github
 
 - Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
 - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
-- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp), [hlhr202/llama-node](https://github.com/hlhr202/llama-node)
+- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)
 - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb)
 - Rust: [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
 - C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp)
@@ -131,6 +131,7 @@ pub fn build(b: *std.build.Builder) !void {
     const sampling = make.obj("sampling", "common/sampling.cpp");
     const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp");
     const train = make.obj("train", "common/train.cpp");
+    const clip = make.obj("clip", "examples/llava/clip.cpp");
 
     _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, llama, common, sampling, console, grammar_parser });
     _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, llama, common });

@@ -139,7 +140,7 @@ pub fn build(b: *std.build.Builder) !void {
     _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, llama, common, train });
    _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, llama, common, train });
 
-    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, llama, common, sampling, grammar_parser });
+    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, llama, common, sampling, grammar_parser, clip });
     if (server.target.isWindows()) {
         server.linkSystemLibrary("ws2_32");
     }
@@ -672,6 +672,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
         process_escapes(params.prompt);
         process_escapes(params.input_prefix);
         process_escapes(params.input_suffix);
+        process_escapes(sparams.cfg_negative_prompt);
         for (auto & antiprompt : params.antiprompt) {
             process_escapes(antiprompt);
         }
@@ -110,7 +110,7 @@ print("gguf: loading model "+dir_model.name)
 with open(dir_model / "config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 print("hello print: ",hparams["architectures"][0])
-if hparams["architectures"][0] != "BaichuanForCausalLM":
+if hparams["architectures"][0] != "BaichuanForCausalLM" and hparams["architectures"][0] != "BaiChuanForCausalLM":
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit()

@@ -231,7 +231,7 @@ gguf_writer.add_token_list(tokens)
 gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-special_vocab = gguf.SpecialVocab(dir_model)
+special_vocab = gguf.SpecialVocab(dir_model, n_vocab = len(tokens))
 special_vocab.add_to_gguf(gguf_writer)
 
 # TENSORS
@@ -129,7 +129,7 @@ gguf_writer.add_token_list(tokens)
 gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-special_vocab = gguf.SpecialVocab(dir_model, load_merges=True)
+special_vocab = gguf.SpecialVocab(dir_model, load_merges=True, n_vocab = len(tokens))
 special_vocab.add_to_gguf(gguf_writer)
 
 # TENSORS
@@ -152,7 +152,7 @@ gguf_writer.add_token_list(tokens)
 gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
+special_vocab = gguf.SpecialVocab(dir_model, load_merges = True, n_vocab = len(tokens))
 special_vocab.add_to_gguf(gguf_writer)
 
 # TENSORS
@@ -134,7 +134,7 @@ gguf_writer.add_token_list(tokens)
 gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
+special_vocab = gguf.SpecialVocab(dir_model, load_merges = True, n_vocab = len(tokens))
 special_vocab.add_to_gguf(gguf_writer)
 
 # TENSORS
@@ -388,7 +388,9 @@ def handle_metadata(cfg, hp):
         cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir,
         cfg.vocabtype )
     # FIXME: Respect cfg.vocab_dir?
-    svocab = gguf.SpecialVocab(cfg.model_metadata_dir)
+    svocab = gguf.SpecialVocab(cfg.model_metadata_dir,
+                               load_merges = cfg.vocabtype == 'bpe',
+                               n_vocab = vocab.vocab_size)
     convert.check_vocab_size(params, vocab)
     return (params, vocab, svocab)
 
@@ -128,18 +128,25 @@ vocab_size = hparams["vocab_size"]
 # ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
 tokenizer = AutoTokenizer.from_pretrained(dir_model)
 
+added_vocab = tokenizer.get_added_vocab()
 reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
 
 for i in range(vocab_size):
-    tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]")
-    scores.append(0.0) # dummy
-    toktypes.append(gguf.TokenType.NORMAL)
+    if i not in reverse_vocab:
+        tokens.append(f"[PAD{i}]")
+        toktypes.append(gguf.TokenType.USER_DEFINED)
+    elif reverse_vocab[i] in added_vocab:
+        # NOTE: wouldn't we like to distinguish CONTROL tokens here?
+        tokens.append(reverse_vocab[i])
+        toktypes.append(gguf.TokenType.USER_DEFINED)
+    else:
+        tokens.append(reverse_vocab[i])
+        toktypes.append(gguf.TokenType.NORMAL)
 
 gguf_writer.add_token_list(tokens)
-gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
+special_vocab = gguf.SpecialVocab(dir_model, load_merges = True, n_vocab = len(tokens))
 special_vocab.add_to_gguf(gguf_writer)
 
 # TENSORS
@@ -150,7 +150,7 @@ gguf_writer.add_token_list(tokens)
 gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-special_vocab = gguf.SpecialVocab(dir_model, load_merges=True)
+special_vocab = gguf.SpecialVocab(dir_model, load_merges=True, n_vocab = len(tokens))
 special_vocab.add_to_gguf(gguf_writer)
 
 # TENSORS
@@ -122,7 +122,7 @@ gguf_writer.add_token_list(tokens)
 gguf_writer.add_token_scores(scores)
 gguf_writer.add_token_types(toktypes)
 
-special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
+special_vocab = gguf.SpecialVocab(dir_model, load_merges = True, n_vocab = len(tokens))
 special_vocab.add_to_gguf(gguf_writer)
 
 # TENSORS
convert.py: 13 changes

@@ -360,7 +360,7 @@ class SentencePieceVocab:
         expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
         actual_ids = sorted(added_tokens.values())
         if expected_ids != actual_ids:
-            raise Exception(f"Expected added token IDs to be sequential and start at {len(added_tokens)}; got {actual_ids}")
+            raise Exception(f"Expected added token IDs to be sequential and start at {vocab_size}; got {actual_ids}")
 
         items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
         self.added_tokens_list = [text for (text, idx) in items]

@@ -1162,10 +1162,13 @@ def main(args_in: list[str] | None = None) -> None:
 
     vocab: Vocab
     if args.vocab_only:
-        assert args.outfile, "need --outfile if using --vocab-only"
+        if not args.outfile:
+            raise ValueError("need --outfile if using --vocab-only")
         # FIXME: Try to respect vocab_dir somehow?
         vocab = load_vocab(args.vocab_dir or args.model, args.vocabtype)
-        special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent, load_merges = args.vocabtype == 'bpe')
+        special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
+            load_merges = args.vocabtype == 'bpe',
+            n_vocab = vocab.vocab_size)
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab)
         print(f"Wrote {outfile}")

@@ -1177,7 +1180,9 @@ def main(args_in: list[str] | None = None) -> None:
         vocab_dir = args.vocab_dir if args.vocab_dir else model_plus.paths[0].parent
         vocab = load_vocab(vocab_dir, args.vocabtype)
     # FIXME: Try to respect vocab_dir somehow?
-    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent, load_merges = args.vocabtype == 'bpe')
+    special_vocab = gguf.SpecialVocab(model_plus.paths[0].parent,
+        load_merges = args.vocabtype == 'bpe',
+        n_vocab = vocab.vocab_size)
 
     model = model_plus.model
     model = convert_model_names(model, params)
@@ -1,7 +1,7 @@
 set(TARGET clip)
 add_library(${TARGET} clip.cpp clip.h)
 install(TARGETS ${TARGET} LIBRARY)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE common ggml ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
 if (NOT MSVC)
     target_compile_options(${TARGET} PRIVATE -Wno-cast-qual) # stb_image.h
@@ -610,8 +610,8 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     int idx_mean = get_key_idx(ctx, KEY_IMAGE_MEAN);
     int idx_std = get_key_idx(ctx, KEY_IMAGE_STD);
     for (int i = 0; i < 3; ++i) {
-        new_clip->image_mean[i] = *((float *)gguf_get_arr_data(ctx, idx_mean));
-        new_clip->image_std[i] = *((float *)gguf_get_arr_data(ctx, idx_std));
+        new_clip->image_mean[i] = *((const float *)gguf_get_arr_data(ctx, idx_mean));
+        new_clip->image_std[i] = *((const float *)gguf_get_arr_data(ctx, idx_std));
     }
 
     if (verbosity >= 2) {
@@ -761,6 +761,9 @@ int main(int argc, char ** argv) {
                 n_consumed = embd_inp.size();
                 embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end());
             }
+            if (params.escape) {
+                process_escapes(buffer);
+            }
 
             const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
             const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
@@ -6,7 +6,7 @@ install(TARGETS ${TARGET} RUNTIME)
 target_compile_definitions(${TARGET} PRIVATE
     SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
 )
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE common llama clip ${CMAKE_THREAD_LIBS_INIT})
 if (WIN32)
     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
 endif()
@@ -24,6 +24,10 @@ Command line options:
 - `--port`: Set the port to listen. Default: `8080`.
 - `--path`: path from which to serve static files (default examples/server/public)
 - `--embedding`: Enable embedding extraction, Default: disabled.
+- `-np N`, `--parallel N`: Set the number of slots for process requests (default: 1)
+- `-cb`, `--cont-batching`: enable continuous batching (a.k.a dynamic batching) (default: disabled)
+- `-spf FNAME`, `--system-prompt-file FNAME` Set a file to load "a system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
+- `--mmproj MMPROJ_FILE`: Path to a multimodal projector file for LLaVA.
 
 ## Build
 
@@ -158,6 +162,8 @@ node index.js
 
     `n_probs`: If greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0)
 
+    `image_data`: An array of objects to hold base64-encoded image `data` and its `id`s to be reference in `prompt`. You can determine the place of the image in the prompt as in the following: `USER:[img-12]Describe the image in detail.\nASSISTANT:` In this case, `[img-12]` will be replaced by the embeddings of the image id 12 in the following `image_data` array: `{..., "image_data": [{"data": "<BASE64_STRING>", "id": 12}]}`. Use `image_data` only with multimodal models, e.g., LLaVA.
+
     *Result JSON:*
 
     Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
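The `image_data` field documented in the hunk above pairs with the new `--mmproj` server option. Below is a minimal client sketch, assuming a llama.cpp server with a LLaVA model and projector running on the default `http://localhost:8080`; the image path and generation parameters are illustrative, only the endpoint and field names come from the README text above.

```python
import base64
import json
import urllib.request

# Encode the image that the [img-12] placeholder in the prompt refers to.
with open("example.jpg", "rb") as f:  # hypothetical local image
    img_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "prompt": "USER:[img-12]Describe the image in detail.\nASSISTANT:",
    "n_predict": 128,
    "image_data": [{"data": img_b64, "id": 12}],
}

req = urllib.request.Request(
    "http://localhost:8080/completion",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["content"])
```

This mirrors what the web UI change later in this commit does when an image is uploaded: it strips the `data:image/...;base64,` prefix and sends the remainder as `data` with id 10.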
@@ -188,6 +194,12 @@ node index.js
 
     `truncated`: Boolean indicating if the context size was exceeded during generation, i.e. the number of tokens provided in the prompt (`tokens_evaluated`) plus tokens generated (`tokens predicted`) exceeded the context size (`n_ctx`)
 
+    `slot_id`: Assign the completion task to an specific slot. If is -1 the task will be assigned to a Idle slot (default: -1)
+
+    `cache_prompt`: Save the prompt and generation for avoid reprocess entire prompt if a part of this isn't change (default: false)
+
+    `system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
+
 - **POST** `/tokenize`: Tokenize a given text.
 
     *Options:*
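The `slot_id` and `cache_prompt` fields above are what the updated `chat.mjs` and `api_like_OAI.py` later in this commit rely on: send `cache_prompt: true`, remember the `slot_id` the server reports back (the streamed chunks carry it per the `chat.mjs` change; it is assumed here that the non-streaming result does too), and pass it on the next request so the shared prompt prefix is not re-evaluated. A rough sketch, with the server URL and prompts as placeholders:

```python
import json
import urllib.request

API_URL = "http://localhost:8080"

def completion(prompt, slot_id=-1):
    payload = {
        "prompt": prompt,
        "n_predict": 64,
        "cache_prompt": True,  # keep the evaluated prompt on the server side
        "slot_id": slot_id,    # -1 = let the server pick an idle slot
    }
    req = urllib.request.Request(
        API_URL + "/completion",
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())

first = completion("User: Hello!\nAssistant:")
# Reuse the slot so the second request only evaluates the new suffix.
followup = "User: Hello!\nAssistant:" + first["content"] + "\nUser: Tell me more.\nAssistant:"
second = completion(followup, slot_id=first.get("slot_id", -1))
print(second["content"])
```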
@@ -218,8 +230,32 @@ node index.js
 
     It also accepts all the options of `/completion` except `stream` and `prompt`.
 
+- **GET** `/props`: Return the required assistant name and anti-prompt to generate the prompt in case you have specified a system prompt for all slots.
+
 ## More examples
 
+### Change system prompt on runtime
+
+To use the server example to serve multiple chat-type clients while keeping the same system prompt, you can utilize the option `system_prompt` to achieve that. This only needs to be done once to establish it.
+
+`prompt`: Specify a context that you want all connecting clients to respect.
+
+`anti_prompt`: Specify the word you want to use to instruct the model to stop. This must be sent to each client through the `/props` endpoint.
+
+`assistant_name`: The bot's name is necessary for each customer to generate the prompt. This must be sent to each client through the `/props` endpoint.
+
+```json
+{
+    "system_prompt": {
+        "prompt": "Transcript of a never ending dialog, where the User interacts with an Assistant.\nThe Assistant is helpful, kind, honest, good at writing, and never fails to answer the User's requests immediately and with precision.\nUser: Recommend a nice restaurant in the area.\nAssistant: I recommend the restaurant \"The Golden Duck\". It is a 5 star restaurant with a great view of the city. The food is delicious and the service is excellent. The prices are reasonable and the portions are generous. The restaurant is located at 123 Main Street, New York, NY 10001. The phone number is (212) 555-1234. The hours are Monday through Friday from 11:00 am to 10:00 pm. The restaurant is closed on Saturdays and Sundays.\nUser: Who is Richard Feynman?\nAssistant: Richard Feynman was an American physicist who is best known for his work in quantum mechanics and particle physics. He was awarded the Nobel Prize in Physics in 1965 for his contributions to the development of quantum electrodynamics. He was a popular lecturer and author, and he wrote several books, including \"Surely You're Joking, Mr. Feynman!\" and \"What Do You Care What Other People Think?\".\nUser:",
+        "anti_prompt": "User:",
+        "assistant_name": "Assistant:"
+    }
+}
+```
+
+**NOTE**: You can do this automatically when starting the server by simply creating a .json file with these options and using the CLI option `-spf FNAME` or `--system-prompt-file FNAME`.
+
 ### Interactive mode
 
 Check the sample in [chat.mjs](chat.mjs).
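A condensed sketch of the runtime system-prompt flow described above, under the same assumptions as the earlier sketches (local server on port 8080). The `system_prompt` object is the one documented in this section; sending it piggybacked on a small `/completion` request is one way it is exposed, and the other request parameters here are placeholders. Clients then read the shared names back from `/props`:

```python
import json
import urllib.request

API_URL = "http://localhost:8080"

def post_json(path, payload):
    req = urllib.request.Request(
        API_URL + path,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())

# Done once, by whichever client establishes the shared context.
post_json("/completion", {
    "prompt": "",          # placeholder; only system_prompt matters here
    "n_predict": 0,
    "system_prompt": {
        "prompt": "Transcript of a dialog between a User and an Assistant.\nUser:",
        "anti_prompt": "User:",
        "assistant_name": "Assistant:",
    },
})

# Every chat client then fetches the names it needs to build its prompts.
with urllib.request.urlopen(API_URL + "/props") as resp:
    props = json.loads(resp.read())
print(props)  # expected to contain the assistant name and anti-prompt
```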
@@ -8,6 +8,7 @@ import json
 
 
 app = Flask(__name__)
+slot_id = -1
 
 parser = argparse.ArgumentParser(description="An example of using server.cpp with a similar API to OAI. It must be used together with server.cpp.")
 parser.add_argument("--chat-prompt", type=str, help="the top prompt in chat completions(default: 'A chat between a curious user and an artificial intelligence assistant. The assistant follows the given rules no matter what.\\n')", default='A chat between a curious user and an artificial intelligence assistant. The assistant follows the given rules no matter what.\\n')

@@ -77,7 +78,8 @@ def make_postData(body, chat=False, stream=False):
     if(is_present(body, "stop")): postData["stop"] += body["stop"]
     postData["n_keep"] = -1
     postData["stream"] = stream
+    postData["cache_prompt"] = True
+    postData["slot_id"] = slot_id
     return postData
 
 def make_resData(data, chat=False, promptToken=[]):

@@ -128,6 +130,7 @@ def make_resData_stream(data, chat=False, time_now = 0, start=False):
             }
         ]
     }
+    slot_id = data["slot_id"]
    if (chat):
        if (start):
            resData["choices"][0]["delta"] = {
@@ -7,6 +7,11 @@ const args = process.argv.slice(2);
 const grammarJsonSchemaFile = args.find(
     (_, index) => args[index - 1] === "--grammar-json-schema"
 );
+
+const no_cached_prompt = args.find(
+    (_, index) => args[index - 1] === "--no-cache-prompt"
+) ?? "false";
+
 const grammarFile = args.find((_, index) => args[index - 1] === "--grammar");
 
 // Example usage: function,arguments

@@ -30,6 +35,9 @@ if (grammarFile) {
     grammar = readFileSync(grammarFile, 'utf-8')
 }
 
+// for cached prompt
+let slot_id = -1;
+
 const API_URL = 'http://127.0.0.1:8080'
 
 const chat = [

@@ -76,6 +84,8 @@ async function chat_completion(question) {
         top_p: 0.9,
         n_keep: n_keep,
         n_predict: 256,
+        cache_prompt: no_cached_prompt === "false",
+        slot_id: slot_id,
         stop: ["\n### Human:"], // stop completion after generating this
         grammar,
         stream: true,

@@ -92,6 +102,7 @@ async function chat_completion(question) {
         const t = Buffer.from(chunk).toString('utf8')
         if (t.startsWith('data: ')) {
             const message = JSON.parse(t.substring(6))
+            slot_id = message.slot_id
             answer += message.content
             process.stdout.write(message.content)
             if (message.stop) {
(File diff suppressed because it is too large.)
@@ -125,6 +125,7 @@
     background-color: #222;
     color: #ddd;
 }
+
 code {
     font-family: monospace;
     padding: 0.1em 0.3em;

@@ -141,7 +142,8 @@
     display: inline;
 }
 
-header, footer {
+header,
+footer {
     text-align: center;
 }
 

@@ -163,6 +165,7 @@
     0% {
         background-position: 0%;
     }
+
     100% {
         background-position: 100%;
     }

@@ -181,6 +184,7 @@
     --loading-color-1: #22222200;
     --loading-color-2: #222222ff;
 }
+
 .popover-content {
     background-color: black;
 }
@@ -194,6 +198,8 @@
 
     import { llama } from '/completion.js';
     import { SchemaConverter } from '/json-schema-to-grammar.mjs';
+    let selected_image = false;
+    var slot_id = -1;
 
     const session = signal({
         prompt: "This is a conversation between User and Llama, a friendly chatbot. Llama is helpful, kind, honest, good at writing, and never fails to answer any requests immediately and with precision.",

@@ -203,6 +209,7 @@
         type: "chat", // "chat" | "completion"
         char: "Llama",
         user: "User",
+        image_selected: ''
     })
 
     const params = signal({

@@ -220,7 +227,9 @@
         mirostat_tau: 5, // target entropy
         mirostat_eta: 0.1, // learning rate
         grammar: '',
-        n_probs: 0, // no completion_probabilities
+        n_probs: 0, // no completion_probabilities,
+        image_data: [],
+        cache_prompt: true
     })
 
     /* START: Support for storing prompt templates and parameters in borwser LocalStorage */

@@ -270,6 +279,7 @@
         // saved templates were successfuly imported.
 
         console.log('Processing saved templates and updating default template')
+        params.value = { ...params.value, image_data: [] };
 
         //console.log(importedTemplates);
         savedUserTemplates.value = importedTemplates;

@@ -294,7 +304,9 @@
 
     function userTemplateApply(t) {
         session.value = t.data.session;
+        session.value = { ...session.value, image_selected: '' };
         params.value = t.data.params;
+        params.value = { ...params.value, image_data: [] };
     }
 
     function userTemplateResetToDefaultAndApply() {
@@ -385,20 +397,25 @@
             throw new Error("already running");
         }
         controller.value = new AbortController();
-        for await (const chunk of llama(prompt, llamaParams, {controller: controller.value})) {
+        for await (const chunk of llama(prompt, llamaParams, { controller: controller.value })) {
             const data = chunk.data;
 
             if (data.stop) {
                 while (
                     currentMessages.length > 0 &&
                     currentMessages[currentMessages.length - 1].content.match(/\n$/) != null
                 ) {
                     currentMessages.pop();
                 }
                 transcriptUpdate([...history, [char, currentMessages]])
                 console.log("Completion finished: '", currentMessages.map(msg => msg.content).join(''), "', summary: ", data);
             } else {
                 currentMessages.push(data);
+                slot_id = data.slot_id;
+                if (selected_image && !data.multimodal) {
+                    alert("The server was not compiled for multimodal or the model projector can't be loaded.");
+                    return;
+                }
                 transcriptUpdate([...history, [char, currentMessages]])
             }
 

@@ -419,7 +436,7 @@
 
         transcriptUpdate([...session.value.transcript, ["{{user}}", msg]])
 
-        const prompt = template(session.value.template, {
+        let prompt = template(session.value.template, {
             message: msg,
             history: session.value.transcript.flatMap(
                 ([name, data]) =>

@@ -434,9 +451,12 @@
                 )
             ).join("\n"),
         });
+        if (selected_image) {
+            prompt = `A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:[img-10]${msg}\nASSISTANT:`;
+        }
         await runLlama(prompt, {
             ...params.value,
+            slot_id: slot_id,
             stop: ["</s>", template("{{char}}:"), template("{{user}}:")],
         }, "{{char}}");
     }

@@ -446,10 +466,11 @@
             console.log('already running...');
             return;
         }
-        const {prompt} = session.value;
+        const { prompt } = session.value;
         transcriptUpdate([...session.value.transcript, ["", prompt]]);
         await runLlama(prompt, {
             ...params.value,
+            slot_id: slot_id,
             stop: [],
         }, "");
     }
@@ -467,6 +488,27 @@
         transcriptUpdate([]);
     }
 
+    const uploadImage = (e) => {
+        e.preventDefault();
+        document.getElementById("fileInput").click();
+        document.getElementById("fileInput").addEventListener("change", function (event) {
+            const selectedFile = event.target.files[0];
+            if (selectedFile) {
+                const reader = new FileReader();
+                reader.onload = function () {
+                    const image_data = reader.result;
+                    session.value = { ...session.value, image_selected: image_data };
+                    params.value = {
+                        ...params.value, image_data: [
+                            { data: image_data.replace(/data:image\/[^;]+;base64,/, ''), id: 10 }]
+                    }
+                };
+                selected_image = true;
+                reader.readAsDataURL(selectedFile);
+            }
+        });
+    }
+
     function MessageInput() {
         const message = useSignal("")
 

@@ -497,6 +539,7 @@
             </div>
             <div class="right">
                 <button type="submit" disabled=${generating.value}>Send</button>
+                <button onclick=${uploadImage}>Upload Image</button>
                 <button onclick=${stop} disabled=${!generating.value}>Stop</button>
                 <button onclick=${reset}>Reset</button>
             </div>

@@ -540,7 +583,7 @@
                 data;
             message = html`<${Markdownish} text=${template(text)} />`
         }
-        if(user) {
+        if (user) {
             return html`<p key=${index}><strong>${template(user)}:</strong> ${message}</p>`
         } else {
             return html`<p key=${index}>${message}</p>`
@@ -549,6 +592,7 @@
 
         return html`
             <section id="chat" ref=${container}>
+                <img style="width: 60%;${!session.value.image_selected ? `display: none;` : ``}" src="${session.value.image_selected}"/>
                 ${messages.flatMap(chatLine)}
             </section>`;
     };

@@ -567,7 +611,7 @@
         const converter = new SchemaConverter(
             grammarJsonSchemaPropOrder.value
                 .split(',')
-                .reduce((acc, cur, i) => ({...acc, [cur.trim()]: i}), {})
+                .reduce((acc, cur, i) => ({ ...acc, [cur.trim()]: i }), {})
         )
         converter.visit(schema, '')
         params.value = {

@@ -579,7 +623,7 @@
             }
         }
 
-    const FloatField = ({label, max, min, name, step, value}) => {
+    const FloatField = ({ label, max, min, name, step, value }) => {
         return html`
             <div>
                 <label for="${name}">${label}</label>

@@ -589,7 +633,7 @@
         `
     };
 
-    const IntField = ({label, max, min, name, value}) => {
+    const IntField = ({ label, max, min, name, value }) => {
         return html`
             <div>
                 <label for="${name}">${label}</label>

@@ -672,7 +716,7 @@
             ${GrammarControl()}
             </fieldset>
         `
     );
 
     const CompletionConfigForm = () => (
         html`
@@ -694,20 +738,20 @@
             ${session.value.type === 'chat' ? ChatConfigForm() : CompletionConfigForm()}
 
             <fieldset class="two">
-            ${IntField({label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict})}
-            ${FloatField({label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature})}
-            ${FloatField({label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty})}
-            ${IntField({label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n})}
-            ${IntField({label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k})}
-            ${FloatField({label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p})}
+            ${IntField({ label: "Predictions", max: 2048, min: -1, name: "n_predict", value: params.value.n_predict })}
+            ${FloatField({ label: "Temperature", max: 1.5, min: 0.0, name: "temperature", step: 0.01, value: params.value.temperature })}
+            ${FloatField({ label: "Penalize repeat sequence", max: 2.0, min: 0.0, name: "repeat_penalty", step: 0.01, value: params.value.repeat_penalty })}
+            ${IntField({ label: "Consider N tokens for penalize", max: 2048, min: 0, name: "repeat_last_n", value: params.value.repeat_last_n })}
+            ${IntField({ label: "Top-K sampling", max: 100, min: -1, name: "top_k", value: params.value.top_k })}
+            ${FloatField({ label: "Top-P sampling", max: 1.0, min: 0.0, name: "top_p", step: 0.01, value: params.value.top_p })}
             </fieldset>
             <details>
             <summary>More options</summary>
             <fieldset class="two">
-            ${FloatField({label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z})}
-            ${FloatField({label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p})}
-            ${FloatField({label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty})}
-            ${FloatField({label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty})}
+            ${FloatField({ label: "TFS-Z", max: 1.0, min: 0.0, name: "tfs_z", step: 0.01, value: params.value.tfs_z })}
+            ${FloatField({ label: "Typical P", max: 1.0, min: 0.0, name: "typical_p", step: 0.01, value: params.value.typical_p })}
+            ${FloatField({ label: "Presence penalty", max: 1.0, min: 0.0, name: "presence_penalty", step: 0.01, value: params.value.presence_penalty })}
+            ${FloatField({ label: "Frequency penalty", max: 1.0, min: 0.0, name: "frequency_penalty", step: 0.01, value: params.value.frequency_penalty })}
             </fieldset>
             <hr />
             <fieldset class="three">

@@ -716,11 +760,11 @@
             <label><input type="radio" name="mirostat" value="1" checked=${params.value.mirostat == 1} oninput=${updateParamsInt} /> Mirostat v1</label>
             <label><input type="radio" name="mirostat" value="2" checked=${params.value.mirostat == 2} oninput=${updateParamsInt} /> Mirostat v2</label>
             </div>
-            ${FloatField({label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau})}
-            ${FloatField({label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta})}
+            ${FloatField({ label: "Mirostat tau", max: 10.0, min: 0.0, name: "mirostat_tau", step: 0.01, value: params.value.mirostat_tau })}
+            ${FloatField({ label: "Mirostat eta", max: 1.0, min: 0.0, name: "mirostat_eta", step: 0.01, value: params.value.mirostat_eta })}
             </fieldset>
             <fieldset>
-            ${IntField({label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs})}
+            ${IntField({ label: "Show Probabilities", max: 10, min: 0, name: "n_probs", value: params.value.n_probs })}
             </fieldset>
             </details>
             </form>

@@ -759,20 +803,20 @@
         const popoverChildren = html`
             <div class="prob-set">
                 ${probs.map((p, index) => {
                     return html`
                         <div
                             key=${index}
                             title=${`prob: ${p.prob}`}
                             style=${{
                                 padding: '0.3em',
                                 backgroundColor: p.tok_str === content ? probColor(p.prob) : 'transparent'
                             }}
                         >
                             <span>${p.tok_str}: </span>
                             <span>${Math.floor(p.prob * 100)}%</span>
                         </div>
                     `
                 })}
             </div>
         `
 

@@ -851,9 +895,9 @@
             ref=${popoverRef}
             class="popover-content"
             style=${{
                 top: position.value.top,
                 left: position.value.left,
             }}
         >
             ${props.popoverChildren}
         </div>

@@ -952,8 +996,11 @@
 </head>
 
 <body>
-    <div id="container"></div>
+    <div id="container">
+        <input type="file" id="fileInput" accept="image/*" style="display: none;">
+    </div>
     <div id="portal"></div>
 </body>
 
 </html>
(File diff suppressed because it is too large.)
@@ -1004,12 +1004,15 @@ class SpecialVocab:
     merges: list[str] = []
     special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
     special_token_ids: dict[str, int] = {}
+    n_vocab: int | None = None
 
     def __init__(
         self, path: str | os.PathLike[str], load_merges: bool = False,
         special_token_types: tuple[str, ...] | None = None,
+        n_vocab: int | None = None,
     ):
         self.special_token_ids = {}
+        self.n_vocab = n_vocab
         self.load_merges = load_merges
         if special_token_types is not None:
             self.special_token_types = special_token_types

@@ -1019,6 +1022,16 @@ class SpecialVocab:
         if not self._try_load_from_tokenizer_json(path):
             self._try_load_from_config_json(path)
 
+    def _set_special_token(self, typ: str, tid: Any):
+        if not isinstance(tid, int) or tid < 0:
+            return
+        if self.n_vocab is None or tid < self.n_vocab:
+            self.special_token_ids[typ] = tid
+            return
+        print(f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
+              file = sys.stderr)
+
+
     def _try_load_from_tokenizer_json(self, path: Path) -> bool:
         tokenizer_file = path / 'tokenizer.json'
         if not tokenizer_file.is_file():

@@ -1046,10 +1059,11 @@ class SpecialVocab:
                 tc_content = entry_content
             else:
                 continue
-            for maybe_token_id in (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content):
-                if isinstance(maybe_token_id, int) and maybe_token_id >= 0:
-                    self.special_token_ids[typ] = maybe_token_id
-                break
+            # We only need the first match here.
+            maybe_token_id = next((
+                atok.get('id') for atok in added_tokens
+                if atok.get('content') == tc_content), None)
+            self._set_special_token(typ, maybe_token_id)
         return True
 
     def _try_load_from_config_json(self, path: Path) -> bool:

@@ -1059,21 +1073,21 @@ class SpecialVocab:
         with open(config_file, encoding = 'utf-8') as f:
             config = json.load(f)
         for typ in self.special_token_types:
-            maybe_token_id = config.get(f'{typ}_token_id')
-            if isinstance(maybe_token_id, int) and maybe_token_id >= 0:
-                self.special_token_ids[typ] = maybe_token_id
+            self._set_special_token(typ, config.get(f'{typ}_token_id'))
         return True
 
-    def add_to_gguf(self, gw: GGUFWriter) -> None:
+    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
         if len(self.merges) > 0:
-            print(f'gguf: Adding {len(self.merges)} merge(s).')
+            if not quiet:
+                print(f'gguf: Adding {len(self.merges)} merge(s).')
             gw.add_token_merges(self.merges)
         for typ, tokid in self.special_token_ids.items():
             handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
             if handler is None:
-                print(f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping')
+                print(f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping', file = sys.stderr)
                 continue
-            print(f'gguf: Setting special token type {typ} to {tokid}')
+            if not quiet:
+                print(f'gguf: Setting special token type {typ} to {tokid}')
             handler(tokid)
 
     def __repr__(self) -> str:
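The `n_vocab` parameter added to `SpecialVocab` above is what the convert scripts earlier in this commit now pass (`n_vocab = len(tokens)` or `vocab.vocab_size`). A condensed usage sketch; the model directory, token list, and architecture name are placeholders, and the writer's final output calls are omitted:

```python
from pathlib import Path
import gguf

dir_model = Path("models/my-hf-model")   # hypothetical HF model directory
tokens = [b"<s>", b"</s>", b"hello"]     # token list built by a convert script

gguf_writer = gguf.GGUFWriter("vocab.gguf", "llama")  # arch name is illustrative
gguf_writer.add_token_list(tokens)

# With n_vocab set, special token ids that fall outside the real vocabulary are
# skipped with a warning instead of being written into the GGUF file.
special_vocab = gguf.SpecialVocab(dir_model, load_merges=True, n_vocab=len(tokens))
special_vocab.add_to_gguf(gguf_writer)   # quiet=True suppresses the progress prints
```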
llama.cpp: 54 changes

@@ -1000,14 +1000,15 @@ static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
-static std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
     std::vector<char> result(8, 0);
     const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
         GGML_ASSERT(check == -n_tokens);
-    } else {
+    }
+    else {
         result.resize(n_tokens);
     }
 

@@ -1240,10 +1241,10 @@ struct llama_vocab {
     id special_eot_id = 32010;
 
     int find_bpe_rank(std::string token_left, std::string token_right) const {
-        replace_all(token_left, " ", "\u0120");
-        replace_all(token_left, "\n", "\u010A");
-        replace_all(token_right, " ", "\u0120");
-        replace_all(token_right, "\n", "\u010A");
+        GGML_ASSERT(token_left.find(" ") == std::string::npos);
+        GGML_ASSERT(token_left.find("\n") == std::string::npos);
+        GGML_ASSERT(token_right.find(" ") == std::string::npos);
+        GGML_ASSERT(token_right.find("\n") == std::string::npos);
 
         auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
         if (it == bpe_ranks.end()) {

@@ -2292,15 +2293,35 @@ static void llm_load_vocab(
     if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
         vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
     } else {
-        vocab.linefeed_id = llama_tokenize_internal(vocab, "\u010A", false)[0];
+        const std::vector<int> ids = llama_tokenize_internal(vocab, "\u010A", false);
+        GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
+        vocab.linefeed_id = ids[0];
     }
 
     // special tokens
-    GGUF_GET_KEY(ctx, vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_BOS_ID));
-    GGUF_GET_KEY(ctx, vocab.special_eos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_EOS_ID));
-    GGUF_GET_KEY(ctx, vocab.special_unk_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_UNK_ID));
-    GGUF_GET_KEY(ctx, vocab.special_sep_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_SEP_ID));
-    GGUF_GET_KEY(ctx, vocab.special_pad_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_PAD_ID));
+    {
+        const std::vector<std::pair<enum llm_kv, int32_t &>> special_token_types = {
+            { LLM_KV_TOKENIZER_BOS_ID, vocab.special_bos_id },
+            { LLM_KV_TOKENIZER_EOS_ID, vocab.special_eos_id },
+            { LLM_KV_TOKENIZER_UNK_ID, vocab.special_unk_id },
+            { LLM_KV_TOKENIZER_SEP_ID, vocab.special_sep_id },
+            { LLM_KV_TOKENIZER_PAD_ID, vocab.special_pad_id },
+        };
+        for (const auto & it : special_token_types) {
+            const std::string & key = kv(std::get<0>(it));
+            int32_t & id = std::get<1>(it), old_id = id;
+
+            GGUF_GET_KEY(ctx, id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, key);
+            // Must be >= -1 and < vocab size. Since the key is unsigned, -1
+            // can only come from the default value, so there's no point in
+            // validating that.
+            if (size_t(id + 1) > vocab.id_to_token.size()) {
+                LLAMA_LOG_WARN("%s: bad special token: '%s' = %d, using default id %d\n",
+                    __func__, key.c_str(), id, old_id);
+                id = old_id;
+            }
+        }
+    }
 
     // build special tokens cache
     {

@@ -6203,11 +6224,10 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
 }
 
 static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
+    static const char * hex = "0123456789ABCDEF";
     switch (llama_vocab_get_type(vocab)) {
         case LLAMA_VOCAB_TYPE_SPM: {
-            char buf[7];
-            int result = snprintf(buf, sizeof(buf), "<0x%02X>", ch);
-            GGML_ASSERT(0 <= result && result < 7);
+            const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
             return vocab.token_to_id.at(buf);
         }
         case LLAMA_VOCAB_TYPE_BPE: {

@@ -7580,7 +7600,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
 
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id = candidates->data[i].id;
-        const std::string piece = llama_token_to_str(ctx, id);
+        const std::string piece = llama_token_to_piece(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;

@@ -7792,7 +7812,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    const std::string piece = llama_token_to_str(ctx, token);
+    const std::string piece = llama_token_to_piece(ctx, token);
 
     // Note terminating 0 in decoded string
     const auto decoded = decode_utf8(piece.c_str(), grammar->partial_utf8);
models/ggml-vocab-mpt.gguf: new binary file (not shown)
@@ -31,6 +31,7 @@ llama_test_executable (test-tokenizer-1-llama test-tokenizer-1-llama.cpp ${CMAKE
 llama_build_executable(test-tokenizer-1-bpe.cpp)
 llama_test_executable (test-tokenizer-1-falcon test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
 llama_test_executable(test-tokenizer-1-aquila test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
+llama_test_executable(test-tokenizer-1-mpt test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
 llama_build_and_test_executable(test-grammar-parser.cpp)
 llama_build_and_test_executable(test-llama-grammar.cpp)
 llama_build_and_test_executable(test-grad0.cpp) # SLOW