Merge master into server-cfg
commit 890d1b8446
44 changed files with 1331 additions and 437 deletions
@@ -1,4 +1,5 @@
set(TARGET baby-llama)
add_executable(${TARGET} baby-llama.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,5 +1,6 @@
set(TARGET benchmark)
add_executable(${TARGET} benchmark-matmult.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -168,6 +168,18 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                break;
            }
            params.n_ctx = std::stoi(argv[i]);
        } else if (arg == "--rope-freq-base") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.rope_freq_base = std::stof(argv[i]);
        } else if (arg == "--rope-freq-scale") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.rope_freq_scale = std::stof(argv[i]);
        } else if (arg == "--memory-f32") {
            params.memory_f16 = false;
        } else if (arg == "--top-p") {
@@ -267,6 +279,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                break;
            }
            params.n_keep = std::stoi(argv[i]);
        } else if (arg == "--chunks") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.n_chunks = std::stoi(argv[i]);
        } else if (arg == "-m" || arg == "--model") {
            if (++i >= argc) {
                invalid_param = true;
@@ -493,6 +511,8 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stderr, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
    fprintf(stderr, " --cfg-smooth-factor N smooth factor between old and new logits (default: %f, 1.0 = no smoothing)\n", params.cfg_smooth_factor);
    fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
    fprintf(stderr, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base);
    fprintf(stderr, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale);
    fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
    fprintf(stderr, " --no-penalize-nl do not penalize newline token\n");
    fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
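For orientation, a possible invocation that exercises the two new RoPE options alongside the existing -c/--ctx-size flag; the model path, prompt, and the chosen values are illustrative only and not taken from this diff:

```sh
# Sketch only: run main with a non-default RoPE base frequency and scaling factor.
./main -m ./models/7B/ggml-model-q4_0.bin \
    -c 2048 \
    --rope-freq-base 20000 \
    --rope-freq-scale 0.75 \
    -p "Hello"
```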
@@ -501,6 +521,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
    fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
    fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
    fprintf(stderr, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
    if (llama_mlock_supported()) {
        fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
    }
@@ -573,6 +594,8 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
    lparams.use_mlock = params.use_mlock;
    lparams.logits_all = params.perplexity;
    lparams.embedding = params.embedding;
    lparams.rope_freq_base = params.rope_freq_base;
    lparams.rope_freq_scale = params.rope_freq_scale;

    return lparams;
}
@@ -28,10 +28,13 @@ struct gpt_params {
    int32_t n_ctx = 512; // context size
    int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
    int32_t n_keep = 0; // number of tokens to keep from initial prompt
    int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
    int32_t n_gpu_layers = 0; // number of layers to store in VRAM
    int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
    float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
    int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
    float rope_freq_base = 10000.0f; // RoPE base frequency
    float rope_freq_scale = 1.0f; // RoPE frequency scaling factor

    // sampling parameters
    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
@@ -1,5 +1,6 @@
set(TARGET embdinput)
add_library(${TARGET} embd-input-lib.cpp embd-input.h)
install(TARGETS ${TARGET} LIBRARY)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)

@@ -8,6 +9,7 @@ endif()

set(TARGET embd-input-test)
add_executable(${TARGET} embd-input-test.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama embdinput ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -17,7 +17,7 @@ make
import torch

bin_path = "../LLaVA-13b-delta-v1-1/pytorch_model-00003-of-00003.bin"
pth_path = "./examples/embd_input/llava_projection.pth"
pth_path = "./examples/embd-input/llava_projection.pth"

dic = torch.load(bin_path)
used_key = ["model.mm_projector.weight","model.mm_projector.bias"]
@@ -59,7 +59,7 @@ if __name__=="__main__":
    # Also here can use pytorch_model-00003-of-00003.bin directly.
    a.load_projection(os.path.join(
        os.path.dirname(__file__) ,
        "llava_projetion.pth"))
        "llava_projection.pth"))
    respose = a.chat_with_image(
        Image.open("./media/llama1-logo.png").convert('RGB'),
        "what is the text in the picture?")
@@ -1,5 +1,6 @@
set(TARGET embedding)
add_executable(${TARGET} embedding.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -1,5 +1,6 @@
set(TARGET main)
add_executable(${TARGET} main.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -84,9 +84,17 @@ int main(int argc, char ** argv) {
        return 0;
    }

    if (params.rope_freq_base != 10000.0) {
        fprintf(stderr, "%s: warning: changing RoPE frequency base to %g (default 10000.0)\n", __func__, params.rope_freq_base);
    }

    if (params.rope_freq_scale != 1.0) {
        fprintf(stderr, "%s: warning: scaling RoPE frequency by %g (default 1.0)\n", __func__, params.rope_freq_scale);
    }

    if (params.n_ctx > 2048) {
        fprintf(stderr, "%s: warning: model might not support context sizes greater than 2048 tokens (%d specified);"
                "expect poor results\n", __func__, params.n_ctx);
        fprintf(stderr, "%s: warning: base model only supports context sizes no greater than 2048 tokens (%d specified);"
                " you are on your own\n", __func__, params.n_ctx);
    } else if (params.n_ctx < 8) {
        fprintf(stderr, "%s: warning: minimum context size is 8, using minimum size.\n", __func__);
        params.n_ctx = 8;
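To make the branches above concrete, a hypothetical run that raises the context window past 2048 tokens while compensating with linear RoPE scaling; paths and values are placeholders, not part of the commit:

```sh
# Sketch only: 4096-token context with linearly scaled RoPE.
# This prints the rope-freq-scale warning and the context-size warning shown above.
./main -m ./models/7B/ggml-model-q4_0.bin -c 4096 --rope-freq-scale 0.5 \
    -p "Summarize the following document:"
```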
@@ -1,3 +1,4 @@
set(TEST_TARGET metal)
add_executable(${TEST_TARGET} metal.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE ggml)
@@ -1,5 +1,6 @@
set(TARGET perplexity)
add_executable(${TARGET} perplexity.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -32,13 +32,15 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
    // BOS tokens will be added for each chunk before eval
    auto tokens = ::llama_tokenize(ctx, params.prompt, true);

    int count = 0;
    const int n_chunk_max = tokens.size() / params.n_ctx;

    const int n_chunk = tokens.size() / params.n_ctx;
    const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
    const int n_vocab = llama_n_vocab(ctx);
    const int n_batch = params.n_batch;

    int count = 0;
    double nll = 0.0;

    fprintf(stderr, "%s: calculating perplexity over %d chunks, batch_size=%d\n", __func__, n_chunk, n_batch);

    for (int i = 0; i < n_chunk; ++i) {
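To illustrate the new --chunks limit, a hypothetical perplexity run over only the first few 512-token chunks; the model path, dataset path, and the -f prompt-file flag are assumed from the surrounding tooling rather than taken from this diff:

```sh
# Sketch only: limit perplexity evaluation to 16 chunks instead of the whole file.
./perplexity -m ./models/7B/ggml-model-q4_0.bin \
    -f ./wikitext-2-raw/wiki.test.raw \
    -c 512 --chunks 16
```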
@@ -1,4 +1,5 @@
set(TARGET quantize-stats)
add_executable(${TARGET} quantize-stats.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -1,5 +1,6 @@
set(TARGET quantize)
add_executable(${TARGET} quantize.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -14,103 +14,27 @@ struct quant_option {
};

static const std::vector<struct quant_option> QUANT_OPTIONS = {
    {
        "Q4_0",
        LLAMA_FTYPE_MOSTLY_Q4_0,
        " 3.50G, +0.2499 ppl @ 7B - small, very high quality loss - legacy, prefer using Q3_K_M",
    },
    {
        "Q4_1",
        LLAMA_FTYPE_MOSTLY_Q4_1,
        " 3.90G, +0.1846 ppl @ 7B - small, substantial quality loss - legacy, prefer using Q3_K_L",
    },
    {
        "Q5_0",
        LLAMA_FTYPE_MOSTLY_Q5_0,
        " 4.30G, +0.0796 ppl @ 7B - medium, balanced quality - legacy, prefer using Q4_K_M",
    },
    {
        "Q5_1",
        LLAMA_FTYPE_MOSTLY_Q5_1,
        " 4.70G, +0.0415 ppl @ 7B - medium, low quality loss - legacy, prefer using Q5_K_M",
    },
    { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.50G, +0.2499 ppl @ 7B", },
    { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1846 ppl @ 7B", },
    { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.30G, +0.0796 ppl @ 7B", },
    { "Q5_1", LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G, +0.0415 ppl @ 7B", },
#ifdef GGML_USE_K_QUANTS
    {
        "Q2_K",
        LLAMA_FTYPE_MOSTLY_Q2_K,
        " 2.67G, +0.8698 ppl @ 7B - smallest, extreme quality loss - not recommended",
    },
    {
        "Q3_K",
        LLAMA_FTYPE_MOSTLY_Q3_K_M,
        "alias for Q3_K_M"
    },
    {
        "Q3_K_S",
        LLAMA_FTYPE_MOSTLY_Q3_K_S,
        " 2.75G, +0.5505 ppl @ 7B - very small, very high quality loss",
    },
    {
        "Q3_K_M",
        LLAMA_FTYPE_MOSTLY_Q3_K_M,
        " 3.06G, +0.2437 ppl @ 7B - very small, very high quality loss",
    },
    {
        "Q3_K_L",
        LLAMA_FTYPE_MOSTLY_Q3_K_L,
        " 3.35G, +0.1803 ppl @ 7B - small, substantial quality loss",
    },
    {
        "Q4_K",
        LLAMA_FTYPE_MOSTLY_Q4_K_M,
        "alias for Q4_K_M",
    },
    {
        "Q4_K_S",
        LLAMA_FTYPE_MOSTLY_Q4_K_S,
        " 3.56G, +0.1149 ppl @ 7B - small, significant quality loss",
    },
    {
        "Q4_K_M",
        LLAMA_FTYPE_MOSTLY_Q4_K_M,
        " 3.80G, +0.0535 ppl @ 7B - medium, balanced quality - *recommended*",
    },
    {
        "Q5_K",
        LLAMA_FTYPE_MOSTLY_Q5_K_M,
        "alias for Q5_K_M",
    },
    {
        "Q5_K_S",
        LLAMA_FTYPE_MOSTLY_Q5_K_S,
        " 4.33G, +0.0353 ppl @ 7B - large, low quality loss - *recommended*",
    },
    {
        "Q5_K_M",
        LLAMA_FTYPE_MOSTLY_Q5_K_M,
        " 4.45G, +0.0142 ppl @ 7B - large, very low quality loss - *recommended*",
    },
    {
        "Q6_K",
        LLAMA_FTYPE_MOSTLY_Q6_K,
        " 5.15G, +0.0044 ppl @ 7B - very large, extremely low quality loss",
    },
    { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.67G, +0.8698 ppl @ 7B", },
    { "Q3_K", LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
    { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5505 ppl @ 7B", },
    { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.06G, +0.2437 ppl @ 7B", },
    { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1803 ppl @ 7B", },
    { "Q4_K", LLAMA_FTYPE_MOSTLY_Q4_K_M, "alias for Q4_K_M", },
    { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.56G, +0.1149 ppl @ 7B", },
    { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G, +0.0535 ppl @ 7B", },
    { "Q5_K", LLAMA_FTYPE_MOSTLY_Q5_K_M, "alias for Q5_K_M", },
    { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G, +0.0353 ppl @ 7B", },
    { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0142 ppl @ 7B", },
    { "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0044 ppl @ 7B", },
#endif
    {
        "Q8_0",
        LLAMA_FTYPE_MOSTLY_Q8_0,
        " 6.70G, +0.0004 ppl @ 7B - very large, extremely low quality loss - not recommended",
    },
    {
        "F16",
        LLAMA_FTYPE_MOSTLY_F16,
        "13.00G @ 7B - extremely large, virtually no quality loss - not recommended",
    },
    {
        "F32",
        LLAMA_FTYPE_ALL_F32,
        "26.00G @ 7B - absolutely huge, lossless - not recommended",
    },
    { "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ 7B", },
    { "F16", LLAMA_FTYPE_MOSTLY_F16, "13.00G @ 7B", },
    { "F32", LLAMA_FTYPE_ALL_F32, "26.00G @ 7B", },
};
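For reference, a possible invocation of the quantize tool built above, using one of the type names listed in QUANT_OPTIONS; the file paths are illustrative placeholders:

```sh
# Sketch only: produce a Q4_K_M quantization from an f16 GGML model file.
./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_k_m.bin Q4_K_M
```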
@@ -1,5 +1,6 @@
set(TARGET save-load-state)
add_executable(${TARGET} save-load-state.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -2,6 +2,7 @@ set(TARGET server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_executable(${TARGET} server.cpp json.hpp httplib.h)
install(TARGETS ${TARGET} RUNTIME)
target_compile_definitions(${TARGET} PRIVATE
    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
)
@@ -66,6 +66,7 @@ Using [curl](https://curl.se/). On Windows `curl.exe` should be available in the
```sh
curl --request POST \
    --url http://localhost:8080/completion \
    --header "Content-Type: application/json" \
    --data '{"prompt": "Building a website can be done in 10 simple steps:","n_predict": 128}'
```
@@ -32,6 +32,7 @@ tokenize() {
        --silent \
        --request POST \
        --url "${API_URL}/tokenize" \
        --header "Content-Type: application/json" \
        --data-raw "$(jq -ns --arg content "$1" '{content:$content}')" \
    | jq '.tokens[]'
}

@@ -64,6 +65,7 @@ chat_completion() {
        --no-buffer \
        --request POST \
        --url "${API_URL}/completion" \
        --header "Content-Type: application/json" \
        --data-raw "${DATA}")

    printf "\n"
@@ -611,6 +611,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params,
    fprintf(stderr, " -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
    fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
    fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
    fprintf(stderr, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base);
    fprintf(stderr, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale);
    fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
    fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
    fprintf(stderr, " not recommended: doubles context memory required and no measurable increase in quality\n");
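By way of illustration, a possible server launch exercising the options printed above; the model path is a placeholder, and --host/--port are assumed from the server's existing options rather than shown in this hunk:

```sh
# Sketch only: start the HTTP server with a 4096-token context and linear RoPE scaling.
./server -m ./models/7B/ggml-model-q4_0.bin \
    -c 4096 --rope-freq-scale 0.5 \
    --host 127.0.0.1 --port 8080
```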
@@ -698,6 +700,18 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
                break;
            }
            params.n_ctx = std::stoi(argv[i]);
        } else if (arg == "--rope-freq-base") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.rope_freq_base = std::stof(argv[i]);
        } else if (arg == "--rope-freq-scale") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.rope_freq_scale = std::stof(argv[i]);
        } else if (arg == "--memory-f32" || arg == "--memory_f32") {
            params.memory_f16 = false;
        } else if (arg == "--threads" || arg == "-t") {
@@ -1,5 +1,6 @@
set(TARGET simple)
add_executable(${TARGET} simple.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)
if(TARGET BUILD_INFO)
@@ -1,4 +1,5 @@
set(TARGET train-text-from-scratch)
add_executable(${TARGET} train-text-from-scratch.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)