Refactor llama-run to split out opt struct
Put it in its own file

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
parent 8d59d91171
commit ccc6243371

4 changed files with 195 additions and 175 deletions

examples/run/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(TARGET llama-run)
-add_executable(${TARGET} run.cpp)
+add_executable(${TARGET} run.cpp opt.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_17)

examples/run/opt.cpp (new file, 158 lines)
@@ -0,0 +1,158 @@
#include "opt.h"

#include <cstdarg>

int printe(const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    const int ret = vfprintf(stderr, fmt, args);
    va_end(args);

    return ret;
}

int Opt::init(int argc, const char ** argv) {
    ctx_params = llama_context_default_params();
    model_params = llama_model_default_params();
    context_size_default = ctx_params.n_batch;
    ngl_default = model_params.n_gpu_layers;
    common_params_sampling sampling;
    temperature_default = sampling.temp;

    if (argc < 2) {
        printe("Error: No arguments provided.\n");
        print_help();
        return 1;
    }

    // Parse arguments
    if (parse(argc, argv)) {
        printe("Error: Failed to parse arguments.\n");
        print_help();
        return 1;
    }

    // If help is requested, show help and exit
    if (help) {
        print_help();
        return 2;
    }

    ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default;
    ctx_params.n_ctx = ctx_params.n_batch;
    model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
    temperature = temperature >= 0 ? temperature : temperature_default;

    return 0; // Success
}

bool Opt::parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt) {
    return strcmp(argv[i], short_opt) == 0 || strcmp(argv[i], long_opt) == 0;
}

int Opt::handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
    if (i + 1 >= argc) {
        return 1;
    }

    option_value = std::atoi(argv[++i]);

    return 0;
}

int Opt::handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
    if (i + 1 >= argc) {
        return 1;
    }

    option_value = std::atof(argv[++i]);

    return 0;
}

int Opt::parse(int argc, const char ** argv) {
    bool options_parsing = true;
    for (int i = 1, positional_args_i = 0; i < argc; ++i) {
        if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
            if (handle_option_with_value(argc, argv, i, context_size) == 1) {
                return 1;
            }
        } else if (options_parsing && (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0)) {
            if (handle_option_with_value(argc, argv, i, ngl) == 1) {
                return 1;
            }
        } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
            if (handle_option_with_value(argc, argv, i, temperature) == 1) {
                return 1;
            }
        } else if (options_parsing &&
                   (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
            verbose = true;
        } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
            help = true;
            return 0;
        } else if (options_parsing && strcmp(argv[i], "--") == 0) {
            options_parsing = false;
        } else if (positional_args_i == 0) {
            if (!argv[i][0] || argv[i][0] == '-') {
                return 1;
            }

            ++positional_args_i;
            model_ = argv[i];
        } else if (positional_args_i == 1) {
            ++positional_args_i;
            user = argv[i];
        } else {
            user += " " + std::string(argv[i]);
        }
    }

    return 0;
}

void Opt::print_help() const {
    printf(
        "Description:\n"
        "  Runs a llm\n"
        "\n"
        "Usage:\n"
        "  llama-run [options] model [prompt]\n"
        "\n"
        "Options:\n"
        "  -c, --context-size <value>\n"
        "      Context size (default: %d)\n"
        "  -n, --ngl <value>\n"
        "      Number of GPU layers (default: %d)\n"
        "  --temp <value>\n"
        "      Temperature (default: %.1f)\n"
        "  -v, --verbose, --log-verbose\n"
        "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
        "  -h, --help\n"
        "      Show help message\n"
        "\n"
        "Commands:\n"
        "  model\n"
        "      Model is a string with an optional prefix of \n"
        "      huggingface:// (hf://), ollama://, https:// or file://.\n"
        "      If no protocol is specified and a file exists in the specified\n"
        "      path, file:// is assumed, otherwise if a file does not exist in\n"
        "      the specified path, ollama:// is assumed. Models that are being\n"
        "      pulled are downloaded with .partial extension while being\n"
        "      downloaded and then renamed as the file without the .partial\n"
        "      extension when complete.\n"
        "\n"
        "Examples:\n"
        "  llama-run llama3\n"
        "  llama-run ollama://granite-code\n"
        "  llama-run ollama://smollm:135m\n"
        "  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
        "  llama-run "
        "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
        "  llama-run https://example.com/some-file1.gguf\n"
        "  llama-run some-file2.gguf\n"
        "  llama-run file://some-file3.gguf\n"
        "  llama-run --ngl 999 some-file4.gguf\n"
        "  llama-run --ngl 999 some-file5.gguf Hello World\n",
        context_size_default, ngl_default, temperature_default);
}

examples/run/opt.h (new file, 35 lines)
@@ -0,0 +1,35 @@
#pragma once

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>

#include "common.h"
#include "llama-cpp.h"

GGML_ATTRIBUTE_FORMAT(1, 2)
int printe(const char * fmt, ...);

struct Opt {
    int init(int argc, const char ** argv);

    // Public members
    llama_context_params ctx_params;
    llama_model_params model_params;
    std::string model_;
    std::string user;
    int context_size = -1, ngl = -1;
    float temperature = -1;
    bool verbose = false;

    int context_size_default = -1, ngl_default = -1;
    float temperature_default = -1;
    bool help = false;

    bool parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt);
    int handle_option_with_value(int argc, const char ** argv, int & i, int & option_value);
    int handle_option_with_value(int argc, const char ** argv, int & i, float & option_value);
    int parse(int argc, const char ** argv);
    void print_help() const;
};
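
For context, a minimal sketch of how a caller such as run.cpp's main() might drive the new struct; the actual call site is not shown in this diff, so the main() below is only an illustrative assumption based on the return-code convention visible in Opt::init (0 = proceed, 1 = argument error, 2 = help was printed).

// Hypothetical usage sketch (not part of this commit) of the extracted Opt struct.
#include "opt.h"

int main(int argc, const char ** argv) {
    Opt opt;
    const int ret = opt.init(argc, argv); // parses argv and fills ctx_params/model_params
    if (ret == 2) {
        return 0; // -h/--help: init() already printed the help text
    }
    if (ret != 0) {
        return 1; // bad or missing arguments: init() already printed an error and the help text
    }

    // opt.model_, opt.user, opt.ctx_params, opt.model_params, opt.temperature and
    // opt.verbose are now ready for the rest of llama-run to consume.
    printe("model: %s, context size: %d\n", opt.model_.c_str(), (int) opt.ctx_params.n_batch);
    return 0;
}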

examples/run/run.cpp
@@ -23,9 +23,8 @@
 #include <string>
 #include <vector>

-#include "common.h"
 #include "json.hpp"
-#include "llama-cpp.h"
+#include "opt.h"

 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 [[noreturn]] static void sigint_handler(int) {
@@ -52,178 +51,6 @@ static std::string fmt(const char * fmt, ...) {
     return buf;
 }

-GGML_ATTRIBUTE_FORMAT(1, 2)
-static int printe(const char * fmt, ...) {
-    va_list args;
-    va_start(args, fmt);
-    const int ret = vfprintf(stderr, fmt, args);
-    va_end(args);
-
-    return ret;
-}
-
-class Opt {
-  public:
-    int init(int argc, const char ** argv) {
-        ctx_params = llama_context_default_params();
-        model_params = llama_model_default_params();
-        context_size_default = ctx_params.n_batch;
-        ngl_default = model_params.n_gpu_layers;
-        common_params_sampling sampling;
-        temperature_default = sampling.temp;
-
-        if (argc < 2) {
-            printe("Error: No arguments provided.\n");
-            print_help();
-            return 1;
-        }
-
-        // Parse arguments
-        if (parse(argc, argv)) {
-            printe("Error: Failed to parse arguments.\n");
-            print_help();
-            return 1;
-        }
-
-        // If help is requested, show help and exit
-        if (help) {
-            print_help();
-            return 2;
-        }
-
-        ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default;
-        ctx_params.n_ctx = ctx_params.n_batch;
-        model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
-        temperature = temperature >= 0 ? temperature : temperature_default;
-
-        return 0; // Success
-    }
-
-    llama_context_params ctx_params;
-    llama_model_params model_params;
-    std::string model_;
-    std::string user;
-    int context_size = -1, ngl = -1;
-    float temperature = -1;
-    bool verbose = false;
-
-  private:
-    int context_size_default = -1, ngl_default = -1;
-    float temperature_default = -1;
-    bool help = false;
-
-    bool parse_flag(const char ** argv, int i, const char * short_opt, const char * long_opt) {
-        return strcmp(argv[i], short_opt) == 0 || strcmp(argv[i], long_opt) == 0;
-    }
-
-    int handle_option_with_value(int argc, const char ** argv, int & i, int & option_value) {
-        if (i + 1 >= argc) {
-            return 1;
-        }
-
-        option_value = std::atoi(argv[++i]);
-
-        return 0;
-    }
-
-    int handle_option_with_value(int argc, const char ** argv, int & i, float & option_value) {
-        if (i + 1 >= argc) {
-            return 1;
-        }
-
-        option_value = std::atof(argv[++i]);
-
-        return 0;
-    }
-
-    int parse(int argc, const char ** argv) {
-        bool options_parsing = true;
-        for (int i = 1, positional_args_i = 0; i < argc; ++i) {
-            if (options_parsing && (strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--context-size") == 0)) {
-                if (handle_option_with_value(argc, argv, i, context_size) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && (strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--ngl") == 0)) {
-                if (handle_option_with_value(argc, argv, i, ngl) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
-                if (handle_option_with_value(argc, argv, i, temperature) == 1) {
-                    return 1;
-                }
-            } else if (options_parsing &&
-                       (parse_flag(argv, i, "-v", "--verbose") || parse_flag(argv, i, "-v", "--log-verbose"))) {
-                verbose = true;
-            } else if (options_parsing && parse_flag(argv, i, "-h", "--help")) {
-                help = true;
-                return 0;
-            } else if (options_parsing && strcmp(argv[i], "--") == 0) {
-                options_parsing = false;
-            } else if (positional_args_i == 0) {
-                if (!argv[i][0] || argv[i][0] == '-') {
-                    return 1;
-                }
-
-                ++positional_args_i;
-                model_ = argv[i];
-            } else if (positional_args_i == 1) {
-                ++positional_args_i;
-                user = argv[i];
-            } else {
-                user += " " + std::string(argv[i]);
-            }
-        }
-
-        return 0;
-    }
-
-    void print_help() const {
-        printf(
-            "Description:\n"
-            "  Runs a llm\n"
-            "\n"
-            "Usage:\n"
-            "  llama-run [options] model [prompt]\n"
-            "\n"
-            "Options:\n"
-            "  -c, --context-size <value>\n"
-            "      Context size (default: %d)\n"
-            "  -n, --ngl <value>\n"
-            "      Number of GPU layers (default: %d)\n"
-            "  --temp <value>\n"
-            "      Temperature (default: %.1f)\n"
-            "  -v, --verbose, --log-verbose\n"
-            "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
-            "  -h, --help\n"
-            "      Show help message\n"
-            "\n"
-            "Commands:\n"
-            "  model\n"
-            "      Model is a string with an optional prefix of \n"
-            "      huggingface:// (hf://), ollama://, https:// or file://.\n"
-            "      If no protocol is specified and a file exists in the specified\n"
-            "      path, file:// is assumed, otherwise if a file does not exist in\n"
-            "      the specified path, ollama:// is assumed. Models that are being\n"
-            "      pulled are downloaded with .partial extension while being\n"
-            "      downloaded and then renamed as the file without the .partial\n"
-            "      extension when complete.\n"
-            "\n"
-            "Examples:\n"
-            "  llama-run llama3\n"
-            "  llama-run ollama://granite-code\n"
-            "  llama-run ollama://smollm:135m\n"
-            "  llama-run hf://QuantFactory/SmolLM-135M-GGUF/SmolLM-135M.Q2_K.gguf\n"
-            "  llama-run "
-            "huggingface://bartowski/SmolLM-1.7B-Instruct-v0.2-GGUF/SmolLM-1.7B-Instruct-v0.2-IQ3_M.gguf\n"
-            "  llama-run https://example.com/some-file1.gguf\n"
-            "  llama-run some-file2.gguf\n"
-            "  llama-run file://some-file3.gguf\n"
-            "  llama-run --ngl 999 some-file4.gguf\n"
-            "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
-    }
-};
-
 struct progress_data {
     size_t file_size = 0;
     std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();