check C++ code with -Wmissing-declarations
This commit is contained in:
parent
feea179e9f
commit
cd27e8ab32
23 changed files with 157 additions and 92 deletions
|
@ -427,6 +427,7 @@ if (LLAMA_ALL_WARNINGS)
|
|||
-Wextra
|
||||
-Wpedantic
|
||||
-Wcast-qual
|
||||
-Wmissing-declarations
|
||||
-Wno-unused-function
|
||||
-Wno-multichar
|
||||
)
|
||||
|
|
2
Makefile
2
Makefile
|
@ -172,7 +172,7 @@ endif # LLAMA_DISABLE_LOGS
|
|||
# warnings
|
||||
MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
||||
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
|
||||
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
|
||||
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
|
||||
|
||||
ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
|
||||
# g++ only
|
||||
|
|
|
@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
|
|||
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||
}
|
||||
|
||||
void process_escapes(std::string& input) {
|
||||
static void process_escapes(std::string& input) {
|
||||
std::size_t input_len = input.length();
|
||||
std::size_t output_idx = 0;
|
||||
|
||||
|
|
|
@ -158,7 +158,7 @@ namespace console {
|
|||
}
|
||||
}
|
||||
|
||||
char32_t getchar32() {
|
||||
static char32_t getchar32() {
|
||||
#if defined(_WIN32)
|
||||
HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
|
||||
wchar_t high_surrogate = 0;
|
||||
|
@ -212,7 +212,7 @@ namespace console {
|
|||
#endif
|
||||
}
|
||||
|
||||
void pop_cursor() {
|
||||
static void pop_cursor() {
|
||||
#if defined(_WIN32)
|
||||
if (hConsole != NULL) {
|
||||
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
||||
|
@ -233,7 +233,7 @@ namespace console {
|
|||
putc('\b', out);
|
||||
}
|
||||
|
||||
int estimateWidth(char32_t codepoint) {
|
||||
static int estimateWidth(char32_t codepoint) {
|
||||
#if defined(_WIN32)
|
||||
(void)codepoint;
|
||||
return 1;
|
||||
|
@ -242,7 +242,7 @@ namespace console {
|
|||
#endif
|
||||
}
|
||||
|
||||
int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
|
||||
static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
|
||||
#if defined(_WIN32)
|
||||
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
||||
if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
|
||||
|
@ -303,7 +303,7 @@ namespace console {
|
|||
#endif
|
||||
}
|
||||
|
||||
void replace_last(char ch) {
|
||||
static void replace_last(char ch) {
|
||||
#if defined(_WIN32)
|
||||
pop_cursor();
|
||||
put_codepoint(&ch, 1, 1);
|
||||
|
@ -312,7 +312,7 @@ namespace console {
|
|||
#endif
|
||||
}
|
||||
|
||||
void append_utf8(char32_t ch, std::string & out) {
|
||||
static void append_utf8(char32_t ch, std::string & out) {
|
||||
if (ch <= 0x7F) {
|
||||
out.push_back(static_cast<unsigned char>(ch));
|
||||
} else if (ch <= 0x7FF) {
|
||||
|
@ -333,7 +333,7 @@ namespace console {
|
|||
}
|
||||
|
||||
// Helper function to remove the last UTF-8 character from a string
|
||||
void pop_back_utf8_char(std::string & line) {
|
||||
static void pop_back_utf8_char(std::string & line) {
|
||||
if (line.empty()) {
|
||||
return;
|
||||
}
|
||||
|
@ -349,7 +349,7 @@ namespace console {
|
|||
line.erase(pos);
|
||||
}
|
||||
|
||||
bool readline_advanced(std::string & line, bool multiline_input) {
|
||||
static bool readline_advanced(std::string & line, bool multiline_input) {
|
||||
if (out != stdout) {
|
||||
fflush(stdout);
|
||||
}
|
||||
|
@ -452,7 +452,7 @@ namespace console {
|
|||
return has_more;
|
||||
}
|
||||
|
||||
bool readline_simple(std::string & line, bool multiline_input) {
|
||||
static bool readline_simple(std::string & line, bool multiline_input) {
|
||||
#if defined(_WIN32)
|
||||
std::wstring wline;
|
||||
if (!std::getline(std::wcin, wline)) {
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
namespace grammar_parser {
|
||||
// NOTE: assumes valid utf8 (but checks for overrun)
|
||||
// copied from llama.cpp
|
||||
std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
||||
static auto decode_utf8(const char * src) -> std::pair<uint32_t, const char *> {
|
||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
||||
uint8_t first_byte = static_cast<uint8_t>(*src);
|
||||
uint8_t highbits = first_byte >> 4;
|
||||
|
@ -24,19 +24,19 @@ namespace grammar_parser {
|
|||
return std::make_pair(value, pos);
|
||||
}
|
||||
|
||||
uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
||||
static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||
auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
|
||||
return result.first->second;
|
||||
}
|
||||
|
||||
uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
||||
static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
||||
return next_id;
|
||||
}
|
||||
|
||||
void add_rule(
|
||||
static void add_rule(
|
||||
parse_state & state,
|
||||
uint32_t rule_id,
|
||||
const std::vector<llama_grammar_element> & rule) {
|
||||
|
@ -46,11 +46,11 @@ namespace grammar_parser {
|
|||
state.rules[rule_id] = rule;
|
||||
}
|
||||
|
||||
bool is_word_char(char c) {
|
||||
static bool is_word_char(char c) {
|
||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
|
||||
}
|
||||
|
||||
std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
||||
static auto parse_hex(const char * src, int size) -> std::pair<uint32_t, const char *> {
|
||||
const char * pos = src;
|
||||
const char * end = src + size;
|
||||
uint32_t value = 0;
|
||||
|
@ -73,7 +73,7 @@ namespace grammar_parser {
|
|||
return std::make_pair(value, pos);
|
||||
}
|
||||
|
||||
const char * parse_space(const char * src, bool newline_ok) {
|
||||
static const char * parse_space(const char * src, bool newline_ok) {
|
||||
const char * pos = src;
|
||||
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
||||
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
||||
|
@ -88,7 +88,7 @@ namespace grammar_parser {
|
|||
return pos;
|
||||
}
|
||||
|
||||
const char * parse_name(const char * src) {
|
||||
static const char * parse_name(const char * src) {
|
||||
const char * pos = src;
|
||||
while (is_word_char(*pos)) {
|
||||
pos++;
|
||||
|
@ -99,7 +99,7 @@ namespace grammar_parser {
|
|||
return pos;
|
||||
}
|
||||
|
||||
std::pair<uint32_t, const char *> parse_char(const char * src) {
|
||||
static auto parse_char(const char * src) -> std::pair<uint32_t, const char *> {
|
||||
if (*src == '\\') {
|
||||
switch (src[1]) {
|
||||
case 'x': return parse_hex(src + 2, 2);
|
||||
|
@ -129,7 +129,7 @@ namespace grammar_parser {
|
|||
uint32_t rule_id,
|
||||
bool is_nested);
|
||||
|
||||
const char * parse_sequence(
|
||||
static const char * parse_sequence(
|
||||
parse_state & state,
|
||||
const char * src,
|
||||
const std::string & rule_name,
|
||||
|
@ -247,7 +247,7 @@ namespace grammar_parser {
|
|||
return pos;
|
||||
}
|
||||
|
||||
const char * parse_rule(parse_state & state, const char * src) {
|
||||
static const char * parse_rule(parse_state & state, const char * src) {
|
||||
const char * name_end = parse_name(src);
|
||||
const char * pos = parse_space(name_end, false);
|
||||
size_t name_len = name_end - src;
|
||||
|
@ -285,7 +285,7 @@ namespace grammar_parser {
|
|||
}
|
||||
}
|
||||
|
||||
void print_grammar_char(FILE * file, uint32_t c) {
|
||||
static void print_grammar_char(FILE * file, uint32_t c) {
|
||||
if (0x20 <= c && c <= 0x7f) {
|
||||
fprintf(file, "%c", static_cast<char>(c));
|
||||
} else {
|
||||
|
@ -294,7 +294,7 @@ namespace grammar_parser {
|
|||
}
|
||||
}
|
||||
|
||||
bool is_char_element(llama_grammar_element elem) {
|
||||
static bool is_char_element(llama_grammar_element elem) {
|
||||
switch (elem.type) {
|
||||
case LLAMA_GRETYPE_CHAR: return true;
|
||||
case LLAMA_GRETYPE_CHAR_NOT: return true;
|
||||
|
@ -304,7 +304,7 @@ namespace grammar_parser {
|
|||
}
|
||||
}
|
||||
|
||||
void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
|
||||
static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
|
||||
for (auto elem : rule) {
|
||||
switch (elem.type) {
|
||||
case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
|
||||
|
@ -334,7 +334,7 @@ namespace grammar_parser {
|
|||
fprintf(file, "\n");
|
||||
}
|
||||
|
||||
void print_rule(
|
||||
static void print_rule(
|
||||
FILE * file,
|
||||
uint32_t rule_id,
|
||||
const std::vector<llama_grammar_element> & rule,
|
||||
|
|
|
@ -9,11 +9,13 @@
|
|||
#endif
|
||||
|
||||
#ifdef LLAMA_DEFAULT_RMS_EPS
|
||||
static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
|
||||
constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
|
||||
#else
|
||||
static const float rms_norm_eps = 5e-6f;
|
||||
constexpr float rms_norm_eps = 5e-6f;
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
float frand() {
|
||||
return (float)rand()/(float)RAND_MAX;
|
||||
}
|
||||
|
@ -1504,6 +1506,8 @@ struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_t
|
|||
ggml_new_f32(ctx, eps)))))));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 1) {
|
||||
fprintf(stderr, "usage: %s\n", argv[0]);
|
||||
|
|
|
@ -25,6 +25,8 @@
|
|||
#include <signal.h>
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
// Used for debugging to print out beam tokens.
|
||||
struct ostream_beam_view {
|
||||
llama_context * ctx;
|
||||
|
@ -82,6 +84,8 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_stat
|
|||
#endif
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
gpt_params params;
|
||||
|
|
|
@ -115,6 +115,8 @@ struct TransformerWeights {
|
|||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
|
||||
// we calloc instead of malloc to keep valgrind happy
|
||||
w->token_embedding_table = new float[p->vocab_size * p->dim]();
|
||||
|
@ -444,7 +446,7 @@ __attribute__((format(gnu_printf, 1, 2)))
|
|||
__attribute__((format(printf, 1, 2)))
|
||||
#endif
|
||||
#endif
|
||||
static std::string format(const char * fmt, ...) {
|
||||
std::string format(const char * fmt, ...) {
|
||||
va_list ap, ap2;
|
||||
va_start(ap, fmt);
|
||||
va_copy(ap2, ap);
|
||||
|
@ -540,7 +542,7 @@ bool is_ggml_file(const char *filename) {
|
|||
return magic == GGUF_MAGIC;
|
||||
}
|
||||
|
||||
static std::string llama_escape_whitespaces(const std::string& text) {
|
||||
std::string llama_escape_whitespaces(const std::string& text) {
|
||||
std::ostringstream out;
|
||||
for (char c : text) {
|
||||
if (c == ' ') out << "\xe2\x96\x81";
|
||||
|
@ -909,6 +911,8 @@ std::string basename(const std::string &path) {
|
|||
return path.substr(pos + 1);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
struct train_params params = get_default_train_params();
|
||||
if (!params_parse(argc, argv, &params)) {
|
||||
|
|
|
@ -13,8 +13,10 @@
|
|||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename T>
|
||||
static std::string to_string(const T & val) {
|
||||
std::string to_string(const T & val) {
|
||||
std::stringstream ss;
|
||||
ss << val;
|
||||
return ss.str();
|
||||
|
@ -227,6 +229,8 @@ bool gguf_ex_read_1(const std::string & fname) {
|
|||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 3) {
|
||||
printf("usage: %s data.gguf r|w\n", argv[0]);
|
||||
|
|
|
@ -33,13 +33,15 @@
|
|||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
static llama_context ** g_ctx;
|
||||
static llama_model ** g_model;
|
||||
static gpt_params * g_params;
|
||||
static std::vector<llama_token> * g_input_tokens;
|
||||
static std::ostringstream * g_output_ss;
|
||||
static std::vector<llama_token> * g_output_tokens;
|
||||
static bool is_interacting = false;
|
||||
namespace {
|
||||
|
||||
llama_context ** g_ctx;
|
||||
llama_model ** g_model;
|
||||
gpt_params * g_params;
|
||||
std::vector<llama_token> * g_input_tokens;
|
||||
std::ostringstream * g_output_ss;
|
||||
std::vector<llama_token> * g_output_tokens;
|
||||
bool is_interacting = false;
|
||||
|
||||
void write_logfile(
|
||||
const llama_context * ctx, const gpt_params & params, const llama_model * model,
|
||||
|
@ -101,6 +103,8 @@ void sigint_handler(int signo) {
|
|||
}
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
g_params = &params;
|
||||
|
|
|
@ -28,6 +28,8 @@ struct results_log_softmax {
|
|||
float prob;
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
void write_logfile(const llama_context * ctx, const gpt_params & params,
|
||||
const llama_model * model, const struct results_perplexity & results) {
|
||||
|
||||
|
@ -651,6 +653,8 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
|||
printf("\n");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
|
||||
|
|
|
@ -34,8 +34,8 @@ struct quantize_stats_params {
|
|||
std::vector<enum ggml_type> include_types;
|
||||
};
|
||||
|
||||
const size_t HISTOGRAM_BUCKETS = 150;
|
||||
const double HISTOGRAM_RANGE = 0.03;
|
||||
constexpr size_t HISTOGRAM_BUCKETS = 150;
|
||||
constexpr double HISTOGRAM_RANGE = 0.03;
|
||||
|
||||
struct error_stats {
|
||||
size_t num_samples;
|
||||
|
@ -44,6 +44,7 @@ struct error_stats {
|
|||
uint64_t error_histogram[HISTOGRAM_BUCKETS];
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
void quantize_stats_print_usage(int /*argc*/, char ** argv) {
|
||||
quantize_stats_params params;
|
||||
|
@ -133,7 +134,7 @@ void print_error_stats(const std::string & name, const error_stats & stats, bool
|
|||
}
|
||||
|
||||
// copied from ggml.h - verify that we can access this as a flat array
|
||||
static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
|
||||
bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
|
||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||
|
||||
return
|
||||
|
@ -238,6 +239,8 @@ void test_roundtrip_on_layer(
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
ggml_time_init();
|
||||
|
||||
|
|
|
@ -7,13 +7,15 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace {
|
||||
|
||||
struct quant_option {
|
||||
std::string name;
|
||||
llama_ftype ftype;
|
||||
std::string desc;
|
||||
};
|
||||
|
||||
static const std::vector<struct quant_option> QUANT_OPTIONS = {
|
||||
const std::vector<struct quant_option> QUANT_OPTIONS = {
|
||||
{ "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
|
||||
{ "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
|
||||
{ "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
|
||||
|
@ -88,6 +90,8 @@ void usage(const char * executable) {
|
|||
exit(1);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
if (argc < 3) {
|
||||
usage(argv[0]);
|
||||
|
|
|
@ -26,6 +26,8 @@
|
|||
using namespace httplib;
|
||||
using json = nlohmann::json;
|
||||
|
||||
namespace {
|
||||
|
||||
struct server_params
|
||||
{
|
||||
std::string hostname = "127.0.0.1";
|
||||
|
@ -48,7 +50,7 @@ struct completion_token_output
|
|||
llama_token tok;
|
||||
};
|
||||
|
||||
static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
|
||||
size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b)
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
|
||||
|
@ -63,14 +65,13 @@ enum stop_type
|
|||
STOP_PARTIAL,
|
||||
};
|
||||
|
||||
static bool ends_with(const std::string &str, const std::string &suffix)
|
||||
bool ends_with(const std::string & str, const std::string & suffix)
|
||||
{
|
||||
return str.size() >= suffix.size() &&
|
||||
0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
|
||||
}
|
||||
|
||||
static size_t find_partial_stop_string(const std::string &stop,
|
||||
const std::string &text)
|
||||
size_t find_partial_stop_string(const std::string & stop, const std::string & text)
|
||||
{
|
||||
if (!text.empty() && !stop.empty())
|
||||
{
|
||||
|
@ -91,7 +92,7 @@ static size_t find_partial_stop_string(const std::string &stop,
|
|||
}
|
||||
|
||||
template <class Iter>
|
||||
static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
||||
std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
||||
{
|
||||
std::string ret;
|
||||
for (; begin != end; ++begin)
|
||||
|
@ -101,9 +102,9 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void server_log(const char *level, const char *function, int line,
|
||||
const char *message, const nlohmann::ordered_json &extra)
|
||||
{
|
||||
void server_log(
|
||||
const char * level, const char * function, int line, const char * message, const nlohmann::ordered_json & extra
|
||||
) {
|
||||
nlohmann::ordered_json log{
|
||||
{"timestamp", time(nullptr)},
|
||||
{"level", level},
|
||||
|
@ -123,7 +124,7 @@ static void server_log(const char *level, const char *function, int line,
|
|||
}
|
||||
|
||||
// format incomplete utf-8 multibyte character for output
|
||||
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
||||
std::string tokens_to_output_formatted_string(const llama_context * ctx, llama_token token)
|
||||
{
|
||||
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
|
||||
// if the size is 1 and first bit is 1, meaning it's a partial character
|
||||
|
@ -139,7 +140,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
|
|||
}
|
||||
|
||||
// convert a vector of completion_token_output to json
|
||||
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
|
||||
json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs)
|
||||
{
|
||||
json out = json::array();
|
||||
for (const auto &prob : probs)
|
||||
|
@ -162,7 +163,7 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
|
|||
return out;
|
||||
}
|
||||
|
||||
static bool server_verbose = false;
|
||||
bool server_verbose = false;
|
||||
|
||||
#if SERVER_VERBOSE != 1
|
||||
#define LOG_VERBOSE(MSG, ...)
|
||||
|
@ -691,8 +692,7 @@ struct llama_server_context
|
|||
}
|
||||
};
|
||||
|
||||
static void server_print_usage(const char *argv0, const gpt_params &params,
|
||||
const server_params &sparams)
|
||||
void server_print_usage(const char * argv0, const gpt_params & params, const server_params & sparams)
|
||||
{
|
||||
printf("usage: %s [options]\n", argv0);
|
||||
printf("\n");
|
||||
|
@ -740,8 +740,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
|
|||
printf("\n");
|
||||
}
|
||||
|
||||
static void server_params_parse(int argc, char **argv, server_params &sparams,
|
||||
gpt_params &params)
|
||||
void server_params_parse(int argc, char ** argv, server_params & sparams, gpt_params & params)
|
||||
{
|
||||
gpt_params default_params;
|
||||
server_params default_sparams;
|
||||
|
@ -995,7 +994,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||
}
|
||||
}
|
||||
|
||||
static json format_generation_settings(llama_server_context &llama)
|
||||
json format_generation_settings(llama_server_context & llama)
|
||||
{
|
||||
const auto eos_bias = llama.params.logit_bias.find(llama_token_eos(llama.ctx));
|
||||
const bool ignore_eos = eos_bias != llama.params.logit_bias.end() &&
|
||||
|
@ -1029,14 +1028,14 @@ static json format_generation_settings(llama_server_context &llama)
|
|||
};
|
||||
}
|
||||
|
||||
static json format_embedding_response(llama_server_context &llama)
|
||||
json format_embedding_response(llama_server_context & llama)
|
||||
{
|
||||
return json{
|
||||
{"embedding", llama.getEmbedding()},
|
||||
};
|
||||
}
|
||||
|
||||
static json format_timings(llama_server_context &llama)
|
||||
json format_timings(llama_server_context & llama)
|
||||
{
|
||||
const auto timings = llama_get_timings(llama.ctx);
|
||||
|
||||
|
@ -1055,8 +1054,9 @@ static json format_timings(llama_server_context &llama)
|
|||
};
|
||||
}
|
||||
|
||||
static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
|
||||
{
|
||||
json format_final_response(
|
||||
llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
|
||||
) {
|
||||
|
||||
json res = json{
|
||||
{"content", content},
|
||||
|
@ -1083,8 +1083,9 @@ static json format_final_response(llama_server_context &llama, const std::string
|
|||
return res;
|
||||
}
|
||||
|
||||
static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
|
||||
{
|
||||
json format_partial_response(
|
||||
llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
|
||||
) {
|
||||
json res = json{
|
||||
{"content", content},
|
||||
{"stop", false},
|
||||
|
@ -1098,20 +1099,20 @@ static json format_partial_response(llama_server_context &llama, const std::stri
|
|||
return res;
|
||||
}
|
||||
|
||||
static json format_tokenizer_response(const std::vector<llama_token> &tokens)
|
||||
json format_tokenizer_response(const std::vector<llama_token> & tokens)
|
||||
{
|
||||
return json{
|
||||
{"tokens", tokens}};
|
||||
}
|
||||
|
||||
static json format_detokenized_response(std::string content)
|
||||
json format_detokenized_response(std::string content)
|
||||
{
|
||||
return json{
|
||||
{"content", content}};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static T json_value(const json &body, const std::string &key, const T &default_value)
|
||||
T json_value(const json & body, const std::string & key, const T & default_value)
|
||||
{
|
||||
// Fallback null to default value
|
||||
return body.contains(key) && !body.at(key).is_null()
|
||||
|
@ -1119,7 +1120,7 @@ static T json_value(const json &body, const std::string &key, const T &default_v
|
|||
: default_value;
|
||||
}
|
||||
|
||||
static void parse_options_completion(const json &body, llama_server_context &llama)
|
||||
void parse_options_completion(const json & body, llama_server_context & llama)
|
||||
{
|
||||
gpt_params default_params;
|
||||
|
||||
|
@ -1198,7 +1199,7 @@ static void parse_options_completion(const json &body, llama_server_context &lla
|
|||
LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
|
||||
}
|
||||
|
||||
static void log_server_request(const Request &req, const Response &res)
|
||||
void log_server_request(const Request & req, const Response & res)
|
||||
{
|
||||
LOG_INFO("request", {
|
||||
{"remote_addr", req.remote_addr},
|
||||
|
@ -1271,6 +1272,8 @@ void append_to_generated_text_from_generated_token_probs(llama_server_context &
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// own arguments required by this example
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
struct random_normal_distribution {
|
||||
std::mt19937 gen;
|
||||
std::normal_distribution<float> rd;
|
||||
|
@ -444,11 +446,11 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int6
|
|||
GGML_ASSERT(tensor->ne[3] == ne3);
|
||||
}
|
||||
|
||||
static size_t hash(void * p) {
|
||||
size_t hash(void * p) {
|
||||
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
||||
}
|
||||
|
||||
static size_t hash_find(void * hash_table[], void * p) {
|
||||
size_t hash_find(void * hash_table[], void * p) {
|
||||
size_t h = hash(p);
|
||||
|
||||
// linear probing
|
||||
|
@ -463,7 +465,7 @@ static size_t hash_find(void * hash_table[], void * p) {
|
|||
return i;
|
||||
}
|
||||
|
||||
static bool hash_insert(void * hash_table[], void * p) {
|
||||
bool hash_insert(void * hash_table[], void * p) {
|
||||
//size_t h = hash(p);
|
||||
size_t i = hash_find(hash_table, p);
|
||||
|
||||
|
@ -479,7 +481,7 @@ static bool hash_insert(void * hash_table[], void * p) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool hash_contains(void * hash_table[], void * p) {
|
||||
bool hash_contains(void * hash_table[], void * p) {
|
||||
size_t i = hash_find(hash_table, p);
|
||||
return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
|
||||
}
|
||||
|
@ -488,7 +490,6 @@ struct hash_map {
|
|||
void * keys[GGML_GRAPH_HASHTABLE_SIZE];
|
||||
void * vals[GGML_GRAPH_HASHTABLE_SIZE];
|
||||
};
|
||||
//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
|
||||
|
||||
struct hash_map * new_hash_map() {
|
||||
struct hash_map * result = new struct hash_map;
|
||||
|
@ -503,12 +504,12 @@ void free_hash_map(struct hash_map * map) {
|
|||
delete map;
|
||||
}
|
||||
|
||||
static bool ggml_is_view(struct ggml_tensor * t) {
|
||||
bool ggml_is_view(struct ggml_tensor * t) {
|
||||
return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
|
||||
t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
|
||||
}
|
||||
|
||||
static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
|
||||
struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
|
||||
switch (t->op) {
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_RESHAPE:
|
||||
|
@ -522,7 +523,7 @@ static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
|
|||
}
|
||||
}
|
||||
|
||||
static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
|
||||
struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
|
||||
struct ggml_tensor * parent = t;
|
||||
do {
|
||||
parent = get_view_parent(parent);
|
||||
|
@ -1988,6 +1989,8 @@ void opt_callback(void * vdata, float * sched) {
|
|||
data->shuffle_countdown -= n_batch;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
struct train_params params = get_default_train_params();
|
||||
|
||||
|
|
19
llama.cpp
19
llama.cpp
|
@ -1,3 +1,4 @@
|
|||
#define LLAMA_API_INTERNAL
|
||||
#include "llama.h"
|
||||
|
||||
#include "ggml.h"
|
||||
|
@ -108,7 +109,7 @@ static size_t utf8_len(char src) {
|
|||
return lookup[highbits];
|
||||
}
|
||||
|
||||
void replace_all(std::string & s, const std::string & search, const std::string & replace) {
|
||||
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
|
||||
std::string result;
|
||||
for (size_t pos = 0; ; pos += search.length()) {
|
||||
auto new_pos = s.find(search, pos);
|
||||
|
@ -1560,7 +1561,7 @@ struct llama_model_loader {
|
|||
// load LLaMA models
|
||||
//
|
||||
|
||||
std::string llama_model_ftype_name(enum llama_ftype ftype) {
|
||||
static std::string llama_model_ftype_name(enum llama_ftype ftype) {
|
||||
if (ftype & LLAMA_FTYPE_GUESSED) {
|
||||
return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
|
||||
}
|
||||
|
@ -3945,7 +3946,7 @@ struct llama_grammar_candidate {
|
|||
|
||||
// Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
|
||||
// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
|
||||
std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
|
||||
static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
|
||||
const char * src,
|
||||
llama_partial_utf8 partial_start) {
|
||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
|
||||
|
@ -5526,7 +5527,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
|||
}
|
||||
|
||||
// TODO: after the GGUF PR, this likely won't work and needs to be updated
|
||||
int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
|
||||
static int llama_apply_lora_from_file_internal(
|
||||
const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads
|
||||
) {
|
||||
LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
|
||||
|
||||
const int64_t t_start_lora_us = ggml_time_us();
|
||||
|
@ -6073,7 +6076,7 @@ struct llama_context * llama_new_context_with_model(
|
|||
return ctx;
|
||||
}
|
||||
|
||||
struct llama_context * llama_init_from_file(
|
||||
static struct llama_context * llama_init_from_file(
|
||||
const char * path_model,
|
||||
struct llama_context_params params) {
|
||||
struct llama_model * model = llama_load_model_from_file(path_model, params);
|
||||
|
@ -6278,7 +6281,7 @@ struct llama_data_file_context : llama_data_context {
|
|||
* llama_copy_state_data(ctx, &data_ctx);
|
||||
*
|
||||
*/
|
||||
void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
|
||||
static void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
|
||||
// copy rng
|
||||
{
|
||||
std::stringstream rng_ss;
|
||||
|
@ -6816,7 +6819,9 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
|
|||
}
|
||||
|
||||
// For internal test use
|
||||
const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
|
||||
auto llama_internal_get_tensor_map(struct llama_context * ctx)
|
||||
-> const std::vector<std::pair<std::string, struct ggml_tensor *>> &
|
||||
{
|
||||
return ctx->model.tensors_by_name;
|
||||
}
|
||||
|
||||
|
|
3
llama.h
3
llama.h
|
@ -540,7 +540,8 @@ extern "C" {
|
|||
|
||||
struct ggml_tensor;
|
||||
|
||||
const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
|
||||
auto llama_internal_get_tensor_map(struct llama_context * ctx)
|
||||
-> const std::vector<std::pair<std::string, struct ggml_tensor *>> &;
|
||||
|
||||
#endif // LLAMA_API_INTERNAL
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
constexpr int kVecSize = 1 << 18;
|
||||
|
||||
namespace {
|
||||
|
||||
float drawFromGaussianPdf(std::mt19937& rndm) {
|
||||
constexpr double kScale = 1./(1. + std::mt19937::max());
|
||||
constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
|
||||
|
@ -218,6 +220,8 @@ static void dot_q4_q8(const int n, float* s, const void* vx, const void* vy) {
|
|||
*s = sumf;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
int nloop = argc > 1 ? atoi(argv[1]) : 10;
|
||||
|
|
|
@ -36,6 +36,8 @@
|
|||
#define GGML_PRINT(...) printf(__VA_ARGS__)
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
float frand(void) {
|
||||
return (float)rand()/(float)RAND_MAX;
|
||||
}
|
||||
|
@ -117,6 +119,8 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
|
|||
((float *)t->data)[idx] = value;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(void) {
|
||||
struct ggml_init_params params = {
|
||||
/* .mem_size = */ 1024*1024*1024,
|
||||
|
|
|
@ -13,15 +13,17 @@
|
|||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
|
||||
const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
|
||||
const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
|
||||
const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
|
||||
const float MAX_DOT_PRODUCT_ERROR = 0.02f;
|
||||
constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
|
||||
constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
|
||||
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
|
||||
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
|
||||
constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
const char* RESULT_STR[] = {"ok", "FAILED"};
|
||||
|
||||
|
||||
// Generate synthetic data
|
||||
void generate_data(float offset, size_t n, float * dst) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
|
@ -90,6 +92,8 @@ float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float
|
|||
return fabsf(result - dot_ref) / test_size;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
bool verbose = false;
|
||||
const size_t test_size = 32 * 128;
|
||||
|
|
|
@ -60,6 +60,8 @@ inline int64_t cpu_cycles() {
|
|||
#endif
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// Generate synthetic data
|
||||
void generate_data(float offset, size_t n, float * dst) {
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
|
@ -137,6 +139,8 @@ void usage(char * argv[]) {
|
|||
printf(" set test iteration number (%d)\n", ITERATIONS);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
quantize_perf_params params {};
|
||||
|
||||
|
|
|
@ -12,6 +12,9 @@
|
|||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
void dump(const llama_token_data_array * candidates) {
|
||||
for (size_t i = 0; i < candidates->size; i++) {
|
||||
printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
|
||||
|
@ -173,6 +176,8 @@ void test_frequency_presence_penalty(
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main(void) {
|
||||
ggml_time_init();
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
typedef int codepoint;
|
||||
|
||||
std::string codepoint_to_utf8(codepoint cp) {
|
||||
static std::string codepoint_to_utf8(codepoint cp) {
|
||||
std::string result;
|
||||
if (0x00 <= cp && cp <= 0x7f) {
|
||||
result.push_back(cp);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue