diff --git a/CMakeLists.txt b/CMakeLists.txt
index eac5799af..214ede21c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,6 +427,7 @@ if (LLAMA_ALL_WARNINGS)
             -Wextra
             -Wpedantic
             -Wcast-qual
+            -Wmissing-declarations
             -Wno-unused-function
             -Wno-multichar
         )
diff --git a/Makefile b/Makefile
index 7ab1b7a09..778acb908 100644
--- a/Makefile
+++ b/Makefile
@@ -172,7 +172,7 @@ endif # LLAMA_DISABLE_LOGS
 # warnings
 MK_CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
                -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
+MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
 
 ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
 	# g++ only
diff --git a/common/common.cpp b/common/common.cpp
index afc9b8a55..8cb04c054 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 
-void process_escapes(std::string& input) {
+static void process_escapes(std::string& input) {
     std::size_t input_len = input.length();
     std::size_t output_idx = 0;
 
diff --git a/common/console.cpp b/common/console.cpp
index 23545e5be..f65cbc6ed 100644
--- a/common/console.cpp
+++ b/common/console.cpp
@@ -158,7 +158,7 @@ namespace console {
         }
     }
 
-    char32_t getchar32() {
+    static char32_t getchar32() {
 #if defined(_WIN32)
         HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
         wchar_t high_surrogate = 0;
@@ -212,7 +212,7 @@
 #endif
     }
 
-    void pop_cursor() {
+    static void pop_cursor() {
 #if defined(_WIN32)
         if (hConsole != NULL) {
             CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
@@ -233,7 +233,7 @@
         putc('\b', out);
     }
 
-    int estimateWidth(char32_t codepoint) {
+    static int estimateWidth(char32_t codepoint) {
 #if defined(_WIN32)
         (void)codepoint;
         return 1;
@@ -242,7 +242,7 @@
 #endif
     }
 
-    int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
+    static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
 #if defined(_WIN32)
         CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
         if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
@@ -303,7 +303,7 @@
 #endif
     }
 
-    void replace_last(char ch) {
+    static void replace_last(char ch) {
 #if defined(_WIN32)
         pop_cursor();
         put_codepoint(&ch, 1, 1);
@@ -312,7 +312,7 @@
 #endif
     }
 
-    void append_utf8(char32_t ch, std::string & out) {
+    static void append_utf8(char32_t ch, std::string & out) {
         if (ch <= 0x7F) {
             out.push_back(static_cast<char>(ch));
         } else if (ch <= 0x7FF) {
@@ -333,7 +333,7 @@
     }
 
     // Helper function to remove the last UTF-8 character from a string
-    void pop_back_utf8_char(std::string & line) {
+    static void pop_back_utf8_char(std::string & line) {
         if (line.empty()) {
             return;
         }
@@ -349,7 +349,7 @@
         line.erase(pos);
     }
 
-    bool readline_advanced(std::string & line, bool multiline_input) {
+    static bool readline_advanced(std::string & line, bool multiline_input) {
         if (out != stdout) {
             fflush(stdout);
         }
@@ -452,7 +452,7 @@
         return has_more;
     }
 
-    bool readline_simple(std::string & line, bool multiline_input) {
+    static bool readline_simple(std::string & line, bool multiline_input) {
 #if defined(_WIN32)
         std::wstring wline;
         if (!std::getline(std::wcin, wline)) {
diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp
index 177d1e3a8..e05d0f8aa 100644
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@@ -9,7 +9,7 @@ namespace grammar_parser {
     // NOTE: assumes valid utf8 (but checks for overrun)
     // copied from llama.cpp
-    std::pair<uint32_t, const char *> decode_utf8(const char * src) {
+    static auto decode_utf8(const char * src) -> std::pair<uint32_t, const char *> {
         static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
         uint8_t first_byte = static_cast<uint8_t>(*src);
         uint8_t highbits = first_byte >> 4;
@@ -24,19 +24,19 @@
         return std::make_pair(value, pos);
     }
 
-    uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
+    static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
         uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
         auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
         return result.first->second;
     }
 
-    uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
+    static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
         uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
         state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
         return next_id;
     }
 
-    void add_rule(
+    static void add_rule(
         parse_state & state,
         uint32_t rule_id,
         const std::vector<llama_grammar_element> & rule) {
@@ -46,11 +46,11 @@
         state.rules[rule_id] = rule;
     }
 
-    bool is_word_char(char c) {
+    static bool is_word_char(char c) {
         return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
     }
 
-    std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
+    static auto parse_hex(const char * src, int size) -> std::pair<uint32_t, const char *> {
         const char * pos = src;
         const char * end = src + size;
         uint32_t value = 0;
@@ -73,7 +73,7 @@
         return std::make_pair(value, pos);
     }
 
-    const char * parse_space(const char * src, bool newline_ok) {
+    static const char * parse_space(const char * src, bool newline_ok) {
         const char * pos = src;
         while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
                 (newline_ok && (*pos == '\r' || *pos == '\n'))) {
@@ -88,7 +88,7 @@
         return pos;
     }
 
-    const char * parse_name(const char * src) {
+    static const char * parse_name(const char * src) {
         const char * pos = src;
         while (is_word_char(*pos)) {
             pos++;
@@ -99,7 +99,7 @@
         return pos;
     }
 
-    std::pair<uint32_t, const char *> parse_char(const char * src) {
+    static auto parse_char(const char * src) -> std::pair<uint32_t, const char *> {
         if (*src == '\\') {
             switch (src[1]) {
                 case 'x': return parse_hex(src + 2, 2);
@@ -129,7 +129,7 @@
             uint32_t rule_id,
             bool is_nested);
 
-    const char * parse_sequence(
+    static const char * parse_sequence(
         parse_state & state,
         const char * src,
         const std::string & rule_name,
@@ -247,7 +247,7 @@
         return pos;
     }
 
-    const char * parse_rule(parse_state & state, const char * src) {
+    static const char * parse_rule(parse_state & state, const char * src) {
         const char * name_end = parse_name(src);
         const char * pos      = parse_space(name_end, false);
         size_t       name_len = name_end - src;
@@ -285,7 +285,7 @@
         }
     }
 
-    void print_grammar_char(FILE * file, uint32_t c) {
+    static void print_grammar_char(FILE * file, uint32_t c) {
         if (0x20 <= c && c <= 0x7f) {
             fprintf(file, "%c", static_cast<char>(c));
         } else {
@@ -294,7 +294,7 @@
         }
     }
 
-    bool is_char_element(llama_grammar_element elem) {
+    static bool is_char_element(llama_grammar_element elem) {
         switch (elem.type) {
             case LLAMA_GRETYPE_CHAR:     return true;
             case LLAMA_GRETYPE_CHAR_NOT: return true;
@@ -304,7 +304,7 @@
         }
     }
 
-    void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
+    static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
         for (auto elem : rule) {
             switch (elem.type) {
                 case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
@@ -334,7 +334,7 @@
         fprintf(file, "\n");
     }
 
-    void print_rule(
+    static void print_rule(
         FILE * file,
         uint32_t rule_id,
         const std::vector<llama_grammar_element> & rule,
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index a99ece9a6..30a06338a 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -9,11 +9,13 @@
 #endif
 
 #ifdef LLAMA_DEFAULT_RMS_EPS
-static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
+constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
 #else
-static const float rms_norm_eps = 5e-6f;
+constexpr float rms_norm_eps = 5e-6f;
 #endif
 
+namespace {
+
 float frand() {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -1504,6 +1506,8 @@ struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
                             ggml_new_f32(ctx, eps)))))));
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 1) {
         fprintf(stderr, "usage: %s\n", argv[0]);
diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp
index 6b31aea78..4e685a5e8 100644
--- a/examples/beam-search/beam-search.cpp
+++ b/examples/beam-search/beam-search.cpp
@@ -25,6 +25,8 @@
 #include <signal.h>
 #endif
 
+namespace {
+
 // Used for debugging to print out beam tokens.
 struct ostream_beam_view {
     llama_context * ctx;
@@ -82,6 +84,8 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) {
 #endif
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 293b455d0..c1f3bbb08 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -115,6 +115,8 @@ struct TransformerWeights {
     }
 };
 
+namespace {
+
 void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
     // we calloc instead of malloc to keep valgrind happy
     w->token_embedding_table = new float[p->vocab_size * p->dim]();
@@ -444,7 +446,7 @@ __attribute__((format(gnu_printf, 1, 2)))
 __attribute__((format(printf, 1, 2)))
 #endif
 #endif
-static std::string format(const char * fmt, ...) {
+std::string format(const char * fmt, ...) {
     va_list ap, ap2;
     va_start(ap, fmt);
     va_copy(ap2, ap);
@@ -540,7 +542,7 @@ bool is_ggml_file(const char *filename) {
     return magic == GGUF_MAGIC;
 }
 
-static std::string llama_escape_whitespaces(const std::string& text) {
+std::string llama_escape_whitespaces(const std::string& text) {
     std::ostringstream out;
     for (char c : text) {
         if (c == ' ') out << "\xe2\x96\x81";
@@ -909,6 +911,8 @@ std::string basename(const std::string &path) {
     return path.substr(pos + 1);
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     struct train_params params = get_default_train_params();
     if (!params_parse(argc, argv, &params)) {
diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
index a34010f10..9601e0f6e 100644
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -13,8 +13,10 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+namespace {
+
 template <typename T>
-static std::string to_string(const T & val) {
+std::string to_string(const T & val) {
     std::stringstream ss;
     ss << val;
     return ss.str();
@@ -227,6 +229,8 @@ bool gguf_ex_read_1(const std::string & fname) {
     return true;
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 3) {
         printf("usage: %s data.gguf r|w\n", argv[0]);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index baec6ba12..5d1e2c2af 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -33,13 +33,15 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static llama_context ** g_ctx;
-static llama_model ** g_model;
-static gpt_params * g_params;
-static std::vector<llama_token> * g_input_tokens;
-static std::ostringstream * g_output_ss;
-static std::vector<llama_token> * g_output_tokens;
-static bool is_interacting = false;
+namespace {
+
+llama_context ** g_ctx;
+llama_model ** g_model;
+gpt_params * g_params;
+std::vector<llama_token> * g_input_tokens;
+std::ostringstream * g_output_ss;
+std::vector<llama_token> * g_output_tokens;
+bool is_interacting = false;
 
 void write_logfile(
     const llama_context * ctx, const gpt_params & params, const llama_model * model,
@@ -101,6 +103,8 @@ void sigint_handler(int signo) {
     }
 }
 #endif
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 3a1c8c28d..474ce3158 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -28,6 +28,8 @@ struct results_log_softmax {
     float prob;
 };
 
+namespace {
+
 void write_logfile(const llama_context * ctx, const gpt_params & params,
                    const llama_model * model, const struct results_perplexity & results) {
@@ -651,6 +653,8 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     printf("\n");
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6ce03ba7b..3d194f7fc 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -34,8 +34,8 @@ struct quantize_stats_params {
     std::vector<enum ggml_type> include_types;
 };
 
-const size_t HISTOGRAM_BUCKETS = 150;
-const double HISTOGRAM_RANGE = 0.03;
+constexpr size_t HISTOGRAM_BUCKETS = 150;
+constexpr double HISTOGRAM_RANGE = 0.03;
 
 struct error_stats {
     size_t num_samples;
@@ -44,6 +44,7 @@ struct error_stats {
     uint64_t error_histogram[HISTOGRAM_BUCKETS];
 };
 
+namespace {
 
 void quantize_stats_print_usage(int /*argc*/, char ** argv) {
     quantize_stats_params params;
@@ -133,7 +134,7 @@ void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) {
 }
 
 // copied from ggml.h - verify that we can access this as a flat array
-static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
+bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
@@ -238,6 +239,8 @@ void test_roundtrip_on_layer(
     }
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     ggml_time_init();
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 1bf182482..85a004946 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -7,13 +7,15 @@
 #include <vector>
 #include <string>
 
+namespace {
+
 struct quant_option {
     std::string name;
     llama_ftype ftype;
     std::string desc;
 };
 
-static const std::vector<struct quant_option> QUANT_OPTIONS = {
+const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
     { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
     { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
@@ -88,6 +90,8 @@ void usage(const char * executable) {
     exit(1);
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 3) {
         usage(argv[0]);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 3f3c64650..23ce5fcb7 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -26,6 +26,8 @@
 using namespace httplib;
 
 using json = nlohmann::json;
 
+namespace {
+
 struct server_params
 {
     std::string hostname = "127.0.0.1";
@@ -48,7 +50,7 @@ struct completion_token_output
     llama_token tok;
 };
 
-static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
+size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b)
 {
     size_t i;
     for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
@@ -63,14 +65,13 @@ enum stop_type
     STOP_PARTIAL,
 };
 
-static bool ends_with(const std::string &str, const std::string &suffix)
+bool ends_with(const std::string & str, const std::string & suffix)
 {
     return str.size() >= suffix.size() &&
            0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
 }
 
-static size_t find_partial_stop_string(const std::string &stop,
-                                       const std::string &text)
+size_t find_partial_stop_string(const std::string & stop, const std::string & text)
 {
     if (!text.empty() && !stop.empty())
     {
@@ -91,7 +92,7 @@
 }
 
 template <class Iter>
-static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
+std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end)
 {
     std::string ret;
     for (; begin != end; ++begin)
@@ -101,9 +102,9 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
     return ret;
 }
 
-static void server_log(const char *level, const char *function, int line,
-                       const char *message, const nlohmann::ordered_json &extra)
-{
+void server_log(
+    const char * level, const char * function, int line, const char * message, const nlohmann::ordered_json & extra
+) {
     nlohmann::ordered_json log{
         {"timestamp", time(nullptr)},
         {"level", level},
@@ -123,7 +124,7 @@ static void server_log(const char *level, const char *function, int line,
 }
 
 // format incomplete utf-8 multibyte character for output
-static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
+std::string tokens_to_output_formatted_string(const llama_context * ctx, llama_token token)
 {
     std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
     // if the size is 1 and first bit is 1, meaning it's a partial character
@@ -139,7 +140,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
 }
 
 // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
+json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs)
 {
     json out = json::array();
     for (const auto &prob : probs)
@@ -162,7 +163,7 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
 
-static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_final_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
     json res = json{
         {"content", content},
@@ -1083,8 +1083,9 @@ static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
     return res;
 }
 
-static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_partial_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
     json res = json{
         {"content", content},
         {"stop", false},
@@ -1098,20 +1099,20 @@ static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
     return res;
 }
 
-static json format_tokenizer_response(const std::vector<llama_token> &tokens)
+json format_tokenizer_response(const std::vector<llama_token> & tokens)
 {
     return json{
         {"tokens", tokens}};
 }
 
-static json format_detokenized_response(std::string content)
+json format_detokenized_response(std::string content)
 {
     return json{
         {"content", content}};
 }
 
 template <typename T>
-static T json_value(const json &body, const std::string &key, const T &default_value)
+T json_value(const json & body, const std::string & key, const T & default_value)
 {
     // Fallback null to default value
     return body.contains(key) && !body.at(key).is_null()
@@ -1119,7 +1120,7 @@ static T json_value(const json &body, const std::string &key, const T &default_value)
         : default_value;
 }
 
-static void parse_options_completion(const json &body, llama_server_context &llama)
+void parse_options_completion(const json & body, llama_server_context & llama)
 {
     gpt_params default_params;
 
@@ -1198,7 +1199,7 @@ static void parse_options_completion(const json &body, llama_server_context &llama)
     LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
 }
 
-static void log_server_request(const Request &req, const Response &res)
+void log_server_request(const Request & req, const Response & res)
 {
     LOG_INFO("request", {
                             {"remote_addr", req.remote_addr},
@@ -1271,6 +1272,8 @@ void append_to_generated_text_from_generated_token_probs(llama_server_context & llama)
     }
 }
 
+} // namespace
+
 int main(int argc, char **argv)
 {
     // own arguments required by this example
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 947aa7ed3..785f7be62 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -18,6 +18,8 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+namespace {
+
 struct random_normal_distribution {
     std::mt19937 gen;
     std::normal_distribution<float> rd;
@@ -444,11 +446,11 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
     GGML_ASSERT(tensor->ne[3] == ne3);
 }
 
-static size_t hash(void * p) {
+size_t hash(void * p) {
     return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
 }
 
-static size_t hash_find(void * hash_table[], void * p) {
+size_t hash_find(void * hash_table[], void * p) {
     size_t h = hash(p);
 
     // linear probing
@@ -463,7 +465,7 @@ static size_t hash_find(void * hash_table[], void * p) {
     return i;
 }
 
-static bool hash_insert(void * hash_table[], void * p) {
+bool hash_insert(void * hash_table[], void * p) {
     //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);
 
@@ -479,7 +481,7 @@ static bool hash_insert(void * hash_table[], void * p) {
     return false;
 }
 
-static bool hash_contains(void * hash_table[], void * p) {
+bool hash_contains(void * hash_table[], void * p) {
     size_t i = hash_find(hash_table, p);
     return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
 }
@@ -488,7 +490,6 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
-//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
 
 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -503,12 +504,12 @@ void free_hash_map(struct hash_map * map) {
     delete map;
 }
 
-static bool ggml_is_view(struct ggml_tensor * t) {
+bool ggml_is_view(struct ggml_tensor * t) {
     return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
            t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
 }
 
-static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     switch (t->op) {
         case GGML_OP_PERMUTE:
         case GGML_OP_RESHAPE:
@@ -522,7 +523,7 @@ static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     }
 }
 
-static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
     struct ggml_tensor * parent = t;
     do {
         parent = get_view_parent(parent);
@@ -1988,6 +1989,8 @@ void opt_callback(void * vdata, float * sched) {
     data->shuffle_countdown -= n_batch;
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     struct train_params params = get_default_train_params();
diff --git a/llama.cpp b/llama.cpp
index 146605d44..cdf7d88c4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1,3 +1,4 @@
+#define LLAMA_API_INTERNAL
 #include "llama.h"
 
 #include "ggml.h"
@@ -108,7 +109,7 @@ static size_t utf8_len(char src) {
     return lookup[highbits];
 }
 
-void replace_all(std::string & s, const std::string & search, const std::string & replace) {
+static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
     std::string result;
     for (size_t pos = 0; ; pos += search.length()) {
         auto new_pos = s.find(search, pos);
@@ -1560,7 +1561,7 @@ struct llama_model_loader {
 // load LLaMA models
 //
 
-std::string llama_model_ftype_name(enum llama_ftype ftype) {
+static std::string llama_model_ftype_name(enum llama_ftype ftype) {
     if (ftype & LLAMA_FTYPE_GUESSED) {
         return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
     }
@@ -3945,7 +3946,7 @@ struct llama_grammar_candidate {
 
 // Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
 // pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
-std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const char * src,
         llama_partial_utf8 partial_start) {
     static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
@@ -5526,7 +5527,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
 }
 
 // TODO: after the GGUF PR, this likely won't work and needs to be updated
-int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
+static int llama_apply_lora_from_file_internal(
+    const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads
+) {
     LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
 
     const int64_t t_start_lora_us = ggml_time_us();
@@ -6073,7 +6076,7 @@ struct llama_context * llama_new_context_with_model(
     return ctx;
 }
 
-struct llama_context * llama_init_from_file(
+static struct llama_context * llama_init_from_file(
         const char * path_model,
         struct llama_context_params params) {
     struct llama_model * model = llama_load_model_from_file(path_model, params);
@@ -6278,7 +6281,7 @@ struct llama_data_file_context : llama_data_context {
  *     llama_copy_state_data(ctx, &data_ctx);
  *
 */
-void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
+static void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
     // copy rng
     {
         std::stringstream rng_ss;
@@ -6816,7 +6819,9 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
 }
 
 // For internal test use
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &
+{
     return ctx->model.tensors_by_name;
 }
 
diff --git a/llama.h b/llama.h
index 37975bebe..f494a83f1 100644
--- a/llama.h
+++ b/llama.h
@@ -540,7 +540,8 @@ extern "C" {
 
 struct ggml_tensor;
 
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &;
 
 #endif // LLAMA_API_INTERNAL
diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp
index 48758cda8..048a3bde1 100644
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@@ -16,6 +16,8 @@
 
 constexpr int kVecSize = 1 << 18;
 
+namespace {
+
 float drawFromGaussianPdf(std::mt19937& rndm) {
     constexpr double kScale = 1./(1. + std::mt19937::max());
     constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
@@ -218,6 +220,8 @@ static void dot_q4_q8(const int n, float* s, const void* vx, const void* vy) {
     *s = sumf;
 }
 
+} // namespace
+
 int main(int argc, char** argv) {
 
     int nloop = argc > 1 ? atoi(argv[1]) : 10;
diff --git a/tests/test-opt.cpp b/tests/test-opt.cpp
index 8ab240202..af4bac233 100644
--- a/tests/test-opt.cpp
+++ b/tests/test-opt.cpp
@@ -36,6 +36,8 @@
 
 #define GGML_PRINT(...) printf(__VA_ARGS__)
 
+namespace {
+
 float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -117,6 +119,8 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
     ((float *)t->data)[idx] = value;
 }
 
+} // namespace
+
 int main(void) {
     struct ggml_init_params params = {
         /* .mem_size   = */ 1024*1024*1024,
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index 8d3c162d2..6ec719dae 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -13,15 +13,17 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
-const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
-const float MAX_DOT_PRODUCT_ERROR = 0.02f;
+constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
+constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
+
+
+namespace {
 
 const char* RESULT_STR[] = {"ok", "FAILED"};
 
-
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
     for (size_t i = 0; i < n; i++) {
@@ -90,6 +92,8 @@ float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float * test_data2) {
     return fabsf(result - dot_ref) / test_size;
 }
 
+} // namespace
+
 int main(int argc, char * argv[]) {
     bool verbose = false;
     const size_t test_size = 32 * 128;
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index cbea7d452..b1375ea10 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -60,6 +60,8 @@ inline int64_t cpu_cycles() {
 
 #endif
 
+namespace {
+
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
     for (size_t i = 0; i < n; i++) {
@@ -137,6 +139,8 @@ void usage(char * argv[]) {
     printf("  --iterations          set test iteration number (%d)\n", ITERATIONS);
 }
 
+} // namespace
+
 int main(int argc, char * argv[]) {
     quantize_perf_params params {};
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 4437c3948..a928f53c1 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -12,6 +12,9 @@
 #include <cassert>
 #include <vector>
 
+
+namespace {
+
 void dump(const llama_token_data_array * candidates) {
     for (size_t i = 0; i < candidates->size; i++) {
         printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
@@ -173,6 +176,8 @@ void test_frequency_presence_penalty(
     }
 }
 
+} // namespace
+
 int main(void) {
     ggml_time_init();
diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp
index ab3d822f2..804ea2486 100644
--- a/tests/test-tokenizer-1-llama.cpp
+++ b/tests/test-tokenizer-1-llama.cpp
@@ -13,7 +13,7 @@
 
 typedef int codepoint;
 
-std::string codepoint_to_utf8(codepoint cp) {
+static std::string codepoint_to_utf8(codepoint cp) {
     std::string result;
     if (0x00 <= cp && cp <= 0x7f) {
         result.push_back(cp);