check C++ code with -Wmissing-declarations

2023-09-14 19:03:50 -04:00 · 2023-09-14 19:03:50 -04:00 · cd27e8ab32
commit cd27e8ab32
parent feea179e9f
23 changed files with 157 additions and 92 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -427,6 +427,7 @@ if (LLAMA_ALL_WARNINGS)
            -Wextra
            -Wpedantic
            -Wcast-qual
+            -Wmissing-declarations
            -Wno-unused-function
            -Wno-multichar
        )
--- a/Makefile
+++ b/Makefile
@ -172,7 +172,7 @@ endif # LLAMA_DISABLE_LOGS
 # warnings
 MK_CFLAGS    += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
 				-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
+MK_CXXFLAGS  += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
 ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
 	# g++ only
--- a/common/common.cpp
+++ b/common/common.cpp
@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
-void process_escapes(std::string& input) {
+static void process_escapes(std::string& input) {
    std::size_t input_len = input.length();
    std::size_t output_idx = 0;
--- a/common/console.cpp
+++ b/common/console.cpp
@ -158,7 +158,7 @@ namespace console {
        }
    }
-    char32_t getchar32() {
+    static char32_t getchar32() {
 #if defined(_WIN32)
        HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
        wchar_t high_surrogate = 0;
@ -212,7 +212,7 @@ namespace console {
 #endif
    }
-    void pop_cursor() {
+    static void pop_cursor() {
 #if defined(_WIN32)
        if (hConsole != NULL) {
            CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
@ -233,7 +233,7 @@ namespace console {
        putc('\b', out);
    }
-    int estimateWidth(char32_t codepoint) {
+    static int estimateWidth(char32_t codepoint) {
 #if defined(_WIN32)
        (void)codepoint;
        return 1;
@ -242,7 +242,7 @@ namespace console {
 #endif
    }
-    int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
+    static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
 #if defined(_WIN32)
        CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
        if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
@ -303,7 +303,7 @@ namespace console {
 #endif
    }
-    void replace_last(char ch) {
+    static void replace_last(char ch) {
 #if defined(_WIN32)
        pop_cursor();
        put_codepoint(&ch, 1, 1);
@ -312,7 +312,7 @@ namespace console {
 #endif
    }
-    void append_utf8(char32_t ch, std::string & out) {
+    static void append_utf8(char32_t ch, std::string & out) {
        if (ch <= 0x7F) {
            out.push_back(static_cast<unsigned char>(ch));
        } else if (ch <= 0x7FF) {
@ -333,7 +333,7 @@ namespace console {
    }
    // Helper function to remove the last UTF-8 character from a string
-    void pop_back_utf8_char(std::string & line) {
+    static void pop_back_utf8_char(std::string & line) {
        if (line.empty()) {
            return;
        }
@ -349,7 +349,7 @@ namespace console {
        line.erase(pos);
    }
-    bool readline_advanced(std::string & line, bool multiline_input) {
+    static bool readline_advanced(std::string & line, bool multiline_input) {
        if (out != stdout) {
            fflush(stdout);
        }
@ -452,7 +452,7 @@ namespace console {
        return has_more;
    }
-    bool readline_simple(std::string & line, bool multiline_input) {
+    static bool readline_simple(std::string & line, bool multiline_input) {
 #if defined(_WIN32)
        std::wstring wline;
        if (!std::getline(std::wcin, wline)) {
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@ -9,7 +9,7 @@
 namespace grammar_parser {
    // NOTE: assumes valid utf8 (but checks for overrun)
    // copied from llama.cpp
-    std::pair<uint32_t, const char *> decode_utf8(const char * src) {
+    static auto decode_utf8(const char * src) -> std::pair<uint32_t, const char *> {
        static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
        uint8_t  first_byte = static_cast<uint8_t>(*src);
        uint8_t  highbits   = first_byte >> 4;
@ -24,19 +24,19 @@ namespace grammar_parser {
        return std::make_pair(value, pos);
    }
-    uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
+    static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
        uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
        auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
        return result.first->second;
    }
-    uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
+    static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
        uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
        state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
        return next_id;
    }
-    void add_rule(
+    static void add_rule(
            parse_state & state,
            uint32_t      rule_id,
            const std::vector<llama_grammar_element> & rule) {
@ -46,11 +46,11 @@ namespace grammar_parser {
        state.rules[rule_id] = rule;
    }
-    bool is_word_char(char c) {
+    static bool is_word_char(char c) {
        return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
    }
-    std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
+    static auto parse_hex(const char * src, int size) -> std::pair<uint32_t, const char *> {
        const char * pos   = src;
        const char * end   = src + size;
        uint32_t     value = 0;
@ -73,7 +73,7 @@ namespace grammar_parser {
        return std::make_pair(value, pos);
    }
-    const char * parse_space(const char * src, bool newline_ok) {
+    static const char * parse_space(const char * src, bool newline_ok) {
        const char * pos = src;
        while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
                (newline_ok && (*pos == '\r' || *pos == '\n'))) {
@ -88,7 +88,7 @@ namespace grammar_parser {
        return pos;
    }
-    const char * parse_name(const char * src) {
+    static const char * parse_name(const char * src) {
        const char * pos = src;
        while (is_word_char(*pos)) {
            pos++;
@ -99,7 +99,7 @@ namespace grammar_parser {
        return pos;
    }
-    std::pair<uint32_t, const char *> parse_char(const char * src) {
+    static auto parse_char(const char * src) -> std::pair<uint32_t, const char *> {
        if (*src == '\\') {
            switch (src[1]) {
                case 'x': return parse_hex(src + 2, 2);
@ -129,7 +129,7 @@ namespace grammar_parser {
            uint32_t            rule_id,
            bool                is_nested);
-    const char * parse_sequence(
+    static const char * parse_sequence(
            parse_state                        & state,
            const char                         * src,
            const std::string                  & rule_name,
@ -247,7 +247,7 @@ namespace grammar_parser {
        return pos;
    }
-    const char * parse_rule(parse_state & state, const char * src) {
+    static const char * parse_rule(parse_state & state, const char * src) {
        const char * name_end = parse_name(src);
        const char * pos      = parse_space(name_end, false);
        size_t       name_len = name_end - src;
@ -285,7 +285,7 @@ namespace grammar_parser {
        }
    }
-    void print_grammar_char(FILE * file, uint32_t c) {
+    static void print_grammar_char(FILE * file, uint32_t c) {
        if (0x20 <= c && c <= 0x7f) {
            fprintf(file, "%c", static_cast<char>(c));
        } else {
@ -294,7 +294,7 @@ namespace grammar_parser {
        }
    }
-    bool is_char_element(llama_grammar_element elem) {
+    static bool is_char_element(llama_grammar_element elem) {
        switch (elem.type) {
            case LLAMA_GRETYPE_CHAR:           return true;
            case LLAMA_GRETYPE_CHAR_NOT:       return true;
@ -304,7 +304,7 @@ namespace grammar_parser {
        }
    }
-    void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
+    static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
        for (auto elem : rule) {
            switch (elem.type) {
                case LLAMA_GRETYPE_END:            fprintf(file, "END");            break;
@ -334,7 +334,7 @@ namespace grammar_parser {
        fprintf(file, "\n");
    }
-    void print_rule(
+    static void print_rule(
            FILE     * file,
            uint32_t   rule_id,
            const std::vector<llama_grammar_element> & rule,
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@ -9,11 +9,13 @@
 #endif
 #ifdef LLAMA_DEFAULT_RMS_EPS
-static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
+constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
 #else
-static const float rms_norm_eps = 5e-6f;
+constexpr float rms_norm_eps = 5e-6f;
 #endif
+namespace {
 float frand() {
    return (float)rand()/(float)RAND_MAX;
 }
@ -1504,6 +1506,8 @@ struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_t
                                ggml_new_f32(ctx, eps)))))));
 }
+} // namespace
 int main(int argc, char ** argv) {
    if (argc < 1) {
        fprintf(stderr, "usage: %s\n", argv[0]);
--- a/examples/beam-search/beam-search.cpp
+++ b/examples/beam-search/beam-search.cpp
@ -25,6 +25,8 @@
 #include <signal.h>
 #endif
+namespace {
 // Used for debugging to print out beam tokens.
 struct ostream_beam_view {
    llama_context * ctx;
@ -82,6 +84,8 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_stat
 #endif
 }
+} // namespace
 int main(int argc, char ** argv)
 {
    gpt_params params;
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@ -115,6 +115,8 @@ struct TransformerWeights {
    }
 };
+namespace {
 void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
    // we calloc instead of malloc to keep valgrind happy
    w->token_embedding_table = new float[p->vocab_size * p->dim]();
@ -444,7 +446,7 @@ __attribute__((format(gnu_printf, 1, 2)))
 __attribute__((format(printf, 1, 2)))
 #endif
 #endif
-static std::string format(const char * fmt, ...) {
+std::string format(const char * fmt, ...) {
    va_list ap, ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);
@ -540,7 +542,7 @@ bool is_ggml_file(const char *filename) {
    return magic == GGUF_MAGIC;
 }
-static std::string llama_escape_whitespaces(const std::string& text) {
+std::string llama_escape_whitespaces(const std::string& text) {
    std::ostringstream out;
    for (char c : text) {
        if (c == ' ') out << "\xe2\x96\x81";
@ -909,6 +911,8 @@ std::string basename(const std::string &path) {
    return path.substr(pos + 1);
 }
+} // namespace
 int main(int argc, char ** argv) {
    struct train_params params = get_default_train_params();
    if (!params_parse(argc, argv, &params)) {
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@ -13,8 +13,10 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
+namespace {
 template<typename T>
-static std::string to_string(const T & val) {
+std::string to_string(const T & val) {
    std::stringstream ss;
    ss << val;
    return ss.str();
@ -227,6 +229,8 @@ bool gguf_ex_read_1(const std::string & fname) {
    return true;
 }
+} // namespace
 int main(int argc, char ** argv) {
    if (argc < 3) {
        printf("usage: %s data.gguf r|w\n", argv[0]);
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -33,13 +33,15 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
-static llama_context           ** g_ctx;
-static llama_model             ** g_model;
-static gpt_params               * g_params;
-static std::vector<llama_token> * g_input_tokens;
-static std::ostringstream       * g_output_ss;
-static std::vector<llama_token> * g_output_tokens;
-static bool is_interacting = false;
+namespace {
+
+llama_context           ** g_ctx;
+llama_model             ** g_model;
+gpt_params               * g_params;
+std::vector<llama_token> * g_input_tokens;
+std::ostringstream       * g_output_ss;
+std::vector<llama_token> * g_output_tokens;
+bool is_interacting = false;
 void write_logfile(
    const llama_context * ctx, const gpt_params & params, const llama_model * model,
@ -101,6 +103,8 @@ void sigint_handler(int signo) {
 }
 #endif
+} // namespace
 int main(int argc, char ** argv) {
    gpt_params params;
    g_params = &params;
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@ -28,6 +28,8 @@ struct results_log_softmax {
    float  prob;
 };
+namespace {
 void write_logfile(const llama_context * ctx, const gpt_params & params,
                   const llama_model * model, const struct results_perplexity & results) {
@ -651,6 +653,8 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
    printf("\n");
 }
+} // namespace
 int main(int argc, char ** argv) {
    gpt_params params;
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@ -34,8 +34,8 @@ struct quantize_stats_params {
    std::vector<enum ggml_type> include_types;
 };
-const size_t HISTOGRAM_BUCKETS = 150;
+constexpr size_t HISTOGRAM_BUCKETS = 150;
-const double HISTOGRAM_RANGE = 0.03;
+constexpr double HISTOGRAM_RANGE = 0.03;
 struct error_stats {
    size_t num_samples;
@ -44,6 +44,7 @@ struct error_stats {
    uint64_t error_histogram[HISTOGRAM_BUCKETS];
 };
+namespace {
 void quantize_stats_print_usage(int /*argc*/, char ** argv) {
    quantize_stats_params params;
@ -133,7 +134,7 @@ void print_error_stats(const std::string & name, const error_stats & stats, bool
 }
 // copied from ggml.h - verify that we can access this as a flat array
-static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
+bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
    return
@ -238,6 +239,8 @@ void test_roundtrip_on_layer(
    }
 }
+} // namespace
 int main(int argc, char ** argv) {
    ggml_time_init();
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@ -7,13 +7,15 @@
 #include <vector>
 #include <string>
+namespace {
 struct quant_option {
    std::string name;
    llama_ftype ftype;
    std::string desc;
 };
-static const std::vector<struct quant_option> QUANT_OPTIONS = {
+const std::vector<struct quant_option> QUANT_OPTIONS = {
    { "Q4_0",   LLAMA_FTYPE_MOSTLY_Q4_0,   " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
    { "Q4_1",   LLAMA_FTYPE_MOSTLY_Q4_1,   " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
    { "Q5_0",   LLAMA_FTYPE_MOSTLY_Q5_0,   " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
@ -88,6 +90,8 @@ void usage(const char * executable) {
    exit(1);
 }
+} // namespace
 int main(int argc, char ** argv) {
    if (argc < 3) {
        usage(argv[0]);
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -26,6 +26,8 @@
 using namespace httplib;
 using json = nlohmann::json;
+namespace {
 struct server_params
 {
    std::string hostname = "127.0.0.1";
@ -48,7 +50,7 @@ struct completion_token_output
    llama_token tok;
 };
-static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
+size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b)
 {
    size_t i;
    for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
@ -63,14 +65,13 @@ enum stop_type
    STOP_PARTIAL,
 };
-static bool ends_with(const std::string &str, const std::string &suffix)
+bool ends_with(const std::string & str, const std::string & suffix)
 {
    return str.size() >= suffix.size() &&
           0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
 }
-static size_t find_partial_stop_string(const std::string &stop,
-                                       const std::string &text)
+size_t find_partial_stop_string(const std::string & stop, const std::string & text)
 {
    if (!text.empty() && !stop.empty())
    {
@ -91,7 +92,7 @@ static size_t find_partial_stop_string(const std::string &stop,
 }
 template <class Iter>
-static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
+std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
 {
    std::string ret;
    for (; begin != end; ++begin)
@ -101,9 +102,9 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
    return ret;
 }
-static void server_log(const char *level, const char *function, int line,
-                       const char *message, const nlohmann::ordered_json &extra)
-{
+void server_log(
+    const char * level, const char * function, int line, const char * message, const nlohmann::ordered_json & extra
+) {
    nlohmann::ordered_json log{
        {"timestamp", time(nullptr)},
        {"level", level},
@ -123,7 +124,7 @@ static void server_log(const char *level, const char *function, int line,
 }
 // format incomplete utf-8 multibyte character for output
-static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
+std::string tokens_to_output_formatted_string(const llama_context * ctx, llama_token token)
 {
    std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
    // if the size is 1 and first bit is 1, meaning it's a partial character
@ -139,7 +140,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
 }
 // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
+json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs)
 {
    json out = json::array();
    for (const auto &prob : probs)
@ -162,7 +163,7 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
    return out;
 }
-static bool server_verbose = false;
+bool server_verbose = false;
 #if SERVER_VERBOSE != 1
 #define LOG_VERBOSE(MSG, ...)
@ -691,8 +692,7 @@ struct llama_server_context
    }
 };
-static void server_print_usage(const char *argv0, const gpt_params &params,
-                               const server_params &sparams)
+void server_print_usage(const char * argv0, const gpt_params & params, const server_params & sparams)
 {
    printf("usage: %s [options]\n", argv0);
    printf("\n");
@ -740,8 +740,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
    printf("\n");
 }
-static void server_params_parse(int argc, char **argv, server_params &sparams,
-                                gpt_params &params)
+void server_params_parse(int argc, char ** argv, server_params & sparams, gpt_params & params)
 {
    gpt_params default_params;
    server_params default_sparams;
@ -995,7 +994,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
    }
 }
-static json format_generation_settings(llama_server_context &llama)
+json format_generation_settings(llama_server_context & llama)
 {
    const auto eos_bias = llama.params.logit_bias.find(llama_token_eos(llama.ctx));
    const bool ignore_eos = eos_bias != llama.params.logit_bias.end() &&
@ -1029,14 +1028,14 @@ static json format_generation_settings(llama_server_context &llama)
    };
 }
-static json format_embedding_response(llama_server_context &llama)
+json format_embedding_response(llama_server_context & llama)
 {
    return json{
        {"embedding", llama.getEmbedding()},
    };
 }
-static json format_timings(llama_server_context &llama)
+json format_timings(llama_server_context & llama)
 {
    const auto timings = llama_get_timings(llama.ctx);
@ -1055,8 +1054,9 @@ static json format_timings(llama_server_context &llama)
    };
 }
-static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_final_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
    json res = json{
        {"content", content},
@ -1083,8 +1083,9 @@ static json format_final_response(llama_server_context &llama, const std::string
    return res;
 }
-static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_partial_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
    json res = json{
        {"content", content},
        {"stop", false},
@ -1098,20 +1099,20 @@ static json format_partial_response(llama_server_context &llama, const std::stri
    return res;
 }
-static json format_tokenizer_response(const std::vector<llama_token> &tokens)
+json format_tokenizer_response(const std::vector<llama_token> & tokens)
 {
    return json{
        {"tokens", tokens}};
 }
-static json format_detokenized_response(std::string content)
+json format_detokenized_response(std::string content)
 {
    return json{
        {"content", content}};
 }
 template <typename T>
-static T json_value(const json &body, const std::string &key, const T &default_value)
+T json_value(const json & body, const std::string & key, const T & default_value)
 {
    // Fallback null to default value
    return body.contains(key) && !body.at(key).is_null()
@ -1119,7 +1120,7 @@ static T json_value(const json &body, const std::string &key, const T &default_v
        : default_value;
 }
-static void parse_options_completion(const json &body, llama_server_context &llama)
+void parse_options_completion(const json & body, llama_server_context & llama)
 {
    gpt_params default_params;
@ -1198,7 +1199,7 @@ static void parse_options_completion(const json &body, llama_server_context &lla
    LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
 }
-static void log_server_request(const Request &req, const Response &res)
+void log_server_request(const Request & req, const Response & res)
 {
    LOG_INFO("request", {
                            {"remote_addr", req.remote_addr},
@ -1271,6 +1272,8 @@ void append_to_generated_text_from_generated_token_probs(llama_server_context &
    }
 }
+} // namespace
 int main(int argc, char **argv)
 {
    // own arguments required by this example
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@ -18,6 +18,8 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
+namespace {
 struct random_normal_distribution {
    std::mt19937 gen;
    std::normal_distribution<float> rd;
@ -444,11 +446,11 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int6
    GGML_ASSERT(tensor->ne[3] == ne3);
 }
-static size_t hash(void * p) {
+size_t hash(void * p) {
    return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
 }
-static size_t hash_find(void * hash_table[], void * p) {
+size_t hash_find(void * hash_table[], void * p) {
    size_t h = hash(p);
    // linear probing
@ -463,7 +465,7 @@ static size_t hash_find(void * hash_table[], void * p) {
    return i;
 }
-static bool hash_insert(void * hash_table[], void * p) {
+bool hash_insert(void * hash_table[], void * p) {
    //size_t h = hash(p);
    size_t i = hash_find(hash_table, p);
@ -479,7 +481,7 @@ static bool hash_insert(void * hash_table[], void * p) {
    return false;
 }
-static bool hash_contains(void * hash_table[], void * p) {
+bool hash_contains(void * hash_table[], void * p) {
    size_t i = hash_find(hash_table, p);
    return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
 }
@ -488,7 +490,6 @@ struct hash_map {
    void * keys[GGML_GRAPH_HASHTABLE_SIZE];
    void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
 //static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
 struct hash_map * new_hash_map() {
    struct hash_map * result = new struct hash_map;
@ -503,12 +504,12 @@ void free_hash_map(struct hash_map * map) {
    delete map;
 }
-static bool ggml_is_view(struct ggml_tensor * t) {
+bool ggml_is_view(struct ggml_tensor * t) {
    return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
           t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
 }
-static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
    switch (t->op) {
        case GGML_OP_PERMUTE:
        case GGML_OP_RESHAPE:
@ -522,7 +523,7 @@ static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
    }
 }
-static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
    struct ggml_tensor * parent = t;
    do {
        parent = get_view_parent(parent);
@ -1988,6 +1989,8 @@ void opt_callback(void * vdata, float * sched) {
    data->shuffle_countdown -= n_batch;
 }
+} // namespace
 int main(int argc, char ** argv) {
    struct train_params params = get_default_train_params();
--- a/llama.cpp
+++ b/llama.cpp
@ -1,3 +1,4 @@
 #define LLAMA_API_INTERNAL
 #include "llama.h"
 #include "ggml.h"
@ -108,7 +109,7 @@ static size_t utf8_len(char src) {
    return lookup[highbits];
 }
-void replace_all(std::string & s, const std::string & search, const std::string & replace) {
+static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    std::string result;
    for (size_t pos = 0; ; pos += search.length()) {
        auto new_pos = s.find(search, pos);
@ -1560,7 +1561,7 @@ struct llama_model_loader {
 // load LLaMA models
 //
-std::string llama_model_ftype_name(enum llama_ftype ftype) {
+static std::string llama_model_ftype_name(enum llama_ftype ftype) {
    if (ftype & LLAMA_FTYPE_GUESSED) {
        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
    }
@ -3945,7 +3946,7 @@ struct llama_grammar_candidate {
 // Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
 // pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
-std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
        const char         * src,
        llama_partial_utf8   partial_start) {
    static const int      lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
@ -5526,7 +5527,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 }
 // TODO: after the GGUF PR, this likely won't work and needs to be updated
-int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
+static int llama_apply_lora_from_file_internal(
+    const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads
+) {
    LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
    const int64_t t_start_lora_us = ggml_time_us();
@ -6073,7 +6076,7 @@ struct llama_context * llama_new_context_with_model(
    return ctx;
 }
-struct llama_context * llama_init_from_file(
+static struct llama_context * llama_init_from_file(
                             const char * path_model,
            struct llama_context_params   params) {
    struct llama_model * model = llama_load_model_from_file(path_model, params);
@ -6278,7 +6281,7 @@ struct llama_data_file_context : llama_data_context {
 * llama_copy_state_data(ctx, &data_ctx);
 *
 */
-void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
+static void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
    // copy rng
    {
        std::stringstream rng_ss;
@ -6816,7 +6819,9 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
 }
 // For internal test use
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &
+{
    return ctx->model.tensors_by_name;
 }
--- a/llama.h
+++ b/llama.h
@ -540,7 +540,8 @@ extern "C" {
 struct ggml_tensor;
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &;
 #endif // LLAMA_API_INTERNAL
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@ -16,6 +16,8 @@
 constexpr int kVecSize = 1 << 18;
+namespace {
 float drawFromGaussianPdf(std::mt19937& rndm) {
    constexpr double kScale = 1./(1. + std::mt19937::max());
    constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
@ -218,6 +220,8 @@ static void dot_q4_q8(const int n, float* s, const void* vx, const void* vy) {
    *s = sumf;
 }
+} // namespace
 int main(int argc, char** argv) {
    int nloop = argc > 1 ? atoi(argv[1]) : 10;
--- a/tests/test-opt.cpp
+++ b/tests/test-opt.cpp
@ -36,6 +36,8 @@
 #define GGML_PRINT(...) printf(__VA_ARGS__)
+namespace {
 float frand(void) {
    return (float)rand()/(float)RAND_MAX;
 }
@ -117,6 +119,8 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
 }
+} // namespace
 int main(void) {
    struct ggml_init_params params = {
        /* .mem_size   = */ 1024*1024*1024,
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@ -13,15 +13,17 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
-const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
+constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
-const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
-const float MAX_DOT_PRODUCT_ERROR = 0.02f;
+constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
+namespace {
 const char* RESULT_STR[] = {"ok", "FAILED"};
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
    for (size_t i = 0; i < n; i++) {
@ -90,6 +92,8 @@ float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float
    return fabsf(result - dot_ref) / test_size;
 }
+} // namespace
 int main(int argc, char * argv[]) {
    bool verbose = false;
    const size_t test_size = 32 * 128;
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@ -60,6 +60,8 @@ inline int64_t cpu_cycles() {
 #endif
+namespace {
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
    for (size_t i = 0; i < n; i++) {
@ -137,6 +139,8 @@ void usage(char * argv[]) {
    printf("                        set test iteration number (%d)\n", ITERATIONS);
 }
+} // namespace
 int main(int argc, char * argv[]) {
    quantize_perf_params params {};
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@ -12,6 +12,9 @@
 #include <vector>
 #include <algorithm>
+namespace {
 void dump(const llama_token_data_array * candidates) {
    for (size_t i = 0; i < candidates->size; i++) {
        printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
@ -173,6 +176,8 @@ void test_frequency_presence_penalty(
    }
 }
+} // namespace
 int main(void) {
    ggml_time_init();
--- a/tests/test-tokenizer-1-llama.cpp
+++ b/tests/test-tokenizer-1-llama.cpp
@ -13,7 +13,7 @@
 typedef int codepoint;
-std::string codepoint_to_utf8(codepoint cp) {
+static std::string codepoint_to_utf8(codepoint cp) {
    std::string result;
    if (0x00 <= cp && cp <= 0x7f) {
        result.push_back(cp);