diff --git a/CMakeLists.txt b/CMakeLists.txt
index eac5799af..214ede21c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,6 +427,7 @@ if (LLAMA_ALL_WARNINGS)
             -Wextra
             -Wpedantic
             -Wcast-qual
+            -Wmissing-declarations
             -Wno-unused-function
             -Wno-multichar
         )
diff --git a/Makefile b/Makefile
index 7ab1b7a09..778acb908 100644
--- a/Makefile
+++ b/Makefile
@@ -172,7 +172,7 @@ endif # LLAMA_DISABLE_LOGS
 # warnings
 MK_CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
                -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
+MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
 
 ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
 	# g++ only
diff --git a/common/common.cpp b/common/common.cpp
index afc9b8a55..8cb04c054 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 
-void process_escapes(std::string& input) {
+static void process_escapes(std::string& input) {
     std::size_t input_len = input.length();
     std::size_t output_idx = 0;
 
diff --git a/common/console.cpp b/common/console.cpp
index 23545e5be..f65cbc6ed 100644
--- a/common/console.cpp
+++ b/common/console.cpp
@@ -158,7 +158,7 @@ namespace console {
         }
     }
 
-    char32_t getchar32() {
+    static char32_t getchar32() {
 #if defined(_WIN32)
         HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
         wchar_t high_surrogate = 0;
@@ -212,7 +212,7 @@
 #endif
     }
 
-    void pop_cursor() {
+    static void pop_cursor() {
 #if defined(_WIN32)
         if (hConsole != NULL) {
             CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
@@ -233,7 +233,7 @@
         putc('\b', out);
     }
 
-    int estimateWidth(char32_t codepoint) {
+    static int estimateWidth(char32_t codepoint) {
 #if defined(_WIN32)
         (void)codepoint;
         return 1;
@@ -242,7 +242,7 @@
 #endif
     }
 
-    int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
+    static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
 #if defined(_WIN32)
         CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
         if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
@@ -303,7 +303,7 @@
 #endif
     }
 
-    void replace_last(char ch) {
+    static void replace_last(char ch) {
 #if defined(_WIN32)
         pop_cursor();
         put_codepoint(&ch, 1, 1);
@@ -312,7 +312,7 @@
 #endif
     }
 
-    void append_utf8(char32_t ch, std::string & out) {
+    static void append_utf8(char32_t ch, std::string & out) {
         if (ch <= 0x7F) {
             out.push_back(static_cast<char>(ch));
         } else if (ch <= 0x7FF) {
@@ -333,7 +333,7 @@
     }
 
     // Helper function to remove the last UTF-8 character from a string
-    void pop_back_utf8_char(std::string & line) {
+    static void pop_back_utf8_char(std::string & line) {
         if (line.empty()) {
             return;
         }
@@ -349,7 +349,7 @@
         line.erase(pos);
     }
 
-    bool readline_advanced(std::string & line, bool multiline_input) {
+    static bool readline_advanced(std::string & line, bool multiline_input) {
         if (out != stdout) {
             fflush(stdout);
         }
@@ -452,7 +452,7 @@
         return has_more;
     }
 
-    bool readline_simple(std::string & line, bool multiline_input) {
+    static bool readline_simple(std::string & line, bool multiline_input) {
 #if defined(_WIN32)
         std::wstring wline;
         if (!std::getline(std::wcin, wline)) {
diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp
index 177d1e3a8..e05d0f8aa 100644
--- a/common/grammar-parser.cpp
+++ b/common/grammar-parser.cpp
@@ -9,7 +9,7 @@ namespace grammar_parser {
     // NOTE: assumes valid utf8 (but checks for overrun)
     // copied from llama.cpp
-    std::pair<uint32_t, const char *> decode_utf8(const char * src) {
+    static auto decode_utf8(const char * src) -> std::pair<uint32_t, const char *> {
         static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
         uint8_t first_byte = static_cast<uint8_t>(*src);
         uint8_t highbits = first_byte >> 4;
@@ -24,19 +24,19 @@
         return std::make_pair(value, pos);
     }
 
-    uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
+    static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
         uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
         auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
         return result.first->second;
     }
 
-    uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
+    static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
         uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
         state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
         return next_id;
     }
 
-    void add_rule(
+    static void add_rule(
         parse_state & state,
         uint32_t rule_id,
         const std::vector<llama_grammar_element> & rule) {
@@ -46,11 +46,11 @@
         state.rules[rule_id] = rule;
     }
 
-    bool is_word_char(char c) {
+    static bool is_word_char(char c) {
         return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
     }
 
-    std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
+    static auto parse_hex(const char * src, int size) -> std::pair<uint32_t, const char *> {
         const char * pos = src;
         const char * end = src + size;
         uint32_t value = 0;
@@ -73,7 +73,7 @@
         return std::make_pair(value, pos);
     }
 
-    const char * parse_space(const char * src, bool newline_ok) {
+    static const char * parse_space(const char * src, bool newline_ok) {
         const char * pos = src;
         while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
                 (newline_ok && (*pos == '\r' || *pos == '\n'))) {
@@ -88,7 +88,7 @@
         return pos;
     }
 
-    const char * parse_name(const char * src) {
+    static const char * parse_name(const char * src) {
         const char * pos = src;
         while (is_word_char(*pos)) {
             pos++;
@@ -99,7 +99,7 @@
         return pos;
     }
 
-    std::pair<uint32_t, const char *> parse_char(const char * src) {
+    static auto parse_char(const char * src) -> std::pair<uint32_t, const char *> {
         if (*src == '\\') {
             switch (src[1]) {
                 case 'x': return parse_hex(src + 2, 2);
@@ -129,7 +129,7 @@
             uint32_t rule_id,
             bool is_nested);
 
-    const char * parse_sequence(
+    static const char * parse_sequence(
         parse_state & state,
         const char * src,
         const std::string & rule_name,
@@ -247,7 +247,7 @@
         return pos;
     }
 
-    const char * parse_rule(parse_state & state, const char * src) {
+    static const char * parse_rule(parse_state & state, const char * src) {
         const char * name_end = parse_name(src);
         const char * pos      = parse_space(name_end, false);
         size_t       name_len = name_end - src;
@@ -285,7 +285,7 @@
         }
     }
 
-    void print_grammar_char(FILE * file, uint32_t c) {
+    static void print_grammar_char(FILE * file, uint32_t c) {
         if (0x20 <= c && c <= 0x7f) {
             fprintf(file, "%c", static_cast<char>(c));
         } else {
@@ -294,7 +294,7 @@
         }
     }
 
-    bool is_char_element(llama_grammar_element elem) {
+    static bool is_char_element(llama_grammar_element elem) {
         switch (elem.type) {
             case LLAMA_GRETYPE_CHAR:     return true;
             case LLAMA_GRETYPE_CHAR_NOT: return true;
@@ -304,7 +304,7 @@
         }
     }
 
-    void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
+    static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
         for (auto elem : rule) {
             switch (elem.type) {
                 case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
@@ -334,7 +334,7 @@
         fprintf(file, "\n");
     }
 
-    void print_rule(
+    static void print_rule(
         FILE * file,
         uint32_t rule_id,
         const std::vector<llama_grammar_element> & rule,
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index a99ece9a6..30a06338a 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -9,11 +9,13 @@
 #endif
 
 #ifdef LLAMA_DEFAULT_RMS_EPS
-static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
+constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
 #else
-static const float rms_norm_eps = 5e-6f;
+constexpr float rms_norm_eps = 5e-6f;
 #endif
 
+namespace {
+
 float frand() {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -1504,6 +1506,8 @@ struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
                             ggml_new_f32(ctx, eps)))))));
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 1) {
         fprintf(stderr, "usage: %s\n", argv[0]);
diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp
index 6b31aea78..4e685a5e8 100644
--- a/examples/beam-search/beam-search.cpp
+++ b/examples/beam-search/beam-search.cpp
@@ -25,6 +25,8 @@
 #include <signal.h>
 #endif
 
+namespace {
+
 // Used for debugging to print out beam tokens.
 struct ostream_beam_view {
     llama_context * ctx;
@@ -82,6 +84,8 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) {
 #endif
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index 293b455d0..c1f3bbb08 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -115,6 +115,8 @@ struct TransformerWeights {
     }
 };
 
+namespace {
+
 void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
     // we calloc instead of malloc to keep valgrind happy
     w->token_embedding_table = new float[p->vocab_size * p->dim]();
@@ -444,7 +446,7 @@ __attribute__((format(gnu_printf, 1, 2)))
 __attribute__((format(printf, 1, 2)))
 #endif
 #endif
-static std::string format(const char * fmt, ...) {
+std::string format(const char * fmt, ...) {
     va_list ap, ap2;
     va_start(ap, fmt);
     va_copy(ap2, ap);
@@ -540,7 +542,7 @@ bool is_ggml_file(const char *filename) {
     return magic == GGUF_MAGIC;
 }
 
-static std::string llama_escape_whitespaces(const std::string& text) {
+std::string llama_escape_whitespaces(const std::string& text) {
     std::ostringstream out;
     for (char c : text) {
         if (c == ' ') out << "\xe2\x96\x81";
@@ -909,6 +911,8 @@ std::string basename(const std::string &path) {
     return path.substr(pos + 1);
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     struct train_params params = get_default_train_params();
     if (!params_parse(argc, argv, &params)) {
diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
index a34010f10..9601e0f6e 100644
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -13,8 +13,10 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+namespace {
+
 template <typename T>
-static std::string to_string(const T & val) {
+std::string to_string(const T & val) {
     std::stringstream ss;
     ss << val;
     return ss.str();
@@ -227,6 +229,8 @@ bool gguf_ex_read_1(const std::string & fname) {
     return true;
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 3) {
         printf("usage: %s data.gguf r|w\n", argv[0]);
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index baec6ba12..5d1e2c2af 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -33,13 +33,15 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static llama_context ** g_ctx;
-static llama_model ** g_model;
-static gpt_params * g_params;
-static std::vector<llama_token> * g_input_tokens;
-static std::ostringstream * g_output_ss;
-static std::vector<llama_token> * g_output_tokens;
-static bool is_interacting = false;
+namespace {
+
+llama_context ** g_ctx;
+llama_model ** g_model;
+gpt_params * g_params;
+std::vector<llama_token> * g_input_tokens;
+std::ostringstream * g_output_ss;
+std::vector<llama_token> * g_output_tokens;
+bool is_interacting = false;
 
 void write_logfile(
     const llama_context * ctx, const gpt_params & params, const llama_model * model,
@@ -101,6 +103,8 @@ void sigint_handler(int signo) {
     }
 }
 #endif
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 3a1c8c28d..474ce3158 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -28,6 +28,8 @@ struct results_log_softmax {
     float prob;
 };
 
+namespace {
+
 void write_logfile(const llama_context * ctx, const gpt_params & params,
                    const llama_model * model, const struct results_perplexity & results) {
@@ -651,6 +653,8 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     printf("\n");
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6ce03ba7b..3d194f7fc 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -34,8 +34,8 @@ struct quantize_stats_params {
     std::vector<enum ggml_type> include_types;
 };
 
-const size_t HISTOGRAM_BUCKETS = 150;
-const double HISTOGRAM_RANGE = 0.03;
+constexpr size_t HISTOGRAM_BUCKETS = 150;
+constexpr double HISTOGRAM_RANGE = 0.03;
 
 struct error_stats {
     size_t num_samples;
@@ -44,6 +44,7 @@ struct error_stats {
     uint64_t error_histogram[HISTOGRAM_BUCKETS];
 };
 
+namespace {
 
 void quantize_stats_print_usage(int /*argc*/, char ** argv) {
     quantize_stats_params params;
@@ -133,7 +134,7 @@ void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) {
 }
 
 // copied from ggml.h - verify that we can access this as a flat array
-static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
+bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
@@ -238,6 +239,8 @@ void test_roundtrip_on_layer(
     }
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     ggml_time_init();
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 1bf182482..85a004946 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -7,13 +7,15 @@
 #include <vector>
 #include <string>
 
+namespace {
+
 struct quant_option {
     std::string name;
     llama_ftype ftype;
     std::string desc;
 };
 
-static const std::vector<struct quant_option> QUANT_OPTIONS = {
+const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
     { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
     { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
@@ -88,6 +90,8 @@ void usage(const char * executable) {
     exit(1);
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 3) {
         usage(argv[0]);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 3f3c64650..23ce5fcb7 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -26,6 +26,8 @@
 using namespace httplib;
 
 using json = nlohmann::json;
 
+namespace {
+
 struct server_params
 {
     std::string hostname = "127.0.0.1";
@@ -48,7 +50,7 @@ struct completion_token_output
     llama_token tok;
 };
 
-static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
+size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b)
 {
     size_t i;
     for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
@@ -63,14 +65,13 @@ enum stop_type
     STOP_PARTIAL,
 };
 
-static bool ends_with(const std::string &str, const std::string &suffix)
+bool ends_with(const std::string & str, const std::string & suffix)
 {
     return str.size() >= suffix.size() &&
            0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
 }
 
-static size_t find_partial_stop_string(const std::string &stop,
-                                       const std::string &text)
+size_t find_partial_stop_string(const std::string & stop, const std::string & text)
 {
     if (!text.empty() && !stop.empty())
     {
@@ -91,7 +92,7 @@
 }
 
 template <class Iter>
-static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
+std::string tokens_to_str(llama_context * ctx, Iter begin, Iter end)
 {
     std::string ret;
     for (; begin != end; ++begin)
@@ -101,9 +102,9 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
     return ret;
 }
 
-static void server_log(const char *level, const char *function, int line,
-                       const char *message, const nlohmann::ordered_json &extra)
-{
+void server_log(
+    const char * level, const char * function, int line, const char * message, const nlohmann::ordered_json & extra
+) {
     nlohmann::ordered_json log{
         {"timestamp", time(nullptr)},
         {"level", level},
@@ -123,7 +124,7 @@ static void server_log(const char *level, const char *function, int line,
 }
 
 // format incomplete utf-8 multibyte character for output
-static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
+std::string tokens_to_output_formatted_string(const llama_context * ctx, llama_token token)
 {
     std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
     // if the size is 1 and first bit is 1, meaning it's a partial character
@@ -139,7 +140,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
 }
 
 // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
+json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs)
 {
     json out = json::array();
     for (const auto &prob : probs)
@@ -162,7 +163,7 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
 
-static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_final_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
     json res = json{
         {"content", content},
@@ -1083,8 +1083,9 @@ static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
     return res;
 }
 
-static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_partial_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
     json res = json{
         {"content", content},
         {"stop", false},
@@ -1098,20 +1099,20 @@ static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
     return res;
 }
 
-static json format_tokenizer_response(const std::vector<llama_token> &tokens)
+json format_tokenizer_response(const std::vector<llama_token> & tokens)
 {
     return json{
         {"tokens", tokens}};
 }
 
-static json format_detokenized_response(std::string content)
+json format_detokenized_response(std::string content)
 {
     return json{
         {"content", content}};
 }
 
 template <typename T>
-static T json_value(const json &body, const std::string &key, const T &default_value)
+T json_value(const json & body, const std::string & key, const T & default_value)
 {
     // Fallback null to default value
     return body.contains(key) && !body.at(key).is_null()
@@ -1119,7 +1120,7 @@ static T json_value(const json &body, const std::string &key, const T &default_value)
         : default_value;
 }
 
-static void parse_options_completion(const json &body, llama_server_context &llama)
+void parse_options_completion(const json & body, llama_server_context & llama)
 {
     gpt_params default_params;
 
@@ -1198,7 +1199,7 @@ static void parse_options_completion(const json &body, llama_server_context &llama)
     LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
 }
 
-static void log_server_request(const Request &req, const Response &res)
+void log_server_request(const Request & req, const Response & res)
 {
     LOG_INFO("request", {
                             {"remote_addr", req.remote_addr},
@@ -1271,6 +1272,8 @@ void append_to_generated_text_from_generated_token_probs(llama_server_context & llama)
     }
 }
 
+} // namespace
+
 int main(int argc, char **argv)
 {
     // own arguments required by this example
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 947aa7ed3..785f7be62 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -18,6 +18,8 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+namespace {
+
 struct random_normal_distribution {
     std::mt19937 gen;
     std::normal_distribution<float> rd;
@@ -444,11 +446,11 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
     GGML_ASSERT(tensor->ne[3] == ne3);
 }
 
-static size_t hash(void * p) {
+size_t hash(void * p) {
     return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
 }
 
-static size_t hash_find(void * hash_table[], void * p) {
+size_t hash_find(void * hash_table[], void * p) {
     size_t h = hash(p);
 
     // linear probing
@@ -463,7 +465,7 @@ static size_t hash_find(void * hash_table[], void * p) {
     return i;
 }
 
-static bool hash_insert(void * hash_table[], void * p) {
+bool hash_insert(void * hash_table[], void * p) {
     //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);
 
@@ -479,7 +481,7 @@ static bool hash_insert(void * hash_table[], void * p) {
     return false;
 }
 
-static bool hash_contains(void * hash_table[], void * p) {
+bool hash_contains(void * hash_table[], void * p) {
     size_t i = hash_find(hash_table, p);
     return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
 }
@@ -488,7 +490,6 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
-//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
 
 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -503,12 +504,12 @@ void free_hash_map(struct hash_map * map) {
     delete map;
 }
 
-static bool ggml_is_view(struct ggml_tensor * t) {
+bool ggml_is_view(struct ggml_tensor * t) {
     return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
            t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
 }
 
-static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     switch (t->op) {
         case GGML_OP_PERMUTE:
         case GGML_OP_RESHAPE:
@@ -522,7 +523,7 @@ static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     }
 }
 
-static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
     struct ggml_tensor * parent = t;
     do {
         parent = get_view_parent(parent);
@@ -1988,6 +1989,8 @@ void opt_callback(void * vdata, float * sched) {
     data->shuffle_countdown -= n_batch;
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     struct train_params params = get_default_train_params();
diff --git a/llama.cpp b/llama.cpp
index 146605d44..cdf7d88c4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1,3 +1,4 @@
+#define LLAMA_API_INTERNAL
 #include "llama.h"
 
 #include "ggml.h"
@@ -108,7 +109,7 @@ static size_t utf8_len(char src) {
     return lookup[highbits];
 }
 
-void replace_all(std::string & s, const std::string & search, const std::string & replace) {
+static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
     std::string result;
     for (size_t pos = 0; ; pos += search.length()) {
         auto new_pos = s.find(search, pos);
@@ -1560,7 +1561,7 @@ struct llama_model_loader {
 // load LLaMA models
 //
 
-std::string llama_model_ftype_name(enum llama_ftype ftype) {
+static std::string llama_model_ftype_name(enum llama_ftype ftype) {
     if (ftype & LLAMA_FTYPE_GUESSED) {
         return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
     }
@@ -3945,7 +3946,7 @@ struct llama_grammar_candidate {
 
 // Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
 // pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
-std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const char * src,
         llama_partial_utf8 partial_start) {
     static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
@@ -5526,7 +5527,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
 }
 
 // TODO: after the GGUF PR, this likely won't work and needs to be updated
-int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
+static int llama_apply_lora_from_file_internal(
+    const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads
+) {
     LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
 
     const int64_t t_start_lora_us = ggml_time_us();
@@ -6073,7 +6076,7 @@ struct llama_context * llama_new_context_with_model(
     return ctx;
 }
 
-struct llama_context * llama_init_from_file(
+static struct llama_context * llama_init_from_file(
         const char * path_model,
         struct llama_context_params params) {
     struct llama_model * model = llama_load_model_from_file(path_model, params);
@@ -6278,7 +6281,7 @@ struct llama_data_file_context : llama_data_context {
  *     llama_copy_state_data(ctx, &data_ctx);
  *
 */
-void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
+static void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
     // copy rng
     {
         std::stringstream rng_ss;
@@ -6816,7 +6819,9 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
 }
 
 // For internal test use
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &
+{
     return ctx->model.tensors_by_name;
 }
 
diff --git a/llama.h b/llama.h
index 37975bebe..f494a83f1 100644
--- a/llama.h
+++ b/llama.h
@@ -540,7 +540,8 @@ extern "C" {
 
 struct ggml_tensor;
 
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &;
 
 #endif // LLAMA_API_INTERNAL
diff --git a/pocs/vdot/vdot.cpp b/pocs/vdot/vdot.cpp
index 48758cda8..048a3bde1 100644
--- a/pocs/vdot/vdot.cpp
+++ b/pocs/vdot/vdot.cpp
@@ -16,6 +16,8 @@
 
 constexpr int kVecSize = 1 << 18;
 
+namespace {
+
 float drawFromGaussianPdf(std::mt19937& rndm) {
     constexpr double kScale = 1./(1. + std::mt19937::max());
     constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
@@ -218,6 +220,8 @@ static void dot_q4_q8(const int n, float* s, const void* vx, const void* vy) {
     *s = sumf;
 }
 
+} // namespace
+
 int main(int argc, char** argv) {
 
     int nloop = argc > 1 ? atoi(argv[1]) : 10;
diff --git a/tests/test-opt.cpp b/tests/test-opt.cpp
index 8ab240202..af4bac233 100644
--- a/tests/test-opt.cpp
+++ b/tests/test-opt.cpp
@@ -36,6 +36,8 @@
 
 #define GGML_PRINT(...) printf(__VA_ARGS__)
 
+namespace {
+
 float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -117,6 +119,8 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
     ((float *)t->data)[idx] = value;
 }
 
+} // namespace
+
 int main(void) {
     struct ggml_init_params params = {
         /* .mem_size   = */ 1024*1024*1024,
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index 8d3c162d2..6ec719dae 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -13,15 +13,17 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
-const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
-const float MAX_DOT_PRODUCT_ERROR = 0.02f;
+constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
+constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
+
+
+namespace {
 
 const char* RESULT_STR[] = {"ok", "FAILED"};
 
-
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
     for (size_t i = 0; i < n; i++) {
@@ -90,6 +92,8 @@ float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float * test_data1, const float * test_data2) {
     return fabsf(result - dot_ref) / test_size;
 }
 
+} // namespace
+
 int main(int argc, char * argv[]) {
     bool verbose = false;
     const size_t test_size = 32 * 128;
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index cbea7d452..b1375ea10 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -60,6 +60,8 @@ inline int64_t cpu_cycles() {
 
 #endif
 
+namespace {
+
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
     for (size_t i = 0; i < n; i++) {
@@ -137,6 +139,8 @@ void usage(char * argv[]) {
     printf("  --iterations          set test iteration number (%d)\n", ITERATIONS);
 }
 
+} // namespace
+
 int main(int argc, char * argv[]) {
     quantize_perf_params params {};
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 4437c3948..a928f53c1 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -12,6 +12,9 @@
 #include <cassert>
 #include <vector>
 
+
+namespace {
+
 void dump(const llama_token_data_array * candidates) {
     for (size_t i = 0; i < candidates->size; i++) {
         printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
@@ -173,6 +176,8 @@ void test_frequency_presence_penalty(
     }
 }
 
+} // namespace
+
 int main(void) {
     ggml_time_init();
diff --git a/tests/test-tokenizer-1-llama.cpp b/tests/test-tokenizer-1-llama.cpp
index ab3d822f2..804ea2486 100644
--- a/tests/test-tokenizer-1-llama.cpp
+++ b/tests/test-tokenizer-1-llama.cpp
@@ -13,7 +13,7 @@
 
 typedef int codepoint;
 
-std::string codepoint_to_utf8(codepoint cp) {
+static std::string codepoint_to_utf8(codepoint cp) {
     std::string result;
     if (0x00 <= cp && cp <= 0x7f) {
         result.push_back(cp);