check C++ code with -Wmissing-declarations

Cebtenzzre 2023-09-14 19:03:50 -04:00
parent feea179e9f
commit cd27e8ab32
23 changed files with 157 additions and 92 deletions
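
The whole commit applies one pattern: with -Wmissing-declarations, GCC and Clang warn when a function with external linkage is defined without a previous declaration, which catches helpers that should either be declared in a header or be given internal linkage. As a minimal sketch of the warning and the fixes used below (hypothetical code, not part of the commit):

    // g++ -c -Wmissing-declarations example.cpp

    int leaky(int x) { return x + 1; }           // warning: no previous declaration

    static int fixed_a(int x) { return x + 1; }  // fix 1: static gives internal linkage

    namespace {                                  // fix 2: an anonymous namespace
    int fixed_b(int x) { return x + 1; }         // also gives internal linkage
    } // namespace

    int exported(int x);                         // fix 3: declare it first, normally
    int exported(int x) {                        // via the header that exposes it
        return fixed_a(x) + fixed_b(x);
    }

The diffs below use fix 1 for individual helpers and fix 2 for the examples' file-local code.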

@@ -427,6 +427,7 @@ if (LLAMA_ALL_WARNINGS)
         -Wextra
         -Wpedantic
         -Wcast-qual
+        -Wmissing-declarations
         -Wno-unused-function
         -Wno-multichar
     )

@@ -172,7 +172,7 @@ endif # LLAMA_DISABLE_LOGS
 # warnings
 MK_CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
                -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
+MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
 
 ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
 	# g++ only

@@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 
-void process_escapes(std::string& input) {
+static void process_escapes(std::string& input) {
     std::size_t input_len = input.length();
     std::size_t output_idx = 0;

@@ -158,7 +158,7 @@ namespace console {
         }
     }
 
-    char32_t getchar32() {
+    static char32_t getchar32() {
 #if defined(_WIN32)
         HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
         wchar_t high_surrogate = 0;
@@ -212,7 +212,7 @@ namespace console {
 #endif
     }
 
-    void pop_cursor() {
+    static void pop_cursor() {
 #if defined(_WIN32)
         if (hConsole != NULL) {
             CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
@@ -233,7 +233,7 @@ namespace console {
         putc('\b', out);
     }
 
-    int estimateWidth(char32_t codepoint) {
+    static int estimateWidth(char32_t codepoint) {
 #if defined(_WIN32)
         (void)codepoint;
         return 1;
@@ -242,7 +242,7 @@ namespace console {
 #endif
     }
 
-    int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
+    static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
 #if defined(_WIN32)
         CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
         if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
@@ -303,7 +303,7 @@ namespace console {
 #endif
     }
 
-    void replace_last(char ch) {
+    static void replace_last(char ch) {
 #if defined(_WIN32)
         pop_cursor();
         put_codepoint(&ch, 1, 1);
@@ -312,7 +312,7 @@ namespace console {
 #endif
     }
 
-    void append_utf8(char32_t ch, std::string & out) {
+    static void append_utf8(char32_t ch, std::string & out) {
         if (ch <= 0x7F) {
             out.push_back(static_cast<unsigned char>(ch));
         } else if (ch <= 0x7FF) {
@@ -333,7 +333,7 @@ namespace console {
     }
 
     // Helper function to remove the last UTF-8 character from a string
-    void pop_back_utf8_char(std::string & line) {
+    static void pop_back_utf8_char(std::string & line) {
         if (line.empty()) {
             return;
         }
@@ -349,7 +349,7 @@ namespace console {
         line.erase(pos);
     }
 
-    bool readline_advanced(std::string & line, bool multiline_input) {
+    static bool readline_advanced(std::string & line, bool multiline_input) {
         if (out != stdout) {
             fflush(stdout);
         }
@@ -452,7 +452,7 @@ namespace console {
         return has_more;
     }
 
-    bool readline_simple(std::string & line, bool multiline_input) {
+    static bool readline_simple(std::string & line, bool multiline_input) {
 #if defined(_WIN32)
         std::wstring wline;
         if (!std::getline(std::wcin, wline)) {
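
Worth noting: a named namespace like console does not confer internal linkage, so -Wmissing-declarations still fires inside it; marking the file-local helpers static, as above, is what silences it. A small compilable illustration (hypothetical names, not from the commit):

    #include <cstdio>

    namespace console {
        void flush_output();                     // without this prior declaration,
        void flush_output() { fflush(stdout); }  // the definition would warn

        static void local_note() {               // static: internal linkage,
            puts("console ready");               // no declaration required
        }
    }

    int main() {
        console::flush_output();
        console::local_note();
    }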

@@ -9,7 +9,7 @@
 namespace grammar_parser {
     // NOTE: assumes valid utf8 (but checks for overrun)
     // copied from llama.cpp
-    std::pair<uint32_t, const char *> decode_utf8(const char * src) {
+    static auto decode_utf8(const char * src) -> std::pair<uint32_t, const char *> {
         static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
         uint8_t first_byte = static_cast<uint8_t>(*src);
         uint8_t highbits = first_byte >> 4;
@@ -24,19 +24,19 @@ namespace grammar_parser {
         return std::make_pair(value, pos);
     }
 
-    uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
+    static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
         uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
         auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
         return result.first->second;
     }
 
-    uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
+    static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
         uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
         state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
         return next_id;
     }
 
-    void add_rule(
+    static void add_rule(
             parse_state & state,
             uint32_t rule_id,
             const std::vector<llama_grammar_element> & rule) {
@@ -46,11 +46,11 @@ namespace grammar_parser {
         state.rules[rule_id] = rule;
     }
 
-    bool is_word_char(char c) {
+    static bool is_word_char(char c) {
         return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
     }
 
-    std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
+    static auto parse_hex(const char * src, int size) -> std::pair<uint32_t, const char *> {
         const char * pos = src;
         const char * end = src + size;
         uint32_t value = 0;
@@ -73,7 +73,7 @@ namespace grammar_parser {
         return std::make_pair(value, pos);
     }
 
-    const char * parse_space(const char * src, bool newline_ok) {
+    static const char * parse_space(const char * src, bool newline_ok) {
         const char * pos = src;
         while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
                 (newline_ok && (*pos == '\r' || *pos == '\n'))) {
@@ -88,7 +88,7 @@ namespace grammar_parser {
         return pos;
     }
 
-    const char * parse_name(const char * src) {
+    static const char * parse_name(const char * src) {
         const char * pos = src;
         while (is_word_char(*pos)) {
             pos++;
@@ -99,7 +99,7 @@ namespace grammar_parser {
         return pos;
     }
 
-    std::pair<uint32_t, const char *> parse_char(const char * src) {
+    static auto parse_char(const char * src) -> std::pair<uint32_t, const char *> {
         if (*src == '\\') {
             switch (src[1]) {
                 case 'x': return parse_hex(src + 2, 2);
@@ -129,7 +129,7 @@ namespace grammar_parser {
             uint32_t rule_id,
             bool is_nested);
 
-    const char * parse_sequence(
+    static const char * parse_sequence(
             parse_state & state,
             const char * src,
             const std::string & rule_name,
@@ -247,7 +247,7 @@ namespace grammar_parser {
         return pos;
     }
 
-    const char * parse_rule(parse_state & state, const char * src) {
+    static const char * parse_rule(parse_state & state, const char * src) {
         const char * name_end = parse_name(src);
         const char * pos = parse_space(name_end, false);
         size_t name_len = name_end - src;
@@ -285,7 +285,7 @@ namespace grammar_parser {
         }
     }
 
-    void print_grammar_char(FILE * file, uint32_t c) {
+    static void print_grammar_char(FILE * file, uint32_t c) {
         if (0x20 <= c && c <= 0x7f) {
             fprintf(file, "%c", static_cast<char>(c));
         } else {
@@ -294,7 +294,7 @@ namespace grammar_parser {
         }
     }
 
-    bool is_char_element(llama_grammar_element elem) {
+    static bool is_char_element(llama_grammar_element elem) {
         switch (elem.type) {
             case LLAMA_GRETYPE_CHAR:     return true;
             case LLAMA_GRETYPE_CHAR_NOT: return true;
@@ -304,7 +304,7 @@ namespace grammar_parser {
         }
     }
 
-    void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
+    static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
         for (auto elem : rule) {
             switch (elem.type) {
                 case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
@@ -334,7 +334,7 @@ namespace grammar_parser {
         fprintf(file, "\n");
     }
 
-    void print_rule(
+    static void print_rule(
             FILE * file,
             uint32_t rule_id,
             const std::vector<llama_grammar_element> & rule,

@@ -9,11 +9,13 @@
 #endif
 
 #ifdef LLAMA_DEFAULT_RMS_EPS
-static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
+constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
 #else
-static const float rms_norm_eps = 5e-6f;
+constexpr float rms_norm_eps = 5e-6f;
 #endif
 
+namespace {
+
 float frand() {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -1504,6 +1506,8 @@ struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
                         ggml_new_f32(ctx, eps)))))));
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 1) {
         fprintf(stderr, "usage: %s\n", argv[0]);
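
The static const to constexpr change works because, unlike in C, a namespace-scope variable that is const or constexpr already has internal linkage in C++, making the static redundant. In brief (illustrative snippet, not from the commit):

    constexpr float eps_a = 5e-6f;     // internal linkage: constexpr implies const
    const float eps_b = 5e-6f;         // internal linkage: const and not extern
    float eps_c = 5e-6f;               // external linkage: visible across TUs
    extern const float eps_d;          // external linkage: defined elsewhere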

@@ -25,6 +25,8 @@
 #include <signal.h>
 #endif
 
+namespace {
+
 // Used for debugging to print out beam tokens.
 struct ostream_beam_view {
     llama_context * ctx;
@@ -82,6 +84,8 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_state) {
 #endif
 }
 
+} // namespace
+
 int main(int argc, char ** argv)
 {
     gpt_params params;

@@ -115,6 +115,8 @@ struct TransformerWeights {
     }
 };
 
+namespace {
+
 void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
     // we calloc instead of malloc to keep valgrind happy
     w->token_embedding_table = new float[p->vocab_size * p->dim]();
@@ -444,7 +446,7 @@ __attribute__((format(gnu_printf, 1, 2)))
 __attribute__((format(printf, 1, 2)))
 #endif
 #endif
-static std::string format(const char * fmt, ...) {
+std::string format(const char * fmt, ...) {
     va_list ap, ap2;
     va_start(ap, fmt);
     va_copy(ap2, ap);
@@ -540,7 +542,7 @@ bool is_ggml_file(const char *filename) {
     return magic == GGUF_MAGIC;
 }
 
-static std::string llama_escape_whitespaces(const std::string& text) {
+std::string llama_escape_whitespaces(const std::string& text) {
     std::ostringstream out;
     for (char c : text) {
         if (c == ' ') out << "\xe2\x96\x81";
@@ -909,6 +911,8 @@ std::string basename(const std::string &path) {
     return path.substr(pos + 1);
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     struct train_params params = get_default_train_params();
     if (!params_parse(argc, argv, &params)) {

@@ -13,8 +13,10 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+namespace {
+
 template<typename T>
-static std::string to_string(const T & val) {
+std::string to_string(const T & val) {
     std::stringstream ss;
     ss << val;
     return ss.str();
@@ -227,6 +229,8 @@ bool gguf_ex_read_1(const std::string & fname) {
     return true;
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 3) {
         printf("usage: %s data.gguf r|w\n", argv[0]);

@@ -33,13 +33,15 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static llama_context           ** g_ctx;
-static llama_model             ** g_model;
-static gpt_params               * g_params;
-static std::vector<llama_token> * g_input_tokens;
-static std::ostringstream       * g_output_ss;
-static std::vector<llama_token> * g_output_tokens;
-static bool is_interacting = false;
+namespace {
+
+llama_context           ** g_ctx;
+llama_model             ** g_model;
+gpt_params               * g_params;
+std::vector<llama_token> * g_input_tokens;
+std::ostringstream       * g_output_ss;
+std::vector<llama_token> * g_output_tokens;
+bool is_interacting = false;
 
 void write_logfile(
     const llama_context * ctx, const gpt_params & params, const llama_model * model,
@@ -101,6 +103,8 @@ void sigint_handler(int signo) {
 }
 #endif
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;

@@ -28,6 +28,8 @@ struct results_log_softmax {
     float prob;
 };
 
+namespace {
+
 void write_logfile(const llama_context * ctx, const gpt_params & params,
                    const llama_model * model, const struct results_perplexity & results) {
@@ -651,6 +653,8 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     printf("\n");
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     gpt_params params;

@@ -34,8 +34,8 @@ struct quantize_stats_params {
     std::vector<enum ggml_type> include_types;
 };
 
-const size_t HISTOGRAM_BUCKETS = 150;
-const double HISTOGRAM_RANGE = 0.03;
+constexpr size_t HISTOGRAM_BUCKETS = 150;
+constexpr double HISTOGRAM_RANGE = 0.03;
 
 struct error_stats {
     size_t num_samples;
@@ -44,6 +44,7 @@ struct error_stats {
     uint64_t error_histogram[HISTOGRAM_BUCKETS];
 };
 
+namespace {
 void quantize_stats_print_usage(int /*argc*/, char ** argv) {
     quantize_stats_params params;
@@ -133,7 +134,7 @@ void print_error_stats(const std::string & name, const error_stats & stats, bool print_histogram) {
 }
 
 // copied from ggml.h - verify that we can access this as a flat array
-static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
+bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
@@ -238,6 +239,8 @@ void test_roundtrip_on_layer(
     }
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     ggml_time_init();

@@ -7,13 +7,15 @@
 #include <vector>
 #include <string>
 
+namespace {
+
 struct quant_option {
     std::string name;
     llama_ftype ftype;
     std::string desc;
 };
 
-static const std::vector<struct quant_option> QUANT_OPTIONS = {
+const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
     { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
     { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
@@ -88,6 +90,8 @@ void usage(const char * executable) {
     exit(1);
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     if (argc < 3) {
         usage(argv[0]);

@@ -26,6 +26,8 @@
 using namespace httplib;
 using json = nlohmann::json;
 
+namespace {
+
 struct server_params
 {
     std::string hostname = "127.0.0.1";
@@ -48,7 +50,7 @@ struct completion_token_output
     llama_token tok;
 };
 
-static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
+size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b)
 {
     size_t i;
     for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
@@ -63,14 +65,13 @@ enum stop_type
     STOP_PARTIAL,
 };
 
-static bool ends_with(const std::string &str, const std::string &suffix)
+bool ends_with(const std::string & str, const std::string & suffix)
 {
     return str.size() >= suffix.size() &&
            0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
 }
 
-static size_t find_partial_stop_string(const std::string &stop,
-                                       const std::string &text)
+size_t find_partial_stop_string(const std::string & stop, const std::string & text)
 {
     if (!text.empty() && !stop.empty())
     {
@@ -91,7 +92,7 @@ static size_t find_partial_stop_string(const std::string &stop,
 }
 
 template <class Iter>
-static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
+std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
 {
     std::string ret;
     for (; begin != end; ++begin)
@@ -101,9 +102,9 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
     return ret;
 }
 
-static void server_log(const char *level, const char *function, int line,
-                       const char *message, const nlohmann::ordered_json &extra)
-{
+void server_log(
+    const char * level, const char * function, int line, const char * message, const nlohmann::ordered_json & extra
+) {
     nlohmann::ordered_json log{
         {"timestamp", time(nullptr)},
         {"level", level},
@@ -123,7 +124,7 @@ static void server_log(const char *level, const char *function, int line,
 }
 
 // format incomplete utf-8 multibyte character for output
-static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
+std::string tokens_to_output_formatted_string(const llama_context * ctx, llama_token token)
 {
     std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
     // if the size is 1 and first bit is 1, meaning it's a partial character
@@ -139,7 +140,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
 }
 
 // convert a vector of completion_token_output to json
-static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
+json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs)
 {
     json out = json::array();
     for (const auto &prob : probs)
@@ -162,7 +163,7 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
     return out;
 }
 
-static bool server_verbose = false;
+bool server_verbose = false;
 
 #if SERVER_VERBOSE != 1
 #define LOG_VERBOSE(MSG, ...)
@@ -691,8 +692,7 @@ struct llama_server_context
     }
 };
 
-static void server_print_usage(const char *argv0, const gpt_params &params,
-                               const server_params &sparams)
+void server_print_usage(const char * argv0, const gpt_params & params, const server_params & sparams)
 {
     printf("usage: %s [options]\n", argv0);
     printf("\n");
@@ -740,8 +740,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     printf("\n");
 }
 
-static void server_params_parse(int argc, char **argv, server_params &sparams,
-                                gpt_params &params)
+void server_params_parse(int argc, char ** argv, server_params & sparams, gpt_params & params)
 {
     gpt_params default_params;
     server_params default_sparams;
@@ -995,7 +994,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
     }
 }
 
-static json format_generation_settings(llama_server_context &llama)
+json format_generation_settings(llama_server_context & llama)
 {
     const auto eos_bias = llama.params.logit_bias.find(llama_token_eos(llama.ctx));
     const bool ignore_eos = eos_bias != llama.params.logit_bias.end() &&
@@ -1029,14 +1028,14 @@ static json format_generation_settings(llama_server_context &llama)
     };
 }
 
-static json format_embedding_response(llama_server_context &llama)
+json format_embedding_response(llama_server_context & llama)
 {
     return json{
         {"embedding", llama.getEmbedding()},
     };
 }
 
-static json format_timings(llama_server_context &llama)
+json format_timings(llama_server_context & llama)
 {
     const auto timings = llama_get_timings(llama.ctx);
@@ -1055,8 +1054,9 @@ static json format_timings(llama_server_context &llama)
     };
 }
 
-static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_final_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
     json res = json{
         {"content", content},
@@ -1083,8 +1083,9 @@ static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
     return res;
 }
 
-static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
-{
+json format_partial_response(
+    llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
+) {
     json res = json{
         {"content", content},
         {"stop", false},
@@ -1098,20 +1099,20 @@
     return res;
 }
 
-static json format_tokenizer_response(const std::vector<llama_token> &tokens)
+json format_tokenizer_response(const std::vector<llama_token> & tokens)
 {
     return json{
         {"tokens", tokens}};
 }
 
-static json format_detokenized_response(std::string content)
+json format_detokenized_response(std::string content)
 {
     return json{
         {"content", content}};
 }
 
 template <typename T>
-static T json_value(const json &body, const std::string &key, const T &default_value)
+T json_value(const json & body, const std::string & key, const T & default_value)
 {
     // Fallback null to default value
     return body.contains(key) && !body.at(key).is_null()
@@ -1119,7 +1120,7 @@ static T json_value(const json &body, const std::string &key, const T &default_value)
         : default_value;
 }
 
-static void parse_options_completion(const json &body, llama_server_context &llama)
+void parse_options_completion(const json & body, llama_server_context & llama)
 {
     gpt_params default_params;
@@ -1198,7 +1199,7 @@ static void parse_options_completion(const json &body, llama_server_context &llama)
     LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
 }
 
-static void log_server_request(const Request &req, const Response &res)
+void log_server_request(const Request & req, const Response & res)
 {
     LOG_INFO("request", {
         {"remote_addr", req.remote_addr},
@@ -1271,6 +1272,8 @@ void append_to_generated_text_from_generated_token_probs(llama_server_context &llama)
     }
 }
 
+} // namespace
+
 int main(int argc, char **argv)
 {
     // own arguments required by this example
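
One detail in this file: the json_value template only drops its static and gains no declaration. In C++, GCC does not apply -Wmissing-declarations to function templates or to inline functions, so a template inside the anonymous namespace needs nothing further. A compilable sketch (hypothetical helper, not from the commit):

    #include <string>

    namespace {

    // function templates are exempt from -Wmissing-declarations in C++,
    // so no forward declaration (and no static) is needed here
    template <typename T>
    T value_or(const T * ptr, const T & fallback) {
        return ptr != nullptr ? *ptr : fallback;
    }

    } // namespace

    int main() {
        std::string name = "llama";
        return value_or<std::string>(nullptr, name).empty() ? 1 : 0;
    }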

@@ -18,6 +18,8 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
+namespace {
+
 struct random_normal_distribution {
     std::mt19937 gen;
     std::normal_distribution<float> rd;
@@ -444,11 +446,11 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
     GGML_ASSERT(tensor->ne[3] == ne3);
 }
 
-static size_t hash(void * p) {
+size_t hash(void * p) {
     return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
 }
 
-static size_t hash_find(void * hash_table[], void * p) {
+size_t hash_find(void * hash_table[], void * p) {
     size_t h = hash(p);
 
     // linear probing
@@ -463,7 +465,7 @@ static size_t hash_find(void * hash_table[], void * p) {
     return i;
 }
 
-static bool hash_insert(void * hash_table[], void * p) {
+bool hash_insert(void * hash_table[], void * p) {
     //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);
@@ -479,7 +481,7 @@ static bool hash_insert(void * hash_table[], void * p) {
     return false;
 }
 
-static bool hash_contains(void * hash_table[], void * p) {
+bool hash_contains(void * hash_table[], void * p) {
     size_t i = hash_find(hash_table, p);
     return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
 }
@@ -488,7 +490,6 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
-//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
 
 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -503,12 +504,12 @@ void free_hash_map(struct hash_map * map) {
     delete map;
 }
 
-static bool ggml_is_view(struct ggml_tensor * t) {
+bool ggml_is_view(struct ggml_tensor * t) {
     return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
            t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
 }
 
-static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     switch (t->op) {
         case GGML_OP_PERMUTE:
         case GGML_OP_RESHAPE:
@@ -522,7 +523,7 @@ static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
     }
 }
 
-static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
+struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
     struct ggml_tensor * parent = t;
     do {
         parent = get_view_parent(parent);
@@ -1988,6 +1989,8 @@ void opt_callback(void * vdata, float * sched) {
     data->shuffle_countdown -= n_batch;
 }
 
+} // namespace
+
 int main(int argc, char ** argv) {
     struct train_params params = get_default_train_params();

@@ -1,3 +1,4 @@
+#define LLAMA_API_INTERNAL
 #include "llama.h"
 
 #include "ggml.h"
@@ -108,7 +109,7 @@ static size_t utf8_len(char src) {
     return lookup[highbits];
 }
 
-void replace_all(std::string & s, const std::string & search, const std::string & replace) {
+static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    std::string result;
    for (size_t pos = 0; ; pos += search.length()) {
        auto new_pos = s.find(search, pos);
@@ -1560,7 +1561,7 @@ struct llama_model_loader {
 // load LLaMA models
 //
 
-std::string llama_model_ftype_name(enum llama_ftype ftype) {
+static std::string llama_model_ftype_name(enum llama_ftype ftype) {
     if (ftype & LLAMA_FTYPE_GUESSED) {
         return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
     }
@@ -3945,7 +3946,7 @@ struct llama_grammar_candidate {
 // Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
 // pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
-std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const char * src,
         llama_partial_utf8 partial_start) {
     static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
@@ -5526,7 +5527,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const std::string & fname_out, const llama_model_quantize_params * params) {
 }
 
 // TODO: after the GGUF PR, this likely won't work and needs to be updated
-int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
+static int llama_apply_lora_from_file_internal(
+    const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads
+) {
     LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
 
     const int64_t t_start_lora_us = ggml_time_us();
@@ -6073,7 +6076,7 @@ struct llama_context * llama_new_context_with_model(
     return ctx;
 }
 
-struct llama_context * llama_init_from_file(
+static struct llama_context * llama_init_from_file(
         const char * path_model,
         struct llama_context_params params) {
     struct llama_model * model = llama_load_model_from_file(path_model, params);
@@ -6278,7 +6281,7 @@ struct llama_data_file_context : llama_data_context {
 * llama_copy_state_data(ctx, &data_ctx);
 *
 */
-void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
+static void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
     // copy rng
     {
         std::stringstream rng_ss;
@@ -6816,7 +6819,9 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
 }
 
 // For internal test use
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &
+{
     return ctx->model.tensors_by_name;
 }

@@ -540,7 +540,8 @@ extern "C" {
 struct ggml_tensor;
 
-const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
+auto llama_internal_get_tensor_map(struct llama_context * ctx)
+    -> const std::vector<std::pair<std::string, struct ggml_tensor *>> &;
 
 #endif // LLAMA_API_INTERNAL
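
Both the declaration here and the definition in llama.cpp move to a trailing return type, which keeps the long return type from burying the function name; the two spellings declare the same function. Roughly (hypothetical names, not from the commit):

    #include <string>
    #include <utility>
    #include <vector>

    // classic spelling: the return type comes first and dominates the line
    const std::vector<std::pair<std::string, int>> & get_map_classic();

    // trailing return type: name up front, otherwise an identical signature
    auto get_map_trailing() -> const std::vector<std::pair<std::string, int>> &;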

@@ -16,6 +16,8 @@
 constexpr int kVecSize = 1 << 18;
 
+namespace {
+
 float drawFromGaussianPdf(std::mt19937& rndm) {
     constexpr double kScale = 1./(1. + std::mt19937::max());
     constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
@@ -218,6 +220,8 @@ static void dot_q4_q8(const int n, float* s, const void* vx, const void* vy) {
     *s = sumf;
 }
 
+} // namespace
+
 int main(int argc, char** argv) {
     int nloop = argc > 1 ? atoi(argv[1]) : 10;

@@ -36,6 +36,8 @@
 #define GGML_PRINT(...) printf(__VA_ARGS__)
 
+namespace {
+
 float frand(void) {
     return (float)rand()/(float)RAND_MAX;
 }
@@ -117,6 +119,8 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
     ((float *)t->data)[idx] = value;
 }
 
+} // namespace
+
 int main(void) {
     struct ggml_init_params params = {
         /* .mem_size   = */ 1024*1024*1024,

@@ -13,15 +13,17 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
-const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
-const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
-const float MAX_DOT_PRODUCT_ERROR = 0.02f;
+constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
+constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
+constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
 
+namespace {
+
 const char* RESULT_STR[] = {"ok", "FAILED"};
 
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
     for (size_t i = 0; i < n; i++) {
@@ -90,6 +92,8 @@ float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float
     return fabsf(result - dot_ref) / test_size;
 }
 
+} // namespace
+
 int main(int argc, char * argv[]) {
     bool verbose = false;
     const size_t test_size = 32 * 128;

@@ -60,6 +60,8 @@ inline int64_t cpu_cycles() {
 #endif
 
+namespace {
+
 // Generate synthetic data
 void generate_data(float offset, size_t n, float * dst) {
     for (size_t i = 0; i < n; i++) {
@@ -137,6 +139,8 @@ void usage(char * argv[]) {
     printf("  set test iteration number (%d)\n", ITERATIONS);
 }
 
+} // namespace
+
 int main(int argc, char * argv[]) {
     quantize_perf_params params {};

@@ -12,6 +12,9 @@
 #include <vector>
 #include <algorithm>
 
+namespace {
+
 void dump(const llama_token_data_array * candidates) {
     for (size_t i = 0; i < candidates->size; i++) {
         printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
@@ -173,6 +176,8 @@ void test_frequency_presence_penalty(
     }
 }
 
+} // namespace
+
 int main(void) {
     ggml_time_init();

@@ -13,7 +13,7 @@
 typedef int codepoint;
 
-std::string codepoint_to_utf8(codepoint cp) {
+static std::string codepoint_to_utf8(codepoint cp) {
     std::string result;
     if (0x00 <= cp && cp <= 0x7f) {
         result.push_back(cp);