check C++ code with -Wmissing-declarations
This commit is contained in:
parent
feea179e9f
commit
cd27e8ab32
23 changed files with 157 additions and 92 deletions
|
@ -427,6 +427,7 @@ if (LLAMA_ALL_WARNINGS)
|
||||||
-Wextra
|
-Wextra
|
||||||
-Wpedantic
|
-Wpedantic
|
||||||
-Wcast-qual
|
-Wcast-qual
|
||||||
|
-Wmissing-declarations
|
||||||
-Wno-unused-function
|
-Wno-unused-function
|
||||||
-Wno-multichar
|
-Wno-multichar
|
||||||
)
|
)
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -172,7 +172,7 @@ endif # LLAMA_DISABLE_LOGS
|
||||||
# warnings
|
# warnings
|
||||||
MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
||||||
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
|
-Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
|
||||||
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
|
MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
|
||||||
|
|
||||||
ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
|
ifeq '' '$(findstring clang,$(shell $(CXX) --version))'
|
||||||
# g++ only
|
# g++ only
|
||||||
|
|
|
@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
|
||||||
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_escapes(std::string& input) {
|
static void process_escapes(std::string& input) {
|
||||||
std::size_t input_len = input.length();
|
std::size_t input_len = input.length();
|
||||||
std::size_t output_idx = 0;
|
std::size_t output_idx = 0;
|
||||||
|
|
||||||
|
|
|
@ -158,7 +158,7 @@ namespace console {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char32_t getchar32() {
|
static char32_t getchar32() {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
|
HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
|
||||||
wchar_t high_surrogate = 0;
|
wchar_t high_surrogate = 0;
|
||||||
|
@ -212,7 +212,7 @@ namespace console {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void pop_cursor() {
|
static void pop_cursor() {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
if (hConsole != NULL) {
|
if (hConsole != NULL) {
|
||||||
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
||||||
|
@ -233,7 +233,7 @@ namespace console {
|
||||||
putc('\b', out);
|
putc('\b', out);
|
||||||
}
|
}
|
||||||
|
|
||||||
int estimateWidth(char32_t codepoint) {
|
static int estimateWidth(char32_t codepoint) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
(void)codepoint;
|
(void)codepoint;
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -242,7 +242,7 @@ namespace console {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
|
static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
||||||
if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
|
if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
|
||||||
|
@ -303,7 +303,7 @@ namespace console {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void replace_last(char ch) {
|
static void replace_last(char ch) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
pop_cursor();
|
pop_cursor();
|
||||||
put_codepoint(&ch, 1, 1);
|
put_codepoint(&ch, 1, 1);
|
||||||
|
@ -312,7 +312,7 @@ namespace console {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void append_utf8(char32_t ch, std::string & out) {
|
static void append_utf8(char32_t ch, std::string & out) {
|
||||||
if (ch <= 0x7F) {
|
if (ch <= 0x7F) {
|
||||||
out.push_back(static_cast<unsigned char>(ch));
|
out.push_back(static_cast<unsigned char>(ch));
|
||||||
} else if (ch <= 0x7FF) {
|
} else if (ch <= 0x7FF) {
|
||||||
|
@ -333,7 +333,7 @@ namespace console {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to remove the last UTF-8 character from a string
|
// Helper function to remove the last UTF-8 character from a string
|
||||||
void pop_back_utf8_char(std::string & line) {
|
static void pop_back_utf8_char(std::string & line) {
|
||||||
if (line.empty()) {
|
if (line.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -349,7 +349,7 @@ namespace console {
|
||||||
line.erase(pos);
|
line.erase(pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool readline_advanced(std::string & line, bool multiline_input) {
|
static bool readline_advanced(std::string & line, bool multiline_input) {
|
||||||
if (out != stdout) {
|
if (out != stdout) {
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
@ -452,7 +452,7 @@ namespace console {
|
||||||
return has_more;
|
return has_more;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool readline_simple(std::string & line, bool multiline_input) {
|
static bool readline_simple(std::string & line, bool multiline_input) {
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
std::wstring wline;
|
std::wstring wline;
|
||||||
if (!std::getline(std::wcin, wline)) {
|
if (!std::getline(std::wcin, wline)) {
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
namespace grammar_parser {
|
namespace grammar_parser {
|
||||||
// NOTE: assumes valid utf8 (but checks for overrun)
|
// NOTE: assumes valid utf8 (but checks for overrun)
|
||||||
// copied from llama.cpp
|
// copied from llama.cpp
|
||||||
std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
static auto decode_utf8(const char * src) -> std::pair<uint32_t, const char *> {
|
||||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
||||||
uint8_t first_byte = static_cast<uint8_t>(*src);
|
uint8_t first_byte = static_cast<uint8_t>(*src);
|
||||||
uint8_t highbits = first_byte >> 4;
|
uint8_t highbits = first_byte >> 4;
|
||||||
|
@ -24,19 +24,19 @@ namespace grammar_parser {
|
||||||
return std::make_pair(value, pos);
|
return std::make_pair(value, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
||||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||||
auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
|
auto result = state.symbol_ids.insert(std::make_pair(std::string(src, len), next_id));
|
||||||
return result.first->second;
|
return result.first->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
||||||
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||||
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
||||||
return next_id;
|
return next_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_rule(
|
static void add_rule(
|
||||||
parse_state & state,
|
parse_state & state,
|
||||||
uint32_t rule_id,
|
uint32_t rule_id,
|
||||||
const std::vector<llama_grammar_element> & rule) {
|
const std::vector<llama_grammar_element> & rule) {
|
||||||
|
@ -46,11 +46,11 @@ namespace grammar_parser {
|
||||||
state.rules[rule_id] = rule;
|
state.rules[rule_id] = rule;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_word_char(char c) {
|
static bool is_word_char(char c) {
|
||||||
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
|
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || ('0' <= c && c <= '9');
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
static auto parse_hex(const char * src, int size) -> std::pair<uint32_t, const char *> {
|
||||||
const char * pos = src;
|
const char * pos = src;
|
||||||
const char * end = src + size;
|
const char * end = src + size;
|
||||||
uint32_t value = 0;
|
uint32_t value = 0;
|
||||||
|
@ -73,7 +73,7 @@ namespace grammar_parser {
|
||||||
return std::make_pair(value, pos);
|
return std::make_pair(value, pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_space(const char * src, bool newline_ok) {
|
static const char * parse_space(const char * src, bool newline_ok) {
|
||||||
const char * pos = src;
|
const char * pos = src;
|
||||||
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
||||||
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
||||||
|
@ -88,7 +88,7 @@ namespace grammar_parser {
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_name(const char * src) {
|
static const char * parse_name(const char * src) {
|
||||||
const char * pos = src;
|
const char * pos = src;
|
||||||
while (is_word_char(*pos)) {
|
while (is_word_char(*pos)) {
|
||||||
pos++;
|
pos++;
|
||||||
|
@ -99,7 +99,7 @@ namespace grammar_parser {
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<uint32_t, const char *> parse_char(const char * src) {
|
static auto parse_char(const char * src) -> std::pair<uint32_t, const char *> {
|
||||||
if (*src == '\\') {
|
if (*src == '\\') {
|
||||||
switch (src[1]) {
|
switch (src[1]) {
|
||||||
case 'x': return parse_hex(src + 2, 2);
|
case 'x': return parse_hex(src + 2, 2);
|
||||||
|
@ -129,7 +129,7 @@ namespace grammar_parser {
|
||||||
uint32_t rule_id,
|
uint32_t rule_id,
|
||||||
bool is_nested);
|
bool is_nested);
|
||||||
|
|
||||||
const char * parse_sequence(
|
static const char * parse_sequence(
|
||||||
parse_state & state,
|
parse_state & state,
|
||||||
const char * src,
|
const char * src,
|
||||||
const std::string & rule_name,
|
const std::string & rule_name,
|
||||||
|
@ -247,7 +247,7 @@ namespace grammar_parser {
|
||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * parse_rule(parse_state & state, const char * src) {
|
static const char * parse_rule(parse_state & state, const char * src) {
|
||||||
const char * name_end = parse_name(src);
|
const char * name_end = parse_name(src);
|
||||||
const char * pos = parse_space(name_end, false);
|
const char * pos = parse_space(name_end, false);
|
||||||
size_t name_len = name_end - src;
|
size_t name_len = name_end - src;
|
||||||
|
@ -285,7 +285,7 @@ namespace grammar_parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_grammar_char(FILE * file, uint32_t c) {
|
static void print_grammar_char(FILE * file, uint32_t c) {
|
||||||
if (0x20 <= c && c <= 0x7f) {
|
if (0x20 <= c && c <= 0x7f) {
|
||||||
fprintf(file, "%c", static_cast<char>(c));
|
fprintf(file, "%c", static_cast<char>(c));
|
||||||
} else {
|
} else {
|
||||||
|
@ -294,7 +294,7 @@ namespace grammar_parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_char_element(llama_grammar_element elem) {
|
static bool is_char_element(llama_grammar_element elem) {
|
||||||
switch (elem.type) {
|
switch (elem.type) {
|
||||||
case LLAMA_GRETYPE_CHAR: return true;
|
case LLAMA_GRETYPE_CHAR: return true;
|
||||||
case LLAMA_GRETYPE_CHAR_NOT: return true;
|
case LLAMA_GRETYPE_CHAR_NOT: return true;
|
||||||
|
@ -304,7 +304,7 @@ namespace grammar_parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
|
static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
|
||||||
for (auto elem : rule) {
|
for (auto elem : rule) {
|
||||||
switch (elem.type) {
|
switch (elem.type) {
|
||||||
case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
|
case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
|
||||||
|
@ -334,7 +334,7 @@ namespace grammar_parser {
|
||||||
fprintf(file, "\n");
|
fprintf(file, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_rule(
|
static void print_rule(
|
||||||
FILE * file,
|
FILE * file,
|
||||||
uint32_t rule_id,
|
uint32_t rule_id,
|
||||||
const std::vector<llama_grammar_element> & rule,
|
const std::vector<llama_grammar_element> & rule,
|
||||||
|
|
|
@ -9,11 +9,13 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef LLAMA_DEFAULT_RMS_EPS
|
#ifdef LLAMA_DEFAULT_RMS_EPS
|
||||||
static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
|
constexpr float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
|
||||||
#else
|
#else
|
||||||
static const float rms_norm_eps = 5e-6f;
|
constexpr float rms_norm_eps = 5e-6f;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
float frand() {
|
float frand() {
|
||||||
return (float)rand()/(float)RAND_MAX;
|
return (float)rand()/(float)RAND_MAX;
|
||||||
}
|
}
|
||||||
|
@ -1504,6 +1506,8 @@ struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_t
|
||||||
ggml_new_f32(ctx, eps)))))));
|
ggml_new_f32(ctx, eps)))))));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
if (argc < 1) {
|
if (argc < 1) {
|
||||||
fprintf(stderr, "usage: %s\n", argv[0]);
|
fprintf(stderr, "usage: %s\n", argv[0]);
|
||||||
|
|
|
@ -25,6 +25,8 @@
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Used for debugging to print out beam tokens.
|
// Used for debugging to print out beam tokens.
|
||||||
struct ostream_beam_view {
|
struct ostream_beam_view {
|
||||||
llama_context * ctx;
|
llama_context * ctx;
|
||||||
|
@ -82,6 +84,8 @@ void beam_search_callback(void * callback_data_ptr, llama_beams_state beams_stat
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv)
|
int main(int argc, char ** argv)
|
||||||
{
|
{
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
|
|
|
@ -115,6 +115,8 @@ struct TransformerWeights {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
|
void malloc_weights(TransformerWeights* w, Config* p, bool shared_weights) {
|
||||||
// we calloc instead of malloc to keep valgrind happy
|
// we calloc instead of malloc to keep valgrind happy
|
||||||
w->token_embedding_table = new float[p->vocab_size * p->dim]();
|
w->token_embedding_table = new float[p->vocab_size * p->dim]();
|
||||||
|
@ -444,7 +446,7 @@ __attribute__((format(gnu_printf, 1, 2)))
|
||||||
__attribute__((format(printf, 1, 2)))
|
__attribute__((format(printf, 1, 2)))
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
static std::string format(const char * fmt, ...) {
|
std::string format(const char * fmt, ...) {
|
||||||
va_list ap, ap2;
|
va_list ap, ap2;
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
va_copy(ap2, ap);
|
va_copy(ap2, ap);
|
||||||
|
@ -540,7 +542,7 @@ bool is_ggml_file(const char *filename) {
|
||||||
return magic == GGUF_MAGIC;
|
return magic == GGUF_MAGIC;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string llama_escape_whitespaces(const std::string& text) {
|
std::string llama_escape_whitespaces(const std::string& text) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
for (char c : text) {
|
for (char c : text) {
|
||||||
if (c == ' ') out << "\xe2\x96\x81";
|
if (c == ' ') out << "\xe2\x96\x81";
|
||||||
|
@ -909,6 +911,8 @@ std::string basename(const std::string &path) {
|
||||||
return path.substr(pos + 1);
|
return path.substr(pos + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
struct train_params params = get_default_train_params();
|
struct train_params params = get_default_train_params();
|
||||||
if (!params_parse(argc, argv, &params)) {
|
if (!params_parse(argc, argv, &params)) {
|
||||||
|
|
|
@ -13,8 +13,10 @@
|
||||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
static std::string to_string(const T & val) {
|
std::string to_string(const T & val) {
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << val;
|
ss << val;
|
||||||
return ss.str();
|
return ss.str();
|
||||||
|
@ -227,6 +229,8 @@ bool gguf_ex_read_1(const std::string & fname) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
if (argc < 3) {
|
if (argc < 3) {
|
||||||
printf("usage: %s data.gguf r|w\n", argv[0]);
|
printf("usage: %s data.gguf r|w\n", argv[0]);
|
||||||
|
|
|
@ -33,13 +33,15 @@
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static llama_context ** g_ctx;
|
namespace {
|
||||||
static llama_model ** g_model;
|
|
||||||
static gpt_params * g_params;
|
llama_context ** g_ctx;
|
||||||
static std::vector<llama_token> * g_input_tokens;
|
llama_model ** g_model;
|
||||||
static std::ostringstream * g_output_ss;
|
gpt_params * g_params;
|
||||||
static std::vector<llama_token> * g_output_tokens;
|
std::vector<llama_token> * g_input_tokens;
|
||||||
static bool is_interacting = false;
|
std::ostringstream * g_output_ss;
|
||||||
|
std::vector<llama_token> * g_output_tokens;
|
||||||
|
bool is_interacting = false;
|
||||||
|
|
||||||
void write_logfile(
|
void write_logfile(
|
||||||
const llama_context * ctx, const gpt_params & params, const llama_model * model,
|
const llama_context * ctx, const gpt_params & params, const llama_model * model,
|
||||||
|
@ -101,6 +103,8 @@ void sigint_handler(int signo) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
g_params = &params;
|
g_params = &params;
|
||||||
|
|
|
@ -28,6 +28,8 @@ struct results_log_softmax {
|
||||||
float prob;
|
float prob;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
void write_logfile(const llama_context * ctx, const gpt_params & params,
|
void write_logfile(const llama_context * ctx, const gpt_params & params,
|
||||||
const llama_model * model, const struct results_perplexity & results) {
|
const llama_model * model, const struct results_perplexity & results) {
|
||||||
|
|
||||||
|
@ -651,6 +653,8 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
|
|
||||||
|
|
|
@ -34,8 +34,8 @@ struct quantize_stats_params {
|
||||||
std::vector<enum ggml_type> include_types;
|
std::vector<enum ggml_type> include_types;
|
||||||
};
|
};
|
||||||
|
|
||||||
const size_t HISTOGRAM_BUCKETS = 150;
|
constexpr size_t HISTOGRAM_BUCKETS = 150;
|
||||||
const double HISTOGRAM_RANGE = 0.03;
|
constexpr double HISTOGRAM_RANGE = 0.03;
|
||||||
|
|
||||||
struct error_stats {
|
struct error_stats {
|
||||||
size_t num_samples;
|
size_t num_samples;
|
||||||
|
@ -44,6 +44,7 @@ struct error_stats {
|
||||||
uint64_t error_histogram[HISTOGRAM_BUCKETS];
|
uint64_t error_histogram[HISTOGRAM_BUCKETS];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
void quantize_stats_print_usage(int /*argc*/, char ** argv) {
|
void quantize_stats_print_usage(int /*argc*/, char ** argv) {
|
||||||
quantize_stats_params params;
|
quantize_stats_params params;
|
||||||
|
@ -133,7 +134,7 @@ void print_error_stats(const std::string & name, const error_stats & stats, bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// copied from ggml.h - verify that we can access this as a flat array
|
// copied from ggml.h - verify that we can access this as a flat array
|
||||||
static bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
|
bool tensor_is_contiguous(const struct ggml_tensor * tensor) {
|
||||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||||
|
|
||||||
return
|
return
|
||||||
|
@ -238,6 +239,8 @@ void test_roundtrip_on_layer(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
|
|
|
@ -7,13 +7,15 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
struct quant_option {
|
struct quant_option {
|
||||||
std::string name;
|
std::string name;
|
||||||
llama_ftype ftype;
|
llama_ftype ftype;
|
||||||
std::string desc;
|
std::string desc;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const std::vector<struct quant_option> QUANT_OPTIONS = {
|
const std::vector<struct quant_option> QUANT_OPTIONS = {
|
||||||
{ "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
|
{ "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
|
||||||
{ "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
|
{ "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
|
||||||
{ "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
|
{ "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
|
||||||
|
@ -88,6 +90,8 @@ void usage(const char * executable) {
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
if (argc < 3) {
|
if (argc < 3) {
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
|
|
|
@ -26,6 +26,8 @@
|
||||||
using namespace httplib;
|
using namespace httplib;
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
struct server_params
|
struct server_params
|
||||||
{
|
{
|
||||||
std::string hostname = "127.0.0.1";
|
std::string hostname = "127.0.0.1";
|
||||||
|
@ -48,7 +50,7 @@ struct completion_token_output
|
||||||
llama_token tok;
|
llama_token tok;
|
||||||
};
|
};
|
||||||
|
|
||||||
static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
|
size_t common_part(const std::vector<llama_token> & a, const std::vector<llama_token> & b)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i;
|
||||||
for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
|
for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
|
||||||
|
@ -63,14 +65,13 @@ enum stop_type
|
||||||
STOP_PARTIAL,
|
STOP_PARTIAL,
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool ends_with(const std::string &str, const std::string &suffix)
|
bool ends_with(const std::string & str, const std::string & suffix)
|
||||||
{
|
{
|
||||||
return str.size() >= suffix.size() &&
|
return str.size() >= suffix.size() &&
|
||||||
0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
|
0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t find_partial_stop_string(const std::string &stop,
|
size_t find_partial_stop_string(const std::string & stop, const std::string & text)
|
||||||
const std::string &text)
|
|
||||||
{
|
{
|
||||||
if (!text.empty() && !stop.empty())
|
if (!text.empty() && !stop.empty())
|
||||||
{
|
{
|
||||||
|
@ -91,7 +92,7 @@ static size_t find_partial_stop_string(const std::string &stop,
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Iter>
|
template <class Iter>
|
||||||
static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
||||||
{
|
{
|
||||||
std::string ret;
|
std::string ret;
|
||||||
for (; begin != end; ++begin)
|
for (; begin != end; ++begin)
|
||||||
|
@ -101,9 +102,9 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void server_log(const char *level, const char *function, int line,
|
void server_log(
|
||||||
const char *message, const nlohmann::ordered_json &extra)
|
const char * level, const char * function, int line, const char * message, const nlohmann::ordered_json & extra
|
||||||
{
|
) {
|
||||||
nlohmann::ordered_json log{
|
nlohmann::ordered_json log{
|
||||||
{"timestamp", time(nullptr)},
|
{"timestamp", time(nullptr)},
|
||||||
{"level", level},
|
{"level", level},
|
||||||
|
@ -123,7 +124,7 @@ static void server_log(const char *level, const char *function, int line,
|
||||||
}
|
}
|
||||||
|
|
||||||
// format incomplete utf-8 multibyte character for output
|
// format incomplete utf-8 multibyte character for output
|
||||||
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
|
std::string tokens_to_output_formatted_string(const llama_context * ctx, llama_token token)
|
||||||
{
|
{
|
||||||
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
|
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
|
||||||
// if the size is 1 and first bit is 1, meaning it's a partial character
|
// if the size is 1 and first bit is 1, meaning it's a partial character
|
||||||
|
@ -139,7 +140,7 @@ static std::string tokens_to_output_formatted_string(const llama_context *ctx, c
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert a vector of completion_token_output to json
|
// convert a vector of completion_token_output to json
|
||||||
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> & probs)
|
json probs_vector_to_json(const llama_context * ctx, const std::vector<completion_token_output> & probs)
|
||||||
{
|
{
|
||||||
json out = json::array();
|
json out = json::array();
|
||||||
for (const auto &prob : probs)
|
for (const auto &prob : probs)
|
||||||
|
@ -162,7 +163,7 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<com
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool server_verbose = false;
|
bool server_verbose = false;
|
||||||
|
|
||||||
#if SERVER_VERBOSE != 1
|
#if SERVER_VERBOSE != 1
|
||||||
#define LOG_VERBOSE(MSG, ...)
|
#define LOG_VERBOSE(MSG, ...)
|
||||||
|
@ -691,8 +692,7 @@ struct llama_server_context
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void server_print_usage(const char *argv0, const gpt_params &params,
|
void server_print_usage(const char * argv0, const gpt_params & params, const server_params & sparams)
|
||||||
const server_params &sparams)
|
|
||||||
{
|
{
|
||||||
printf("usage: %s [options]\n", argv0);
|
printf("usage: %s [options]\n", argv0);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
@ -740,8 +740,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void server_params_parse(int argc, char **argv, server_params &sparams,
|
void server_params_parse(int argc, char ** argv, server_params & sparams, gpt_params & params)
|
||||||
gpt_params &params)
|
|
||||||
{
|
{
|
||||||
gpt_params default_params;
|
gpt_params default_params;
|
||||||
server_params default_sparams;
|
server_params default_sparams;
|
||||||
|
@ -995,7 +994,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static json format_generation_settings(llama_server_context &llama)
|
json format_generation_settings(llama_server_context & llama)
|
||||||
{
|
{
|
||||||
const auto eos_bias = llama.params.logit_bias.find(llama_token_eos(llama.ctx));
|
const auto eos_bias = llama.params.logit_bias.find(llama_token_eos(llama.ctx));
|
||||||
const bool ignore_eos = eos_bias != llama.params.logit_bias.end() &&
|
const bool ignore_eos = eos_bias != llama.params.logit_bias.end() &&
|
||||||
|
@ -1029,14 +1028,14 @@ static json format_generation_settings(llama_server_context &llama)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static json format_embedding_response(llama_server_context &llama)
|
json format_embedding_response(llama_server_context & llama)
|
||||||
{
|
{
|
||||||
return json{
|
return json{
|
||||||
{"embedding", llama.getEmbedding()},
|
{"embedding", llama.getEmbedding()},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static json format_timings(llama_server_context &llama)
|
json format_timings(llama_server_context & llama)
|
||||||
{
|
{
|
||||||
const auto timings = llama_get_timings(llama.ctx);
|
const auto timings = llama_get_timings(llama.ctx);
|
||||||
|
|
||||||
|
@ -1055,8 +1054,9 @@ static json format_timings(llama_server_context &llama)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static json format_final_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
|
json format_final_response(
|
||||||
{
|
llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
|
||||||
|
) {
|
||||||
|
|
||||||
json res = json{
|
json res = json{
|
||||||
{"content", content},
|
{"content", content},
|
||||||
|
@ -1083,8 +1083,9 @@ static json format_final_response(llama_server_context &llama, const std::string
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
static json format_partial_response(llama_server_context &llama, const std::string &content, const std::vector<completion_token_output> &probs)
|
json format_partial_response(
|
||||||
{
|
llama_server_context & llama, const std::string & content, const std::vector<completion_token_output> & probs
|
||||||
|
) {
|
||||||
json res = json{
|
json res = json{
|
||||||
{"content", content},
|
{"content", content},
|
||||||
{"stop", false},
|
{"stop", false},
|
||||||
|
@ -1098,20 +1099,20 @@ static json format_partial_response(llama_server_context &llama, const std::stri
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
static json format_tokenizer_response(const std::vector<llama_token> &tokens)
|
json format_tokenizer_response(const std::vector<llama_token> & tokens)
|
||||||
{
|
{
|
||||||
return json{
|
return json{
|
||||||
{"tokens", tokens}};
|
{"tokens", tokens}};
|
||||||
}
|
}
|
||||||
|
|
||||||
static json format_detokenized_response(std::string content)
|
json format_detokenized_response(std::string content)
|
||||||
{
|
{
|
||||||
return json{
|
return json{
|
||||||
{"content", content}};
|
{"content", content}};
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static T json_value(const json &body, const std::string &key, const T &default_value)
|
T json_value(const json & body, const std::string & key, const T & default_value)
|
||||||
{
|
{
|
||||||
// Fallback null to default value
|
// Fallback null to default value
|
||||||
return body.contains(key) && !body.at(key).is_null()
|
return body.contains(key) && !body.at(key).is_null()
|
||||||
|
@ -1119,7 +1120,7 @@ static T json_value(const json &body, const std::string &key, const T &default_v
|
||||||
: default_value;
|
: default_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void parse_options_completion(const json &body, llama_server_context &llama)
|
void parse_options_completion(const json & body, llama_server_context & llama)
|
||||||
{
|
{
|
||||||
gpt_params default_params;
|
gpt_params default_params;
|
||||||
|
|
||||||
|
@ -1198,7 +1199,7 @@ static void parse_options_completion(const json &body, llama_server_context &lla
|
||||||
LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
|
LOG_VERBOSE("completion parameters parsed", format_generation_settings(llama));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void log_server_request(const Request &req, const Response &res)
|
void log_server_request(const Request & req, const Response & res)
|
||||||
{
|
{
|
||||||
LOG_INFO("request", {
|
LOG_INFO("request", {
|
||||||
{"remote_addr", req.remote_addr},
|
{"remote_addr", req.remote_addr},
|
||||||
|
@ -1271,6 +1272,8 @@ void append_to_generated_text_from_generated_token_probs(llama_server_context &
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
// own arguments required by this example
|
// own arguments required by this example
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
struct random_normal_distribution {
|
struct random_normal_distribution {
|
||||||
std::mt19937 gen;
|
std::mt19937 gen;
|
||||||
std::normal_distribution<float> rd;
|
std::normal_distribution<float> rd;
|
||||||
|
@ -444,11 +446,11 @@ void assert_shape_4d(struct ggml_tensor * tensor, int64_t ne0, int64_t ne1, int6
|
||||||
GGML_ASSERT(tensor->ne[3] == ne3);
|
GGML_ASSERT(tensor->ne[3] == ne3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t hash(void * p) {
|
size_t hash(void * p) {
|
||||||
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t hash_find(void * hash_table[], void * p) {
|
size_t hash_find(void * hash_table[], void * p) {
|
||||||
size_t h = hash(p);
|
size_t h = hash(p);
|
||||||
|
|
||||||
// linear probing
|
// linear probing
|
||||||
|
@ -463,7 +465,7 @@ static size_t hash_find(void * hash_table[], void * p) {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool hash_insert(void * hash_table[], void * p) {
|
bool hash_insert(void * hash_table[], void * p) {
|
||||||
//size_t h = hash(p);
|
//size_t h = hash(p);
|
||||||
size_t i = hash_find(hash_table, p);
|
size_t i = hash_find(hash_table, p);
|
||||||
|
|
||||||
|
@ -479,7 +481,7 @@ static bool hash_insert(void * hash_table[], void * p) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool hash_contains(void * hash_table[], void * p) {
|
bool hash_contains(void * hash_table[], void * p) {
|
||||||
size_t i = hash_find(hash_table, p);
|
size_t i = hash_find(hash_table, p);
|
||||||
return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
|
return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
|
||||||
}
|
}
|
||||||
|
@ -488,7 +490,6 @@ struct hash_map {
|
||||||
void * keys[GGML_GRAPH_HASHTABLE_SIZE];
|
void * keys[GGML_GRAPH_HASHTABLE_SIZE];
|
||||||
void * vals[GGML_GRAPH_HASHTABLE_SIZE];
|
void * vals[GGML_GRAPH_HASHTABLE_SIZE];
|
||||||
};
|
};
|
||||||
//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
|
|
||||||
|
|
||||||
struct hash_map * new_hash_map() {
|
struct hash_map * new_hash_map() {
|
||||||
struct hash_map * result = new struct hash_map;
|
struct hash_map * result = new struct hash_map;
|
||||||
|
@ -503,12 +504,12 @@ void free_hash_map(struct hash_map * map) {
|
||||||
delete map;
|
delete map;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ggml_is_view(struct ggml_tensor * t) {
|
bool ggml_is_view(struct ggml_tensor * t) {
|
||||||
return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
|
return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
|
||||||
t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
|
t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
|
struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
|
||||||
switch (t->op) {
|
switch (t->op) {
|
||||||
case GGML_OP_PERMUTE:
|
case GGML_OP_PERMUTE:
|
||||||
case GGML_OP_RESHAPE:
|
case GGML_OP_RESHAPE:
|
||||||
|
@ -522,7 +523,7 @@ static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
|
struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
|
||||||
struct ggml_tensor * parent = t;
|
struct ggml_tensor * parent = t;
|
||||||
do {
|
do {
|
||||||
parent = get_view_parent(parent);
|
parent = get_view_parent(parent);
|
||||||
|
@ -1988,6 +1989,8 @@ void opt_callback(void * vdata, float * sched) {
|
||||||
data->shuffle_countdown -= n_batch;
|
data->shuffle_countdown -= n_batch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
struct train_params params = get_default_train_params();
|
struct train_params params = get_default_train_params();
|
||||||
|
|
||||||
|
|
19
llama.cpp
19
llama.cpp
|
@ -1,3 +1,4 @@
|
||||||
|
#define LLAMA_API_INTERNAL
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
@ -108,7 +109,7 @@ static size_t utf8_len(char src) {
|
||||||
return lookup[highbits];
|
return lookup[highbits];
|
||||||
}
|
}
|
||||||
|
|
||||||
void replace_all(std::string & s, const std::string & search, const std::string & replace) {
|
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
|
||||||
std::string result;
|
std::string result;
|
||||||
for (size_t pos = 0; ; pos += search.length()) {
|
for (size_t pos = 0; ; pos += search.length()) {
|
||||||
auto new_pos = s.find(search, pos);
|
auto new_pos = s.find(search, pos);
|
||||||
|
@ -1560,7 +1561,7 @@ struct llama_model_loader {
|
||||||
// load LLaMA models
|
// load LLaMA models
|
||||||
//
|
//
|
||||||
|
|
||||||
std::string llama_model_ftype_name(enum llama_ftype ftype) {
|
static std::string llama_model_ftype_name(enum llama_ftype ftype) {
|
||||||
if (ftype & LLAMA_FTYPE_GUESSED) {
|
if (ftype & LLAMA_FTYPE_GUESSED) {
|
||||||
return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
|
return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
|
||||||
}
|
}
|
||||||
|
@ -3945,7 +3946,7 @@ struct llama_grammar_candidate {
|
||||||
|
|
||||||
// Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
|
// Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
|
||||||
// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
|
// pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
|
||||||
std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
|
static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
|
||||||
const char * src,
|
const char * src,
|
||||||
llama_partial_utf8 partial_start) {
|
llama_partial_utf8 partial_start) {
|
||||||
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
|
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
|
||||||
|
@ -5526,7 +5527,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: after the GGUF PR, this likely won't work and needs to be updated
|
// TODO: after the GGUF PR, this likely won't work and needs to be updated
|
||||||
int llama_apply_lora_from_file_internal(const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads) {
|
static int llama_apply_lora_from_file_internal(
|
||||||
|
const struct llama_model & model, const char * path_lora, const char * path_base_model, int n_threads
|
||||||
|
) {
|
||||||
LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
|
LLAMA_LOG_INFO("%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
|
||||||
|
|
||||||
const int64_t t_start_lora_us = ggml_time_us();
|
const int64_t t_start_lora_us = ggml_time_us();
|
||||||
|
@ -6073,7 +6076,7 @@ struct llama_context * llama_new_context_with_model(
|
||||||
return ctx;
|
return ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct llama_context * llama_init_from_file(
|
static struct llama_context * llama_init_from_file(
|
||||||
const char * path_model,
|
const char * path_model,
|
||||||
struct llama_context_params params) {
|
struct llama_context_params params) {
|
||||||
struct llama_model * model = llama_load_model_from_file(path_model, params);
|
struct llama_model * model = llama_load_model_from_file(path_model, params);
|
||||||
|
@ -6278,7 +6281,7 @@ struct llama_data_file_context : llama_data_context {
|
||||||
* llama_copy_state_data(ctx, &data_ctx);
|
* llama_copy_state_data(ctx, &data_ctx);
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
|
static void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
|
||||||
// copy rng
|
// copy rng
|
||||||
{
|
{
|
||||||
std::stringstream rng_ss;
|
std::stringstream rng_ss;
|
||||||
|
@ -6816,7 +6819,9 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// For internal test use
|
// For internal test use
|
||||||
const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx) {
|
auto llama_internal_get_tensor_map(struct llama_context * ctx)
|
||||||
|
-> const std::vector<std::pair<std::string, struct ggml_tensor *>> &
|
||||||
|
{
|
||||||
return ctx->model.tensors_by_name;
|
return ctx->model.tensors_by_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
3
llama.h
3
llama.h
|
@ -540,7 +540,8 @@ extern "C" {
|
||||||
|
|
||||||
struct ggml_tensor;
|
struct ggml_tensor;
|
||||||
|
|
||||||
const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
|
auto llama_internal_get_tensor_map(struct llama_context * ctx)
|
||||||
|
-> const std::vector<std::pair<std::string, struct ggml_tensor *>> &;
|
||||||
|
|
||||||
#endif // LLAMA_API_INTERNAL
|
#endif // LLAMA_API_INTERNAL
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
|
|
||||||
constexpr int kVecSize = 1 << 18;
|
constexpr int kVecSize = 1 << 18;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
float drawFromGaussianPdf(std::mt19937& rndm) {
|
float drawFromGaussianPdf(std::mt19937& rndm) {
|
||||||
constexpr double kScale = 1./(1. + std::mt19937::max());
|
constexpr double kScale = 1./(1. + std::mt19937::max());
|
||||||
constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
|
constexpr double kTwoPiTimesScale = 6.28318530717958647692*kScale;
|
||||||
|
@ -218,6 +220,8 @@ static void dot_q4_q8(const int n, float* s, const void* vx, const void* vy) {
|
||||||
*s = sumf;
|
*s = sumf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
int nloop = argc > 1 ? atoi(argv[1]) : 10;
|
int nloop = argc > 1 ? atoi(argv[1]) : 10;
|
||||||
|
|
|
@ -36,6 +36,8 @@
|
||||||
#define GGML_PRINT(...) printf(__VA_ARGS__)
|
#define GGML_PRINT(...) printf(__VA_ARGS__)
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
float frand(void) {
|
float frand(void) {
|
||||||
return (float)rand()/(float)RAND_MAX;
|
return (float)rand()/(float)RAND_MAX;
|
||||||
}
|
}
|
||||||
|
@ -117,6 +119,8 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
|
||||||
((float *)t->data)[idx] = value;
|
((float *)t->data)[idx] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = {
|
||||||
/* .mem_size = */ 1024*1024*1024,
|
/* .mem_size = */ 1024*1024*1024,
|
||||||
|
|
|
@ -13,15 +13,17 @@
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
|
constexpr float MAX_QUANTIZATION_REFERENCE_ERROR = 0.0001f;
|
||||||
const float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
|
constexpr float MAX_QUANTIZATION_TOTAL_ERROR = 0.002f;
|
||||||
const float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
|
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_2BITS = 0.0075f;
|
||||||
const float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
|
constexpr float MAX_QUANTIZATION_TOTAL_ERROR_3BITS = 0.0040f;
|
||||||
const float MAX_DOT_PRODUCT_ERROR = 0.02f;
|
constexpr float MAX_DOT_PRODUCT_ERROR = 0.02f;
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
const char* RESULT_STR[] = {"ok", "FAILED"};
|
const char* RESULT_STR[] = {"ok", "FAILED"};
|
||||||
|
|
||||||
|
|
||||||
// Generate synthetic data
|
// Generate synthetic data
|
||||||
void generate_data(float offset, size_t n, float * dst) {
|
void generate_data(float offset, size_t n, float * dst) {
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
@ -90,6 +92,8 @@ float dot_product_error(ggml_type_traits_t & qfns, size_t test_size, const float
|
||||||
return fabsf(result - dot_ref) / test_size;
|
return fabsf(result - dot_ref) / test_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char * argv[]) {
|
int main(int argc, char * argv[]) {
|
||||||
bool verbose = false;
|
bool verbose = false;
|
||||||
const size_t test_size = 32 * 128;
|
const size_t test_size = 32 * 128;
|
||||||
|
|
|
@ -60,6 +60,8 @@ inline int64_t cpu_cycles() {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Generate synthetic data
|
// Generate synthetic data
|
||||||
void generate_data(float offset, size_t n, float * dst) {
|
void generate_data(float offset, size_t n, float * dst) {
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
@ -137,6 +139,8 @@ void usage(char * argv[]) {
|
||||||
printf(" set test iteration number (%d)\n", ITERATIONS);
|
printf(" set test iteration number (%d)\n", ITERATIONS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(int argc, char * argv[]) {
|
int main(int argc, char * argv[]) {
|
||||||
quantize_perf_params params {};
|
quantize_perf_params params {};
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
void dump(const llama_token_data_array * candidates) {
|
void dump(const llama_token_data_array * candidates) {
|
||||||
for (size_t i = 0; i < candidates->size; i++) {
|
for (size_t i = 0; i < candidates->size; i++) {
|
||||||
printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
|
printf("%d: %f (%f)\n", candidates->data[i].id, candidates->data[i].p, candidates->data[i].logit);
|
||||||
|
@ -173,6 +176,8 @@ void test_frequency_presence_penalty(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
ggml_time_init();
|
ggml_time_init();
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
|
|
||||||
typedef int codepoint;
|
typedef int codepoint;
|
||||||
|
|
||||||
std::string codepoint_to_utf8(codepoint cp) {
|
static std::string codepoint_to_utf8(codepoint cp) {
|
||||||
std::string result;
|
std::string result;
|
||||||
if (0x00 <= cp && cp <= 0x7f) {
|
if (0x00 <= cp && cp <= 0x7f) {
|
||||||
result.push_back(cp);
|
result.push_back(cp);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue