llama : minor

This commit is contained in:
Georgi Gerganov 2023-08-14 16:26:40 +03:00
parent f85395252f
commit 18d00611e2
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
3 changed files with 24 additions and 28 deletions

View file

@@ -14,14 +14,12 @@
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/*
template<typename T>
static std::string to_string(const T & val) {
    std::stringstream ss;
    ss << val;
    return ss.str();
}
*/
void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
    const int32_t n = val.size();

View file

@@ -61,6 +61,12 @@ static void llama_log_callback_default(llama_log_level level, const char * text,
#define LLAMA_LOG_WARN(...)  llama_log_internal(LLAMA_LOG_LEVEL_WARN , __VA_ARGS__)
#define LLAMA_LOG_ERROR(...) llama_log_internal(LLAMA_LOG_LEVEL_ERROR, __VA_ARGS__)
template<typename T>
static std::string to_string(const T & val) {
std::stringstream ss;
ss << val;
return ss.str();
}
#if !defined(GGML_USE_CUBLAS) && !defined(GGML_USE_METAL)
#include "ggml-alloc.h"
@@ -70,25 +76,6 @@ static void llama_log_callback_default(llama_log_level level, const char * text,
#define LLAMA_MAX_SCRATCH_BUFFERS 16
#endif
// available llama models
enum e_model {
MODEL_UNKNOWN,
MODEL_3B,
MODEL_7B,
MODEL_13B,
MODEL_30B,
MODEL_65B,
MODEL_70B,
};
static const size_t kB = 1024;
static const size_t MB = 1024*1024;
// computed for n_ctx == 2048
// TODO: dynamically determine these sizes
// needs modifications in ggml
typedef void (*offload_func_t)(struct ggml_tensor * tensor);
#ifdef GGML_USE_CUBLAS
@@ -161,6 +148,24 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
// memory sizes (calculated for n_batch == 512)
//
// computed for n_ctx == 2048
// TODO: dynamically determine these sizes
// needs modifications in ggml
// available llama models
enum e_model {
MODEL_UNKNOWN,
MODEL_3B,
MODEL_7B,
MODEL_13B,
MODEL_30B,
MODEL_65B,
MODEL_70B,
};
static const size_t kB = 1024;
static const size_t MB = 1024*1024;
static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0(int n_ctx)
{
    static std::map<e_model, size_t> k_sizes = {

View file

@@ -64,13 +64,6 @@ static std::string format(const char * fmt, ...) {
    return std::string(buf.data(), size);
}
template<typename T>
static std::string to_string(const T & val) {
std::stringstream ss;
ss << val;
return ss.str();
}
// TODO: can we merge this one and gguf_context?
struct gguf_file {
    // use FILE * so we don't have to re-open the file to mmap