llama : minor
parent f85395252f
commit 18d00611e2

3 changed files with 24 additions and 28 deletions
@@ -14,14 +14,12 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
-/*
 template<typename T>
 static std::string to_string(const T & val) {
     std::stringstream ss;
     ss << val;
     return ss.str();
 }
-*/
 
 void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
     const int32_t n = val.size();
@@ -61,6 +61,12 @@ static void llama_log_callback_default(llama_log_level level, const char * text,
 #define LLAMA_LOG_WARN(...) llama_log_internal(LLAMA_LOG_LEVEL_WARN , __VA_ARGS__)
 #define LLAMA_LOG_ERROR(...) llama_log_internal(LLAMA_LOG_LEVEL_ERROR, __VA_ARGS__)
 
+template<typename T>
+static std::string to_string(const T & val) {
+    std::stringstream ss;
+    ss << val;
+    return ss.str();
+}
 
 #if !defined(GGML_USE_CUBLAS) && !defined(GGML_USE_METAL)
 #include "ggml-alloc.h"
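For reference, a minimal self-contained sketch (not part of this commit) of how the to_string helper added above can be used; the variable names and values below are made up purely for illustration:

#include <iostream>
#include <sstream>
#include <string>

template<typename T>
static std::string to_string(const T & val) {
    std::stringstream ss;
    ss << val;
    return ss.str();
}

int main() {
    const int   n_ctx = 2048;   // hypothetical example value
    const float eps   = 1e-5f;  // hypothetical example value

    // any type that can be streamed into std::stringstream works
    const std::string msg = "n_ctx=" + to_string(n_ctx) + " eps=" + to_string(eps);
    std::cout << msg << "\n";
    return 0;
}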
@@ -70,25 +76,6 @@ static void llama_log_callback_default(llama_log_level level, const char * text,
 #define LLAMA_MAX_SCRATCH_BUFFERS 16
 #endif
 
-
-// available llama models
-enum e_model {
-    MODEL_UNKNOWN,
-    MODEL_3B,
-    MODEL_7B,
-    MODEL_13B,
-    MODEL_30B,
-    MODEL_65B,
-    MODEL_70B,
-};
-
-static const size_t kB = 1024;
-static const size_t MB = 1024*1024;
-
-// computed for n_ctx == 2048
-// TODO: dynamically determine these sizes
-// needs modifications in ggml
-
 typedef void (*offload_func_t)(struct ggml_tensor * tensor);
 
 #ifdef GGML_USE_CUBLAS
@@ -161,6 +148,24 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 // memory sizes (calculated for n_batch == 512)
 //
 
+// computed for n_ctx == 2048
+// TODO: dynamically determine these sizes
+// needs modifications in ggml
+
+// available llama models
+enum e_model {
+    MODEL_UNKNOWN,
+    MODEL_3B,
+    MODEL_7B,
+    MODEL_13B,
+    MODEL_30B,
+    MODEL_65B,
+    MODEL_70B,
+};
+
+static const size_t kB = 1024;
+static const size_t MB = 1024*1024;
+
 static const std::map<e_model, size_t> & MEM_REQ_SCRATCH0(int n_ctx)
 {
     static std::map<e_model, size_t> k_sizes = {
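For context, a small self-contained sketch (not part of this commit) of the pattern behind MEM_REQ_SCRATCH0 above: a per-model size table keyed by e_model, returned as a reference to a function-local static map. The helper name example_scratch_sizes and all byte values here are hypothetical placeholders, not the actual requirements used by llama.cpp:

#include <cstddef>
#include <cstdio>
#include <map>

enum e_model {
    MODEL_UNKNOWN,
    MODEL_3B,
    MODEL_7B,
    MODEL_13B,
    MODEL_30B,
    MODEL_65B,
    MODEL_70B,
};

static const size_t kB = 1024;
static const size_t MB = 1024*1024;

// same shape as MEM_REQ_SCRATCH0(n_ctx): a static map built on first use
// (note: the n_ctx passed on the first call determines the cached values)
static const std::map<e_model, size_t> & example_scratch_sizes(int n_ctx) {
    static std::map<e_model, size_t> k_sizes = {
        { MODEL_3B,  128*MB + (size_t) n_ctx*64*kB  }, // placeholder numbers
        { MODEL_7B,  160*MB + (size_t) n_ctx*96*kB  }, // placeholder numbers
        { MODEL_13B, 192*MB + (size_t) n_ctx*128*kB }, // placeholder numbers
    };
    return k_sizes;
}

int main() {
    const auto & sizes = example_scratch_sizes(/*n_ctx =*/ 2048);

    const auto it = sizes.find(MODEL_7B);
    if (it != sizes.end()) {
        printf("scratch estimate for 7B: %zu MB\n", it->second/MB);
    }
    return 0;
}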
@@ -64,13 +64,6 @@ static std::string format(const char * fmt, ...) {
     return std::string(buf.data(), size);
 }
 
-template<typename T>
-static std::string to_string(const T & val) {
-    std::stringstream ss;
-    ss << val;
-    return ss.str();
-}
-
 // TODO: can we merge this one and gguf_context?
 struct gguf_file {
     // use FILE * so we don't have to re-open the file to mmap