Merge branch 'master' into concedo
This commit is contained in:
commit
96fb12cfa2
6 changed files with 20 additions and 10 deletions
|
@ -951,8 +951,9 @@ class OutputFile:
|
||||||
|
|
||||||
ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
|
ndarrays = bounded_parallel_map(do_item, model.items(), concurrency=8)
|
||||||
for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
|
for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
|
||||||
size = ' x '.join(map(str, lazy_tensor.shape))
|
size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
|
||||||
print(f"[{i+1}/{len(model)}] Writing tensor {name}, size {size}...")
|
padi = len(str(len(model)))
|
||||||
|
print(f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type}")
|
||||||
of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
|
of.write_tensor_header(name, lazy_tensor.shape, lazy_tensor.data_type)
|
||||||
ndarray.tofile(of.fout)
|
ndarray.tofile(of.fout)
|
||||||
of.fout.close()
|
of.fout.close()
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
gpt_params params;
|
gpt_params params;
|
||||||
params.model = "models/llama-7B/ggml-model.bin";
|
params.model = "models/llama-7B/ggml-model.bin";
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
std::vector<float> softmax(const std::vector<float>& logits) {
|
std::vector<float> softmax(const std::vector<float>& logits) {
|
||||||
std::vector<float> probs(logits.size());
|
std::vector<float> probs(logits.size());
|
||||||
|
|
15
llama.cpp
15
llama.cpp
|
@ -9,6 +9,7 @@
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <ctime>
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
@ -261,12 +262,12 @@ static size_t checked_div(size_t a, size_t b) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
|
static std::string llama_format_tensor_shape(const std::vector<uint32_t> & ne) {
|
||||||
std::string ret = "[" + std::to_string(ne.at(0));
|
char buf[256];
|
||||||
|
snprintf(buf, sizeof(buf), "%5u", ne.at(0));
|
||||||
for (size_t i = 1; i < ne.size(); i++) {
|
for (size_t i = 1; i < ne.size(); i++) {
|
||||||
ret += " x " + std::to_string(ne.at(i));
|
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " x %5u", ne.at(i));
|
||||||
}
|
}
|
||||||
ret += "]";
|
return buf;
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
|
static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml_type type) {
|
||||||
|
@ -948,8 +949,8 @@ static void llama_model_load_internal(
|
||||||
ml->ggml_ctx = ctx;
|
ml->ggml_ctx = ctx;
|
||||||
|
|
||||||
model.tok_embeddings = ml->get_tensor("tok_embeddings.weight", {n_embd, n_vocab});
|
model.tok_embeddings = ml->get_tensor("tok_embeddings.weight", {n_embd, n_vocab});
|
||||||
model.norm = ml->get_tensor("norm.weight", {n_embd});
|
model.norm = ml->get_tensor("norm.weight", {n_embd});
|
||||||
model.output = ml->get_tensor("output.weight", {n_embd, n_vocab});
|
model.output = ml->get_tensor("output.weight", {n_embd, n_vocab});
|
||||||
|
|
||||||
model.layers.resize(n_layer);
|
model.layers.resize(n_layer);
|
||||||
for (uint32_t i = 0; i < n_layer; ++i) {
|
for (uint32_t i = 0; i < n_layer; ++i) {
|
||||||
|
@ -1576,7 +1577,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||||
tensor.data = read_data.addr;
|
tensor.data = read_data.addr;
|
||||||
model_loader->load_data_for(tensor);
|
model_loader->load_data_for(tensor);
|
||||||
|
|
||||||
printf("[%zu/%zu] %36s - %s, type = %6s, ",
|
printf("[%4zu/%4zu] %36s - %16s, type = %6s, ",
|
||||||
++idx, model_loader->tensors_map.tensors.size(),
|
++idx, model_loader->tensors_map.tensors.size(),
|
||||||
tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
|
tensor.name.c_str(), llama_format_tensor_shape(tensor.ne).c_str(),
|
||||||
ggml_type_name(tensor.type));
|
ggml_type_name(tensor.type));
|
||||||
|
|
|
@ -43,8 +43,12 @@
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
|
#ifdef __MINGW32__
|
||||||
|
__attribute__((format(gnu_printf, 1, 2)))
|
||||||
|
#else
|
||||||
__attribute__((format(printf, 1, 2)))
|
__attribute__((format(printf, 1, 2)))
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
static std::string format(const char * fmt, ...) {
|
static std::string format(const char * fmt, ...) {
|
||||||
va_list ap, ap2;
|
va_list ap, ap2;
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
|
@ -57,7 +61,7 @@ static std::string format(const char * fmt, ...) {
|
||||||
va_end(ap2);
|
va_end(ap2);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
return std::string(buf.data(), size);
|
return std::string(buf.data(), size);
|
||||||
};
|
}
|
||||||
|
|
||||||
struct llama_file {
|
struct llama_file {
|
||||||
// use FILE * so we don't have to re-open the file to mmap
|
// use FILE * so we don't have to re-open the file to mmap
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue