mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-15 07:19:18 +00:00
Perform some code cleanup
This commit is contained in:
parent
cc1732bc42
commit
210187cf77
205 changed files with 1748 additions and 2595 deletions
2
third_party/ggml/common.cc
vendored
2
third_party/ggml/common.cc
vendored
|
@ -685,7 +685,7 @@ int put_codepoint(console_state & con_st, const char* utf8_codepoint, size_t len
|
|||
if (width < 0) {
|
||||
// Calculate the width considering text wrapping
|
||||
struct winsize w;
|
||||
ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
|
||||
tcgetwinsize(STDOUT_FILENO, &w);
|
||||
width += w.ws_col;
|
||||
}
|
||||
return width;
|
||||
|
|
2
third_party/ggml/fp16.c
vendored
2
third_party/ggml/fp16.c
vendored
|
@ -85,7 +85,7 @@ void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n) {
|
|||
|
||||
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n) {
|
||||
size_t i = 0;
|
||||
#if defined(__F16C__)
|
||||
#ifdef __F16C__
|
||||
for (; i + 7 < n; i += 8) {
|
||||
__m256 x_vec = _mm256_loadu_ps(x + i);
|
||||
__m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
|
||||
|
|
13
third_party/ggml/ggml.c
vendored
13
third_party/ggml/ggml.c
vendored
|
@ -48,6 +48,7 @@
|
|||
#include "libc/assert.h"
|
||||
#include "libc/assert.h"
|
||||
#include "third_party/ggml/ggml.h"
|
||||
#include "libc/intrin/bsr.h"
|
||||
#include "third_party/libcxx/math.h"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
|
@ -8096,10 +8097,10 @@ static void ggml_compute_forward_alibi_f32(
|
|||
assert(ne1 + n_past == ne0); (void) n_past;
|
||||
|
||||
// add alibi to src0 (KQ_scaled)
|
||||
const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
|
||||
const int n_heads_log2_floor = 1 << _bsr(n_head);
|
||||
|
||||
const float m0 = powf(2.0f, -8.0f / n_heads_log2_floor);
|
||||
const float m1 = powf(2.0f, -4.0f / n_heads_log2_floor);
|
||||
const float m0 = exp2f(-8.0f / n_heads_log2_floor);
|
||||
const float m1 = exp2f(-4.0f / n_heads_log2_floor);
|
||||
|
||||
for (int i = 0; i < ne0; i++) {
|
||||
for (int j = 0; j < ne1; j++) {
|
||||
|
@ -8157,10 +8158,10 @@ static void ggml_compute_forward_alibi_f16(
|
|||
assert(ne1 + n_past == ne0); (void) n_past;
|
||||
|
||||
// add alibi to src0 (KQ_scaled)
|
||||
const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
|
||||
const int n_heads_log2_floor = 1 << _bsr(n_head);
|
||||
|
||||
const float m0 = powf(2.0f, -8.0f / n_heads_log2_floor);
|
||||
const float m1 = powf(2.0f, -4.0f / n_heads_log2_floor);
|
||||
const float m0 = exp2f(-8.0f / n_heads_log2_floor);
|
||||
const float m1 = exp2f(-4.0f / n_heads_log2_floor);
|
||||
|
||||
for (int i = 0; i < ne0; i++) {
|
||||
for (int j = 0; j < ne1; j++) {
|
||||
|
|
40
third_party/ggml/ggml.mk
vendored
40
third_party/ggml/ggml.mk
vendored
|
@ -99,12 +99,23 @@ endif
|
|||
|
||||
THIRD_PARTY_GGML_ARTIFACTS += THIRD_PARTY_GGML_LLAMA
|
||||
THIRD_PARTY_GGML_LLAMA = o/$(MODE)/third_party/ggml/llama.com
|
||||
THIRD_PARTY_GGML_LLAMA_HDRS = third_party/ggml/llama.h third_party/ggml/llama_util.h third_party/ggml/common.h
|
||||
THIRD_PARTY_GGML_LLAMA_SRCS = third_party/ggml/main.cc third_party/ggml/llama.cc third_party/ggml/common.cc
|
||||
THIRD_PARTY_GGML_LLAMA_OBJS = $(THIRD_PARTY_GGML_LLAMA_SRCS:%.cc=o/$(MODE)/%.o)
|
||||
THIRD_PARTY_GGML_LLAMA_FILES := $(THIRD_PARTY_GGML_LLAMA_SRCS) $(THIRD_PARTY_GGML_LLAMA_HDRS)
|
||||
THIRD_PARTY_GGML_LLAMA_CHECKS = $(THIRD_PARTY_GGML_LLAMA).pkg $(THIRD_PARTY_GGML_LLAMA_HDRS:%=o/$(MODE)/%.okk)
|
||||
|
||||
THIRD_PARTY_GGML_LLAMA_HDRS = \
|
||||
third_party/ggml/common.cc \
|
||||
third_party/ggml/llama.h \
|
||||
third_party/ggml/llama_util.h \
|
||||
third_party/ggml/common.h
|
||||
|
||||
THIRD_PARTY_GGML_LLAMA_SRCS = \
|
||||
third_party/ggml/main.cc \
|
||||
third_party/ggml/llama.cc \
|
||||
third_party/ggml/common.cc \
|
||||
third_party/ggml/quantize.cc \
|
||||
third_party/ggml/perplexity.cc
|
||||
|
||||
THIRD_PARTY_GGML_LLAMA_DIRECTDEPS = \
|
||||
LIBC_CALLS \
|
||||
LIBC_FMT \
|
||||
|
@ -137,6 +148,26 @@ $(THIRD_PARTY_GGML_LLAMA).dbg: \
|
|||
$(APE_NO_MODIFY_SELF)
|
||||
@$(APELINK)
|
||||
|
||||
o/$(MODE)/third_party/ggml/quantize.com.dbg: \
|
||||
$(THIRD_PARTY_GGML_LLAMA).pkg \
|
||||
$(THIRD_PARTY_GGML_LLAMA_DEPS) \
|
||||
o/$(MODE)/third_party/ggml/common.o \
|
||||
o/$(MODE)/third_party/ggml/llama.o \
|
||||
o/$(MODE)/third_party/ggml/quantize.o \
|
||||
$(CRT) \
|
||||
$(APE_NO_MODIFY_SELF)
|
||||
@$(APELINK)
|
||||
|
||||
o/$(MODE)/third_party/ggml/perplexity.com.dbg: \
|
||||
$(THIRD_PARTY_GGML_LLAMA).pkg \
|
||||
$(THIRD_PARTY_GGML_LLAMA_DEPS) \
|
||||
o/$(MODE)/third_party/ggml/common.o \
|
||||
o/$(MODE)/third_party/ggml/llama.o \
|
||||
o/$(MODE)/third_party/ggml/perplexity.o \
|
||||
$(CRT) \
|
||||
$(APE_NO_MODIFY_SELF)
|
||||
@$(APELINK)
|
||||
|
||||
$(THIRD_PARTY_GGML_LLAMA).pkg: \
|
||||
$(THIRD_PARTY_GGML_LLAMA_OBJS) \
|
||||
$(foreach x,$(THIRD_PARTY_GGML_LLAMA_DIRECTDEPS),$($(x)_A).pkg)
|
||||
|
@ -147,7 +178,10 @@ o/$(MODE)/third_party/ggml/companionai.txt.zip.o: private \
|
|||
|
||||
################################################################################
|
||||
|
||||
THIRD_PARTY_GGML_COMS = $(THIRD_PARTY_GGML_LLAMA)
|
||||
THIRD_PARTY_GGML_COMS = \
|
||||
$(THIRD_PARTY_GGML_LLAMA) \
|
||||
o/$(MODE)/third_party/ggml/perplexity.com
|
||||
|
||||
THIRD_PARTY_GGML_BINS = $(THIRD_PARTY_GGML_COMS) $(THIRD_PARTY_GGML_COMS:%=%.dbg)
|
||||
THIRD_PARTY_GGML_LIBS = $(foreach x,$(THIRD_PARTY_GGML_ARTIFACTS),$($(x)))
|
||||
THIRD_PARTY_GGML_SRCS = $(foreach x,$(THIRD_PARTY_GGML_ARTIFACTS),$($(x)_SRCS))
|
||||
|
|
197
third_party/ggml/perplexity.cc
vendored
Normal file
197
third_party/ggml/perplexity.cc
vendored
Normal file
|
@ -0,0 +1,197 @@
|
|||
/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8-*-│
|
||||
│vi: set net ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ llama.com │
|
||||
│ Copyright (c) 2023 Georgi Gerganov │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "third_party/ggml/common.h"
|
||||
#include "third_party/ggml/llama.h"
|
||||
#include "third_party/libcxx/vector"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
llama.cpp (MIT License)\\n\
|
||||
Copyright (c) 2023 Georgi Gerganov\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
// clang-format off
|
||||
|
||||
std::vector<float> softmax(const std::vector<float>& logits) {
|
||||
std::vector<float> probs(logits.size());
|
||||
float max_logit = logits[0];
|
||||
for (float v : logits) max_logit = std::max(max_logit, v);
|
||||
double sum_exp = 0.0;
|
||||
for (size_t i = 0; i < logits.size(); i++) {
|
||||
// Subtract the maximum logit value from the current logit value for numerical stability
|
||||
const float logit = logits[i] - max_logit;
|
||||
const float exp_logit = expf(logit);
|
||||
sum_exp += exp_logit;
|
||||
probs[i] = exp_logit;
|
||||
}
|
||||
for (size_t i = 0; i < probs.size(); i++) probs[i] /= sum_exp;
|
||||
return probs;
|
||||
}
|
||||
|
||||
void perplexity(llama_context * ctx, const gpt_params & params) {
|
||||
// Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
|
||||
// Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
|
||||
// Output: `perplexity: 13.5106 [114/114]`
|
||||
// BOS tokens will be added for each chunk before eval
|
||||
auto tokens = ::llama_tokenize(ctx, params.prompt, true);
|
||||
|
||||
int count = 0;
|
||||
|
||||
const int n_chunk = tokens.size() / params.n_ctx;
|
||||
const int n_vocab = llama_n_vocab(ctx);
|
||||
const int n_batch = params.n_batch;
|
||||
|
||||
double nll = 0.0;
|
||||
fprintf(stderr, "%s: calculating perplexity over %d chunks, batch_size=%d\n", __func__, n_chunk, n_batch);
|
||||
|
||||
for (int i = 0; i < n_chunk; ++i) {
|
||||
const int start = i * params.n_ctx;
|
||||
const int end = start + params.n_ctx;
|
||||
|
||||
const int num_batches = (params.n_ctx + n_batch - 1) / n_batch;
|
||||
|
||||
std::vector<float> logits;
|
||||
|
||||
const auto t_start = std::chrono::high_resolution_clock::now();
|
||||
|
||||
for (int j = 0; j < num_batches; ++j) {
|
||||
const int batch_start = start + j * n_batch;
|
||||
const int batch_size = std::min(end - batch_start, n_batch);
|
||||
|
||||
// save original token and restore it after eval
|
||||
const auto token_org = tokens[batch_start];
|
||||
|
||||
// add BOS token for the first batch of each chunk
|
||||
if (j == 0) {
|
||||
tokens[batch_start] = llama_token_bos();
|
||||
}
|
||||
|
||||
if (llama_eval(ctx, tokens.data() + batch_start, batch_size, j * n_batch, params.n_threads)) {
|
||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||
return;
|
||||
}
|
||||
|
||||
// restore the original token in case it was set to BOS
|
||||
tokens[batch_start] = token_org;
|
||||
|
||||
const auto batch_logits = llama_get_logits(ctx);
|
||||
logits.insert(logits.end(), batch_logits, batch_logits + batch_size * n_vocab);
|
||||
}
|
||||
|
||||
const auto t_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
if (i == 0) {
|
||||
const float t_total = std::chrono::duration<float>(t_end - t_start).count();
|
||||
fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total);
|
||||
int total_seconds = (int)(t_total * n_chunk);
|
||||
if (total_seconds >= 60*60) {
|
||||
fprintf(stderr, "%d hours ", total_seconds / (60*60));
|
||||
total_seconds = total_seconds % (60*60);
|
||||
}
|
||||
fprintf(stderr, "%d minutes\n", total_seconds / 60);
|
||||
}
|
||||
|
||||
// We get the logits for all the tokens in the context window (params.n_ctx)
|
||||
// from llama_eval above. Now, based on https://huggingface.co/docs/transformers/perplexity,
|
||||
// calculate the perplexity over the last half of the window (so the model always has
|
||||
// some context to predict the token).
|
||||
//
|
||||
// We rely on the fact that attention in the forward pass only looks at previous
|
||||
// tokens here, so the logits returned for each token are an accurate representation
|
||||
// of what the model would have predicted at that point.
|
||||
//
|
||||
// Example, we have a context window of 512, we will compute perplexity for each of the
|
||||
// last 256 tokens. Then, we split the input up into context window size chunks to
|
||||
// process the entire prompt.
|
||||
for (int j = std::min(512, params.n_ctx / 2); j < params.n_ctx - 1; ++j) {
|
||||
// Calculate probability of next token, given the previous ones.
|
||||
const std::vector<float> tok_logits(
|
||||
logits.begin() + (j + 0) * n_vocab,
|
||||
logits.begin() + (j + 1) * n_vocab);
|
||||
|
||||
const float prob = softmax(tok_logits)[tokens[start + j + 1]];
|
||||
|
||||
nll += -std::log(prob);
|
||||
++count;
|
||||
}
|
||||
// perplexity is e^(average negative log-likelihood)
|
||||
printf("[%d]%.4lf,", i + 1, std::exp(nll / count));
|
||||
fflush(stdout);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
gpt_params params;
|
||||
params.model = "models/llama-7B/ggml-model.bin";
|
||||
|
||||
params.n_batch = 512;
|
||||
if (gpt_params_parse(argc, argv, params) == false) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
params.perplexity = true;
|
||||
params.n_batch = std::min(params.n_batch, params.n_ctx);
|
||||
|
||||
if (params.n_ctx > 2048) {
|
||||
fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
|
||||
"expect poor results\n", __func__, params.n_ctx);
|
||||
}
|
||||
|
||||
if (params.seed < 0) {
|
||||
params.seed = time(NULL);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
|
||||
|
||||
std::mt19937 rng(params.seed);
|
||||
if (params.random_prompt) {
|
||||
params.prompt = gpt_random_prompt(rng);
|
||||
}
|
||||
|
||||
llama_context * ctx;
|
||||
|
||||
// load the model and apply lora adapter, if any
|
||||
ctx = llama_init_from_gpt_params(params);
|
||||
if (ctx == NULL) {
|
||||
fprintf(stderr, "%s: error: unable to load model\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// print system information
|
||||
{
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
|
||||
params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
|
||||
}
|
||||
|
||||
perplexity(ctx, params);
|
||||
|
||||
llama_print_timings(ctx);
|
||||
llama_free(ctx);
|
||||
|
||||
return 0;
|
||||
}
|
175
third_party/ggml/quantize.cc
vendored
Normal file
175
third_party/ggml/quantize.cc
vendored
Normal file
|
@ -0,0 +1,175 @@
|
|||
/*-*-mode:c++;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8-*-│
|
||||
│vi: set net ft=c++ ts=4 sts=4 sw=4 fenc=utf-8 :vi│
|
||||
╚──────────────────────────────────────────────────────────────────────────────╝
|
||||
│ │
|
||||
│ llama.com │
|
||||
│ Copyright (c) 2023 Georgi Gerganov │
|
||||
│ │
|
||||
│ Permission is hereby granted, free of charge, to any person obtaining │
|
||||
│ a copy of this software and associated documentation files (the │
|
||||
│ "Software"), to deal in the Software without restriction, including │
|
||||
│ without limitation the rights to use, copy, modify, merge, publish, │
|
||||
│ distribute, sublicense, and/or sell copies of the Software, and to │
|
||||
│ permit persons to whom the Software is furnished to do so, subject to │
|
||||
│ the following conditions: │
|
||||
│ │
|
||||
│ The above copyright notice and this permission notice shall be │
|
||||
│ included in all copies or substantial portions of the Software. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, │
|
||||
│ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF │
|
||||
│ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. │
|
||||
│ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY │
|
||||
│ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, │
|
||||
│ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE │
|
||||
│ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. │
|
||||
│ │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "third_party/ggml/common.h"
|
||||
#include "third_party/ggml/ggml.h"
|
||||
#include "third_party/ggml/llama.h"
|
||||
#include "third_party/libcxx/map"
|
||||
#include "third_party/libcxx/vector"
|
||||
|
||||
asm(".ident\t\"\\n\\n\
|
||||
llama.cpp (MIT License)\\n\
|
||||
Copyright (c) 2023 Georgi Gerganov\"");
|
||||
asm(".include \"libc/disclaimer.inc\"");
|
||||
// clang-format off
|
||||
|
||||
static const std::map<std::string, llama_ftype> LLAMA_FTYPE_MAP = {
|
||||
{"q4_0", LLAMA_FTYPE_MOSTLY_Q4_0},
|
||||
{"q4_1", LLAMA_FTYPE_MOSTLY_Q4_1},
|
||||
{"q4_2", LLAMA_FTYPE_MOSTLY_Q4_2},
|
||||
{"q5_0", LLAMA_FTYPE_MOSTLY_Q5_0},
|
||||
{"q5_1", LLAMA_FTYPE_MOSTLY_Q5_1},
|
||||
{"q8_0", LLAMA_FTYPE_MOSTLY_Q8_0},
|
||||
};
|
||||
|
||||
bool try_parse_ftype(const std::string & ftype_str, llama_ftype & ftype, std::string & ftype_str_out) {
|
||||
auto it = LLAMA_FTYPE_MAP.find(ftype_str);
|
||||
if (it != LLAMA_FTYPE_MAP.end()) {
|
||||
ftype = it->second;
|
||||
ftype_str_out = it->first;
|
||||
return true;
|
||||
}
|
||||
// try to parse as an integer
|
||||
// try {
|
||||
int ftype_int = std::stoi(ftype_str);
|
||||
for (auto it = LLAMA_FTYPE_MAP.begin(); it != LLAMA_FTYPE_MAP.end(); it++) {
|
||||
if (it->second == ftype_int) {
|
||||
ftype = it->second;
|
||||
ftype_str_out = it->first;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// }
|
||||
// catch (...) {
|
||||
// // stoi failed
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
||||
// usage:
|
||||
// ./quantize models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
|
||||
//
|
||||
int main(int argc, char ** argv) {
|
||||
ggml_time_init();
|
||||
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "usage: %s model-f32.bin [model-quant.bin] type [nthreads]\n", argv[0]);
|
||||
for (auto it = LLAMA_FTYPE_MAP.begin(); it != LLAMA_FTYPE_MAP.end(); it++) {
|
||||
fprintf(stderr, " type = \"%s\" or %d\n", it->first.c_str(), it->second);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// needed to initialize f16 tables
|
||||
{
|
||||
struct ggml_init_params params = { 0, NULL, false };
|
||||
struct ggml_context * ctx = ggml_init(params);
|
||||
ggml_free(ctx);
|
||||
}
|
||||
|
||||
// parse command line arguments
|
||||
const std::string fname_inp = argv[1];
|
||||
std::string fname_out;
|
||||
int nthread;
|
||||
llama_ftype ftype;
|
||||
|
||||
int arg_idx = 2;
|
||||
std::string ftype_str;
|
||||
if (try_parse_ftype(argv[arg_idx], ftype, ftype_str)) {
|
||||
// argv[2] is the ftype
|
||||
std::string fpath;
|
||||
const size_t pos = fname_inp.find_last_of('/');
|
||||
if (pos != std::string::npos) {
|
||||
fpath = fname_inp.substr(0, pos + 1);
|
||||
}
|
||||
// export as [inp path]/ggml-model-[ftype].bin
|
||||
fname_out = fpath + "ggml-model-" + ftype_str + ".bin";
|
||||
arg_idx++;
|
||||
}
|
||||
else {
|
||||
// argv[2] is the output path
|
||||
fname_out = argv[arg_idx];
|
||||
arg_idx++;
|
||||
|
||||
if (argc <= arg_idx) {
|
||||
fprintf(stderr, "%s: missing ftype\n", __func__);
|
||||
return 1;
|
||||
}
|
||||
// argv[3] is the ftype
|
||||
if (!try_parse_ftype(argv[arg_idx], ftype, ftype_str)) {
|
||||
fprintf(stderr, "%s: invalid ftype '%s'\n", __func__, argv[3]);
|
||||
return 1;
|
||||
}
|
||||
arg_idx++;
|
||||
}
|
||||
|
||||
// parse nthreads
|
||||
if (argc > arg_idx) {
|
||||
// try {
|
||||
nthread = std::stoi(argv[arg_idx]);
|
||||
// }
|
||||
// catch (const std::exception & e) {
|
||||
// Die("%s: invalid nthread '%s' (%s)\n", __func__, argv[arg_idx], e.what());
|
||||
// return 1;
|
||||
// }
|
||||
} else {
|
||||
nthread = 0;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: quantizing '%s' to '%s' as %s", __func__, fname_inp.c_str(), fname_out.c_str(), ftype_str.c_str());
|
||||
if (nthread > 0) {
|
||||
fprintf(stderr, " using %d threads", nthread);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
const int64_t t_main_start_us = ggml_time_us();
|
||||
|
||||
int64_t t_quantize_us = 0;
|
||||
|
||||
// load the model
|
||||
{
|
||||
const int64_t t_start_us = ggml_time_us();
|
||||
|
||||
if (llama_model_quantize(fname_inp.c_str(), fname_out.c_str(), ftype, nthread)) {
|
||||
fprintf(stderr, "%s: failed to quantize model from '%s'\n", __func__, fname_inp.c_str());
|
||||
return 1;
|
||||
}
|
||||
|
||||
t_quantize_us = ggml_time_us() - t_start_us;
|
||||
}
|
||||
|
||||
// report timing
|
||||
{
|
||||
const int64_t t_main_end_us = ggml_time_us();
|
||||
|
||||
printf("\n");
|
||||
printf("%s: quantize time = %8.2f ms\n", __func__, t_quantize_us/1000.0);
|
||||
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
0
third_party/math.h
vendored
0
third_party/math.h
vendored
2
third_party/nsync/mem/mem.mk
vendored
2
third_party/nsync/mem/mem.mk
vendored
|
@ -48,7 +48,7 @@ THIRD_PARTY_NSYNC_MEM_LIBS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(
|
|||
THIRD_PARTY_NSYNC_MEM_SRCS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_SRCS))
|
||||
THIRD_PARTY_NSYNC_MEM_CHECKS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_CHECKS))
|
||||
THIRD_PARTY_NSYNC_MEM_OBJS = $(foreach x,$(THIRD_PARTY_NSYNC_MEM_ARTIFACTS),$($(x)_OBJS))
|
||||
$(THIRD_PARTY_NSYNC_MEM_OBJS): third_party/nsync/mem/nsync.mk
|
||||
$(THIRD_PARTY_NSYNC_MEM_OBJS): third_party/nsync/mem/mem.mk
|
||||
|
||||
.PHONY: o/$(MODE)/third_party/nsync/mem
|
||||
o/$(MODE)/third_party/nsync/mem: $(THIRD_PARTY_NSYNC_MEM_CHECKS)
|
||||
|
|
0
third_party/nsync/mem/nsync.mk
vendored
0
third_party/nsync/mem/nsync.mk
vendored
Loading…
Add table
Add a link
Reference in a new issue