compress: format
This commit is contained in:
parent
b9a32f464f
commit
bec83989be
1 changed file with 33 additions and 27 deletions
|
@ -78,7 +78,8 @@ std::vector<uint8_t> encode(llama_context *ctx, std::vector<llama_token> inp, gp
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(match<0){
|
if (match < 0)
|
||||||
|
{
|
||||||
LOG_ERR("\n couldn't match %s", llama_token_to_piece(ctx, inp[index]).c_str());
|
LOG_ERR("\n couldn't match %s", llama_token_to_piece(ctx, inp[index]).c_str());
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
@ -140,7 +141,6 @@ std::vector<uint8_t> encode(llama_context *ctx, std::vector<llama_token> inp, gp
|
||||||
}
|
}
|
||||||
sample_ids_bitpacked[block_start + 1] = block_size & 0xff;
|
sample_ids_bitpacked[block_start + 1] = block_size & 0xff;
|
||||||
|
|
||||||
|
|
||||||
// put last bytes
|
// put last bytes
|
||||||
if (PAD)
|
if (PAD)
|
||||||
{
|
{
|
||||||
|
@ -544,7 +544,8 @@ int main(int argc, char **argv)
|
||||||
auto t_enc_end = ggml_time_us();
|
auto t_enc_end = ggml_time_us();
|
||||||
|
|
||||||
LOG("\n");
|
LOG("\n");
|
||||||
if(!params.no_perf){
|
if (!params.no_perf)
|
||||||
|
{
|
||||||
LOG("\nInput: %d characters (%d tokens)", params.prompt.length(), inp.size());
|
LOG("\nInput: %d characters (%d tokens)", params.prompt.length(), inp.size());
|
||||||
|
|
||||||
float compressed_bits_per_token = 8 * (float)sample_ids_bitpacked.size() / (float)inp.size();
|
float compressed_bits_per_token = 8 * (float)sample_ids_bitpacked.size() / (float)inp.size();
|
||||||
|
@ -555,19 +556,22 @@ int main(int argc, char **argv)
|
||||||
LOG("\nPPL (over)estimation: %04f (%04f with padding)", exp2(compressed_bits_per_token - total_pad / (float)inp.size()), exp2(compressed_bits_per_token));
|
LOG("\nPPL (over)estimation: %04f (%04f with padding)", exp2(compressed_bits_per_token - total_pad / (float)inp.size()), exp2(compressed_bits_per_token));
|
||||||
}
|
}
|
||||||
// maybe this needs to be changed
|
// maybe this needs to be changed
|
||||||
if(params.out_file != "imatrix.dat"){
|
if (params.out_file != "imatrix.dat")
|
||||||
|
{
|
||||||
// dump uint8array to bin file
|
// dump uint8array to bin file
|
||||||
std::ofstream ofs(params.out_file.c_str(), std::ios::binary);
|
std::ofstream ofs(params.out_file.c_str(), std::ios::binary);
|
||||||
ofs.write((char *)&sample_ids_bitpacked[0], sample_ids_bitpacked.size());
|
ofs.write((char *)&sample_ids_bitpacked[0], sample_ids_bitpacked.size());
|
||||||
ofs.close();
|
ofs.close();
|
||||||
}else{
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
LOG("\n------------\n");
|
LOG("\n------------\n");
|
||||||
// print as hex to stdout
|
// print as hex to stdout
|
||||||
for (int i = 0; i < sample_ids_bitpacked.size(); i++){
|
for (int i = 0; i < sample_ids_bitpacked.size(); i++)
|
||||||
|
{
|
||||||
LOG("%02X ", sample_ids_bitpacked[i]);
|
LOG("%02X ", sample_ids_bitpacked[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else if (params.compress_mode == 2)
|
else if (params.compress_mode == 2)
|
||||||
{
|
{
|
||||||
|
@ -575,7 +579,8 @@ int main(int argc, char **argv)
|
||||||
// load sample_ids_bitpacked from params.prompt_file
|
// load sample_ids_bitpacked from params.prompt_file
|
||||||
std::ifstream ifs(params.prompt_file.c_str(), std::ios::binary);
|
std::ifstream ifs(params.prompt_file.c_str(), std::ios::binary);
|
||||||
|
|
||||||
if (!ifs) {
|
if (!ifs)
|
||||||
|
{
|
||||||
LOG_ERR("%s: failed to open file\n", __func__);
|
LOG_ERR("%s: failed to open file\n", __func__);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -588,14 +593,16 @@ int main(int argc, char **argv)
|
||||||
std::vector<uint8_t> sample_ids_bitpacked(fileSize);
|
std::vector<uint8_t> sample_ids_bitpacked(fileSize);
|
||||||
|
|
||||||
// Read the ifs into the vector
|
// Read the ifs into the vector
|
||||||
if (!ifs.read(reinterpret_cast<char*>(sample_ids_bitpacked.data()), fileSize)) {
|
if (!ifs.read(reinterpret_cast<char *>(sample_ids_bitpacked.data()), fileSize))
|
||||||
|
{
|
||||||
LOG_ERR("%s: failed to read file\n", __func__);
|
LOG_ERR("%s: failed to read file\n", __func__);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
ifs.close();
|
ifs.close();
|
||||||
|
|
||||||
// Debug: print as hex
|
// Debug: print as hex
|
||||||
for (int i = 0; i < sample_ids_bitpacked.size(); i++){
|
for (int i = 0; i < sample_ids_bitpacked.size(); i++)
|
||||||
|
{
|
||||||
LOG("%02X ", sample_ids_bitpacked[i]);
|
LOG("%02X ", sample_ids_bitpacked[i]);
|
||||||
}
|
}
|
||||||
LOG("\n");
|
LOG("\n");
|
||||||
|
@ -612,12 +619,12 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
std::vector<llama_token> out = decode(ctx, smpl, sample_ids_bitpacked);
|
std::vector<llama_token> out = decode(ctx, smpl, sample_ids_bitpacked);
|
||||||
|
|
||||||
|
|
||||||
gpt_sampler_free(smpl);
|
gpt_sampler_free(smpl);
|
||||||
auto t_dec_end = ggml_time_us();
|
auto t_dec_end = ggml_time_us();
|
||||||
|
|
||||||
// maybe this needs to be changed
|
// maybe this needs to be changed
|
||||||
if(params.out_file != "imatrix.dat"){
|
if (params.out_file != "imatrix.dat")
|
||||||
|
{
|
||||||
// dump as string to file
|
// dump as string to file
|
||||||
std::string out_str = ::llama_detokenize(ctx, out);
|
std::string out_str = ::llama_detokenize(ctx, out);
|
||||||
|
|
||||||
|
@ -628,7 +635,6 @@ int main(int argc, char **argv)
|
||||||
|
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_backend_free();
|
llama_backend_free();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue