compress: format

2024-09-25 01:26:39 +02:00 · 2024-09-25 01:26:39 +02:00 · bec83989be
commit bec83989be
parent b9a32f464f
1 changed file with 33 additions and 27 deletions
--- a/examples/compress/compress.cpp
+++ b/examples/compress/compress.cpp
@@ -78,7 +78,8 @@ std::vector<uint8_t> encode(llama_context *ctx, std::vector<llama_token> inp, gp
                break;
            }
        }
-        if(match<0){
+        if (match < 0)
+        {
            LOG_ERR("\n couldn't match %s", llama_token_to_piece(ctx, inp[index]).c_str());
            exit(1);
        }
@@ -140,7 +141,6 @@ std::vector<uint8_t> encode(llama_context *ctx, std::vector<llama_token> inp, gp
                }
                sample_ids_bitpacked[block_start + 1] = block_size & 0xff;

-
                // put last bytes
                if (PAD)
                {
@@ -544,7 +544,8 @@ int main(int argc, char **argv)
        auto t_enc_end = ggml_time_us();

        LOG("\n");
-        if(!params.no_perf){
+        if (!params.no_perf)
+        {
            LOG("\nInput: %d characters (%d tokens)", params.prompt.length(), inp.size());

            float compressed_bits_per_token = 8 * (float)sample_ids_bitpacked.size() / (float)inp.size();
@@ -555,19 +556,22 @@ int main(int argc, char **argv)
            LOG("\nPPL (over)estimation: %04f (%04f with padding)", exp2(compressed_bits_per_token - total_pad / (float)inp.size()), exp2(compressed_bits_per_token));
        }
        // maybe this needs to be changed
-        if(params.out_file != "imatrix.dat"){
+        if (params.out_file != "imatrix.dat")
+        {
            // dump uint8array to bin file
            std::ofstream ofs(params.out_file.c_str(), std::ios::binary);
            ofs.write((char *)&sample_ids_bitpacked[0], sample_ids_bitpacked.size());
            ofs.close();
-        }else{
+        }
+        else
+        {
            LOG("\n------------\n");
            // print as hex to stdout
-            for (int i = 0; i < sample_ids_bitpacked.size(); i++){
+            for (int i = 0; i < sample_ids_bitpacked.size(); i++)
+            {
                LOG("%02X ", sample_ids_bitpacked[i]);
            }
        }
-
    }
    else if (params.compress_mode == 2)
    {
@@ -575,7 +579,8 @@ int main(int argc, char **argv)
        //  load sample_ids_bitpacked from params.prompt_file
        std::ifstream ifs(params.prompt_file.c_str(), std::ios::binary);

-        if (!ifs) {
+        if (!ifs)
+        {
            LOG_ERR("%s: failed to open file\n", __func__);
            return -1;
        }
@@ -588,14 +593,16 @@ int main(int argc, char **argv)
        std::vector<uint8_t> sample_ids_bitpacked(fileSize);

        // Read the ifs into the vector
-        if (!ifs.read(reinterpret_cast<char*>(sample_ids_bitpacked.data()), fileSize)) {
+        if (!ifs.read(reinterpret_cast<char *>(sample_ids_bitpacked.data()), fileSize))
+        {
            LOG_ERR("%s: failed to read file\n", __func__);
            return -1;
        }
        ifs.close();

        // Debug: print as hex
-        for (int i = 0; i < sample_ids_bitpacked.size(); i++){
+        for (int i = 0; i < sample_ids_bitpacked.size(); i++)
+        {
            LOG("%02X ", sample_ids_bitpacked[i]);
        }
        LOG("\n");
@@ -612,12 +619,12 @@ int main(int argc, char **argv)

        std::vector<llama_token> out = decode(ctx, smpl, sample_ids_bitpacked);

-
        gpt_sampler_free(smpl);
        auto t_dec_end = ggml_time_us();

        // maybe this needs to be changed
-        if(params.out_file != "imatrix.dat"){
+        if (params.out_file != "imatrix.dat")
+        {
            // dump as string to file
            std::string out_str = ::llama_detokenize(ctx, out);

@@ -628,7 +635,6 @@ int main(int argc, char **argv)

        llama_free(ctx);
        llama_free_model(model);
-
    }

    llama_backend_free();