Merge 09279c86ce into 948ff137ec
commit b5ff3e45ee
4 changed files with 30 additions and 30 deletions
@@ -40,13 +40,13 @@ class Model:
         self.ftype = ftype
         self.fname_out = fname_out
         self.is_big_endian = is_big_endian
-        self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
+        self.endianness = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.is_safetensors = self._is_model_safetensors()
         self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
         self.part_names = self._get_part_names()
         self.hparams = Model.load_hparams(self.dir_model)
         self.model_arch = self._get_model_architecture()
-        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess)
+        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianness=self.endianness)

     def set_vocab(self):
         self._set_vocab_gpt2()
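The hunk above renames the attribute and the constructor keyword together, so the `GGUFWriter` call stays consistent. A minimal usage sketch of the renamed keyword, assuming the post-commit `GGUFWriter` signature (the output path and architecture name here are hypothetical):

```python
import gguf

# Choose the byte order from a boolean flag, as Model.__init__ does above.
is_big_endian = False
endianness = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE

# Hypothetical path and arch name; `endianness=` is the keyword as renamed by this commit.
writer = gguf.GGUFWriter("model.gguf", "llama", endianness=endianness)
```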
convert.py (18 changed lines)
@@ -842,8 +842,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:


 class OutputFile:
-    def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
-        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
+    def __init__(self, fname_out: Path, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianness=endianness)

     def add_meta_arch(self, params: Params) -> None:
         name = "LLaMA"
@@ -932,10 +932,10 @@ class OutputFile:
         self.gguf.close()

     @staticmethod
-    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out, endianess=endianess)
+        of = OutputFile(fname_out, endianness=endianness)

         # meta data
         of.add_meta_arch(params)
@@ -960,10 +960,10 @@ class OutputFile:
             return dt.quantize(arr)

     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianness: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out, endianess=endianess)
+        of = OutputFile(fname_out, endianness=endianness)

         # meta data
         of.add_meta_arch(params)
@@ -1205,9 +1205,9 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
-    endianess = gguf.GGUFEndian.LITTLE
+    endianness = gguf.GGUFEndian.LITTLE
     if args.bigendian:
-        endianess = gguf.GGUFEndian.BIG
+        endianness = gguf.GGUFEndian.BIG

     params = Params.load(model_plus)
     if params.n_ctx == -1:
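For context, `args.bigendian` in the hunk above is a plain boolean flag that selects the big-endian enum value. A sketch of how such a flag could be wired up with argparse (the parser definition is an assumption; only `args.bigendian` itself appears in this diff):

```python
import argparse
import gguf

# Hypothetical parser wiring for the --bigendian flag used above.
parser = argparse.ArgumentParser()
parser.add_argument("--bigendian", action="store_true",
                    help="write the GGUF file in big-endian byte order")
args = parser.parse_args([])  # empty list: defaults, i.e. little-endian

endianness = gguf.GGUFEndian.BIG if args.bigendian else gguf.GGUFEndian.LITTLE
```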
@@ -1260,7 +1260,7 @@ def main(args_in: list[str] | None = None) -> None:
     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")

-    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
+    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianness=endianness)
     print(f"Wrote {outfile}")

@@ -408,7 +408,7 @@ struct llama_client_slot
    size_t sent_token_probs_index = 0;

    int64_t t_start_process_prompt;
-   int64_t t_start_genereration;
+   int64_t t_start_generation;

    double t_prompt_processing; // ms
    double t_token_generation; // ms
@@ -475,12 +475,12 @@ struct llama_client_slot
    void release() {
        if (state == IDLE || state == PROCESSING)
        {
-           t_token_generation = (ggml_time_us() - t_start_genereration) / 1e3;
+           t_token_generation = (ggml_time_us() - t_start_generation) / 1e3;
            command = RELEASE;
        }
    }

-   json get_formated_timings() {
+   json get_formatted_timings() {
        return json
        {
            {"prompt_n", num_prompt_tokens_processed},
@@ -1159,10 +1159,10 @@ struct llama_server_context

    json get_model_props()
    {
-       return get_formated_generation(slots[0]);
+       return get_formatted_generation(slots[0]);
    }

-   json get_formated_generation(llama_client_slot &slot)
+   json get_formatted_generation(llama_client_slot &slot)
    {
        const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
        const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
@@ -1253,7 +1253,7 @@ struct llama_server_context
            {"model", params.model_alias},
            {"tokens_predicted", slot.n_decoded},
            {"tokens_evaluated", slot.num_prompt_tokens},
-           {"generation_settings", get_formated_generation(slot)},
+           {"generation_settings", get_formatted_generation(slot)},
            {"prompt", slot.prompt},
            {"truncated", slot.truncated},
            {"stopped_eos", slot.stopped_eos},
@@ -1261,7 +1261,7 @@ struct llama_server_context
            {"stopped_limit", slot.stopped_limit},
            {"stopping_word", slot.stopping_word},
            {"tokens_cached", slot.n_past},
-           {"timings", slot.get_formated_timings()}
+           {"timings", slot.get_formatted_timings()}
        };

        if (slot.sparams.n_probs > 0)
@@ -1680,7 +1680,7 @@ struct llama_server_context
                    slot.command = NONE;
                    std::vector<llama_token> prompt_tokens;
                    slot.t_start_process_prompt = ggml_time_us();
-                   slot.t_start_genereration = 0;
+                   slot.t_start_generation = 0;

                    if (slot.infill)
                    {
@@ -1870,8 +1870,8 @@ struct llama_server_context

            if (slot.n_decoded == 1)
            {
-               slot.t_start_genereration = ggml_time_us();
-               slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
+               slot.t_start_generation = ggml_time_us();
+               slot.t_prompt_processing = (slot.t_start_generation - slot.t_start_process_prompt) / 1e3;
            }

            llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
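The timing logic above stamps `t_start_generation` when the first token is decoded, then converts the microsecond delta to milliseconds with `/ 1e3`. A self-contained Python sketch of the same bookkeeping, with `time.monotonic_ns()` standing in for `ggml_time_us()`:

```python
import time

def now_us() -> int:
    # Monotonic microsecond clock, standing in for ggml_time_us().
    return time.monotonic_ns() // 1_000

t_start_process_prompt = now_us()
# ... prompt evaluation would happen here ...
t_start_generation = now_us()  # stamped when the first token is decoded

# Microseconds / 1e3 -> milliseconds, matching the C++ above.
t_prompt_processing_ms = (t_start_generation - t_start_process_prompt) / 1e3
print(f"prompt processing took {t_prompt_processing_ms:.3f} ms")
```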
@@ -2294,13 +2294,13 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                invalid_param = true;
                break;
            }
-           std::string systm_content;
+           std::string system_content;
            std::copy(
                std::istreambuf_iterator<char>(file),
                std::istreambuf_iterator<char>(),
-               std::back_inserter(systm_content)
+               std::back_inserter(system_content)
            );
-           llama.process_system_prompt_data(json::parse(systm_content));
+           llama.process_system_prompt_data(json::parse(system_content));
        }
        else if(arg == "--mmproj")
        {
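The `std::copy` over `istreambuf_iterator`s above reads the whole system-prompt file into a string before handing it to `json::parse`. The same slurp-then-parse step in Python, for illustration (the file name is hypothetical):

```python
import json
from pathlib import Path

# Read the entire file into a string, then parse it, mirroring the C++ above.
system_content = Path("system_prompt.json").read_text()
data = json.loads(system_content)
```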
@@ -50,11 +50,11 @@ class GGUFWriter:

    def __init__(
        self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
-       endianess: GGUFEndian = GGUFEndian.LITTLE,
+       endianness: GGUFEndian = GGUFEndian.LITTLE,
    ):
        self.fout = open(path, "wb")
        self.arch = arch
-       self.endianess = endianess
+       self.endianness = endianness
        self.offset_tensor = 0
        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
        self.kv_data = bytearray()
@@ -65,7 +65,7 @@ class GGUFWriter:
        self.temp_file = None
        self.tensors = []
        print("gguf: This GGUF file is for {0} Endian only".format(
-           "Big" if self.endianess == GGUFEndian.BIG else "Little",
+           "Big" if self.endianness == GGUFEndian.BIG else "Little",
        ))
        self.state = WriterState.EMPTY

@@ -218,7 +218,7 @@ class GGUFWriter:
        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
-       if self.endianess == GGUFEndian.BIG:
+       if self.endianness == GGUFEndian.BIG:
            tensor.byteswap(inplace=True)
        if self.use_temp_file and self.temp_file is None:
            fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
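`add_tensor` byte-swaps tensor data in place before writing big-endian files, because numpy arrays hold native-order (little-endian on typical hosts) data. A quick demonstration of the `byteswap` call used above:

```python
import numpy as np

t = np.arange(4, dtype=np.int32)  # stored little-endian on typical hosts
print(t.tobytes()[:8].hex())      # '0000000001000000': 0 then 1, little-endian
t.byteswap(inplace=True)          # reverse the bytes of each element, in place
print(t.tobytes()[:8].hex())      # '0000000000000001': same values, big-endian
```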
@@ -244,7 +244,7 @@ class GGUFWriter:
        if self.state is not WriterState.TI_DATA:
            raise ValueError(f'Expected output file to contain tensor info, got {self.state}')

-       if self.endianess == GGUFEndian.BIG:
+       if self.endianness == GGUFEndian.BIG:
            tensor.byteswap(inplace=True)
        self.write_padding(self.fout, self.fout.tell())
        tensor.tofile(self.fout)
@@ -411,7 +411,7 @@ class GGUFWriter:
    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
        pack_prefix = ''
        if not skip_pack_prefix:
-           pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+           pack_prefix = '<' if self.endianness == GGUFEndian.LITTLE else '>'
        return struct.pack(f'{pack_prefix}{fmt}', value)

    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
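`_pack` relies on the byte-order prefixes of Python's `struct` module: `'<'` forces little-endian packing and `'>'` big-endian, regardless of the host. A two-line illustration:

```python
import struct

print(struct.pack('<I', 1).hex())  # '01000000', little-endian uint32
print(struct.pack('>I', 1).hex())  # '00000001', big-endian uint32
```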