Fix typos in code.
parent 05cd6e5036 · commit 09279c86ce

4 changed files with 30 additions and 30 deletions

@@ -40,13 +40,13 @@ class Model:
         self.ftype = ftype
         self.fname_out = fname_out
         self.is_big_endian = is_big_endian
-        self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
+        self.endianness = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.is_safetensors = self._is_model_safetensors()
         self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
         self.part_names = self._get_part_names()
         self.hparams = Model.load_hparams(self.dir_model)
         self.model_arch = self._get_model_architecture()
-        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess)
+        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianness=self.endianness)

     def set_vocab(self):
         self._set_vocab_gpt2()
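
The rename flows from the converter's constructor into the gguf.GGUFWriter call. Below is a minimal sketch of that call after the change; it assumes the gguf Python package exposes GGUFWriter, GGUFEndian, MODEL_ARCH and MODEL_ARCH_NAMES as used in this hunk and accepts the renamed endianness keyword, and the output path is a placeholder.

    import gguf

    is_big_endian = False  # would be driven by the converter's big-endian flag
    endianness = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE

    # Note: GGUFWriter opens the output file immediately, so this creates model.gguf.
    writer = gguf.GGUFWriter(
        "model.gguf",                                  # placeholder output path
        gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA],  # architecture name string
        endianness=endianness,
    )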

convert.py (18 changed lines)

@@ -812,8 +812,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:


 class OutputFile:
-    def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+    def __init__(self, fname_out: Path, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
-        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
+        self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianness=endianness)

     def add_meta_arch(self, params: Params) -> None:
         name = "LLaMA"

@@ -892,10 +892,10 @@ class OutputFile:
         self.gguf.close()

     @staticmethod
-    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+    def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out, endianess=endianess)
+        of = OutputFile(fname_out, endianness=endianness)

         # meta data
         of.add_meta_arch(params)

@@ -920,10 +920,10 @@ class OutputFile:
         return dt.quantize(arr)

     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianness: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)

-        of = OutputFile(fname_out, endianess=endianess)
+        of = OutputFile(fname_out, endianness=endianness)

         # meta data
         of.add_meta_arch(params)

@@ -1165,9 +1165,9 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
-    endianess = gguf.GGUFEndian.LITTLE
+    endianness = gguf.GGUFEndian.LITTLE
     if args.bigendian:
-        endianess = gguf.GGUFEndian.BIG
+        endianness = gguf.GGUFEndian.BIG

     params = Params.load(model_plus)
     if params.n_ctx == -1:

@@ -1220,7 +1220,7 @@ def main(args_in: list[str] | None = None) -> None:
     params.ftype = ftype
     print(f"Writing {outfile}, format {ftype}")

-    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
+    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianness=endianness)
     print(f"Wrote {outfile}")

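
In convert.py the same rename reaches main(), where the --bigendian flag selects the byte order that is forwarded to OutputFile.write_all. Here is a self-contained sketch of that selection, assuming the post-rename gguf package; the argparse setup is illustrative, and only the --bigendian flag name is taken from the diff.

    import argparse
    import gguf

    parser = argparse.ArgumentParser()
    parser.add_argument("--bigendian", action="store_true",
                        help="write the GGUF file in big-endian byte order")
    args = parser.parse_args([])  # e.g. parser.parse_args(["--bigendian"])

    endianness = gguf.GGUFEndian.BIG if args.bigendian else gguf.GGUFEndian.LITTLE
    print(endianness)             # GGUFEndian.LITTLE unless --bigendian was given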

@@ -409,7 +409,7 @@ struct llama_client_slot
     size_t sent_token_probs_index = 0;

     int64_t t_start_process_prompt;
-    int64_t t_start_genereration;
+    int64_t t_start_generation;

     double t_prompt_processing; // ms
     double t_token_generation; // ms

@@ -477,12 +477,12 @@ struct llama_client_slot
     void release() {
         if (state == IDLE || state == PROCESSING)
         {
-            t_token_generation = (ggml_time_us() - t_start_genereration) / 1e3;
+            t_token_generation = (ggml_time_us() - t_start_generation) / 1e3;
             command = RELEASE;
         }
     }

-    json get_formated_timings() {
+    json get_formatted_timings() {
         return json
         {
             {"prompt_n", num_prompt_tokens_processed},

@@ -1160,10 +1160,10 @@ struct llama_server_context

     json get_model_props()
     {
-        return get_formated_generation(slots[0]);
+        return get_formatted_generation(slots[0]);
     }

-    json get_formated_generation(llama_client_slot &slot)
+    json get_formatted_generation(llama_client_slot &slot)
     {
         const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
         const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&

@@ -1254,7 +1254,7 @@ struct llama_server_context
             {"model", params.model_alias},
             {"tokens_predicted", slot.n_decoded},
             {"tokens_evaluated", slot.num_prompt_tokens},
-            {"generation_settings", get_formated_generation(slot)},
+            {"generation_settings", get_formatted_generation(slot)},
             {"prompt", slot.prompt},
             {"truncated", slot.truncated},
             {"stopped_eos", slot.stopped_eos},

@@ -1262,7 +1262,7 @@ struct llama_server_context
             {"stopped_limit", slot.stopped_limit},
             {"stopping_word", slot.stopping_word},
             {"tokens_cached", slot.n_past},
-            {"timings", slot.get_formated_timings()}
+            {"timings", slot.get_formatted_timings()}
         };

         if (slot.sparams.n_probs > 0)

@@ -1681,7 +1681,7 @@ struct llama_server_context
                 slot.command = NONE;
                 std::vector<llama_token> prompt_tokens;
                 slot.t_start_process_prompt = ggml_time_us();
-                slot.t_start_genereration = 0;
+                slot.t_start_generation = 0;

                 if (slot.infill)
                 {

@@ -1871,8 +1871,8 @@ struct llama_server_context

             if (slot.n_decoded == 1)
             {
-                slot.t_start_genereration = ggml_time_us();
+                slot.t_start_generation = ggml_time_us();
-                slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
+                slot.t_prompt_processing = (slot.t_start_generation - slot.t_start_process_prompt) / 1e3;
             }

             llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };

@@ -2299,13 +2299,13 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
                invalid_param = true;
                break;
            }
-            std::string systm_content;
+            std::string system_content;
            std::copy(
                std::istreambuf_iterator<char>(file),
                std::istreambuf_iterator<char>(),
-                std::back_inserter(systm_content)
+                std::back_inserter(system_content)
            );
-            llama.process_system_prompt_data(json::parse(systm_content));
+            llama.process_system_prompt_data(json::parse(system_content));
        }
        else if(arg == "--mmproj")
        {

@@ -50,11 +50,11 @@ class GGUFWriter:

    def __init__(
        self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
-        endianess: GGUFEndian = GGUFEndian.LITTLE,
+        endianness: GGUFEndian = GGUFEndian.LITTLE,
    ):
        self.fout = open(path, "wb")
        self.arch = arch
-        self.endianess = endianess
+        self.endianness = endianness
        self.offset_tensor = 0
        self.data_alignment = GGUF_DEFAULT_ALIGNMENT
        self.kv_data = bytearray()

@@ -65,7 +65,7 @@ class GGUFWriter:
        self.temp_file = None
        self.tensors = []
        print("gguf: This GGUF file is for {0} Endian only".format(
-            "Big" if self.endianess == GGUFEndian.BIG else "Little",
+            "Big" if self.endianness == GGUFEndian.BIG else "Little",
        ))
        self.state = WriterState.EMPTY

@@ -218,7 +218,7 @@ class GGUFWriter:
        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
        raw_dtype: GGMLQuantizationType | None = None,
    ) -> None:
-        if self.endianess == GGUFEndian.BIG:
+        if self.endianness == GGUFEndian.BIG:
            tensor.byteswap(inplace=True)
        if self.use_temp_file and self.temp_file is None:
            fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)

@@ -244,7 +244,7 @@ class GGUFWriter:
        if self.state is not WriterState.TI_DATA:
            raise ValueError(f'Expected output file to contain tensor info, got {self.state}')

-        if self.endianess == GGUFEndian.BIG:
+        if self.endianness == GGUFEndian.BIG:
            tensor.byteswap(inplace=True)
        self.write_padding(self.fout, self.fout.tell())
        tensor.tofile(self.fout)

@@ -405,7 +405,7 @@ class GGUFWriter:
    def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
        pack_prefix = ''
        if not skip_pack_prefix:
-            pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+            pack_prefix = '<' if self.endianness == GGUFEndian.LITTLE else '>'
        return struct.pack(f'{pack_prefix}{fmt}', value)

    def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
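
GGUFWriter._pack is where the endianness setting ultimately takes effect: it picks the struct byte-order prefix ('<' little-endian, '>' big-endian) for every scalar written to the file. Below is a standalone illustration of that prefix logic, a sketch rather than the library code itself.

    import struct

    def pack_u32(value: int, big_endian: bool = False) -> bytes:
        # GGUF metadata defaults to little-endian; '>' flips the byte order.
        prefix = '>' if big_endian else '<'
        return struct.pack(f'{prefix}I', value)

    assert pack_u32(1) == b'\x01\x00\x00\x00'
    assert pack_u32(1, big_endian=True) == b'\x00\x00\x00\x01'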