blacked quantize
This commit is contained in:
parent
382c0c6100
commit
172febff3f
1 changed files with 25 additions and 18 deletions
43
quantize.py
43
quantize.py
|
@ -25,27 +25,36 @@ def main():
|
||||||
quantize_script_binary = "quantize"
|
quantize_script_binary = "quantize"
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog='python3 quantize.py',
|
prog="python3 quantize.py",
|
||||||
description='This script quantizes the given models by applying the '
|
description="This script quantizes the given models by applying the "
|
||||||
f'"{quantize_script_binary}" script on them.'
|
f'"{quantize_script_binary}" script on them.',
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'models', nargs='+', choices=('7B', '13B', '30B', '65B'),
|
"models",
|
||||||
help='The models to quantize.'
|
nargs="+",
|
||||||
|
choices=("7B", "13B", "30B", "65B"),
|
||||||
|
help="The models to quantize.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-r', '--remove-16', action='store_true', dest='remove_f16',
|
"-r",
|
||||||
help='Remove the f16 model after quantizing it.'
|
"--remove-16",
|
||||||
|
action="store_true",
|
||||||
|
dest="remove_f16",
|
||||||
|
help="Remove the f16 model after quantizing it.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-m', '--models-path', dest='models_path',
|
"-m",
|
||||||
|
"--models-path",
|
||||||
|
dest="models_path",
|
||||||
default=os.path.join(os.getcwd(), "models"),
|
default=os.path.join(os.getcwd(), "models"),
|
||||||
help='Specify the directory where the models are located.'
|
help="Specify the directory where the models are located.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-q', '--quantize-script-path', dest='quantize_script_path',
|
"-q",
|
||||||
|
"--quantize-script-path",
|
||||||
|
dest="quantize_script_path",
|
||||||
default=os.path.join(os.getcwd(), quantize_script_binary),
|
default=os.path.join(os.getcwd(), quantize_script_binary),
|
||||||
help='Specify the path to the "quantize" script.'
|
help='Specify the path to the "quantize" script.',
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: Revise this code
|
# TODO: Revise this code
|
||||||
|
@ -75,12 +84,12 @@ def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
if not os.path.isfile(f16_model_path_base):
|
if not os.path.isfile(f16_model_path_base):
|
||||||
print(f'The file %s was not found' % f16_model_path_base)
|
print(f"The file %s was not found" % f16_model_path_base)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
f16_model_parts_paths = map(
|
f16_model_parts_paths = map(
|
||||||
lambda filename: os.path.join(f16_model_path_base, filename),
|
lambda filename: os.path.join(f16_model_path_base, filename),
|
||||||
glob.glob(f"{f16_model_path_base}*")
|
glob.glob(f"{f16_model_path_base}*"),
|
||||||
)
|
)
|
||||||
|
|
||||||
for f16_model_part_path in f16_model_parts_paths:
|
for f16_model_part_path in f16_model_parts_paths:
|
||||||
|
@ -93,9 +102,7 @@ def main():
|
||||||
)
|
)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
__run_quantize_script(
|
__run_quantize_script(args.quantize_script_path, f16_model_part_path)
|
||||||
args.quantize_script_path, f16_model_part_path
|
|
||||||
)
|
|
||||||
|
|
||||||
if args.remove_f16:
|
if args.remove_f16:
|
||||||
os.remove(f16_model_part_path)
|
os.remove(f16_model_part_path)
|
||||||
|
@ -104,6 +111,7 @@ def main():
|
||||||
# This was extracted to a top-level function for parallelization, if
|
# This was extracted to a top-level function for parallelization, if
|
||||||
# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
|
# implemented. See https://github.com/ggerganov/llama.cpp/pull/222/commits/f8db3d6cd91bf1a1342db9d29e3092bc12dd783c#r1140496406
|
||||||
|
|
||||||
|
|
||||||
def __run_quantize_script(script_path, f16_model_part_path):
|
def __run_quantize_script(script_path, f16_model_part_path):
|
||||||
"""Run the quantize script specifying the path to it and the path to the
|
"""Run the quantize script specifying the path to it and the path to the
|
||||||
f16 model to quantize.
|
f16 model to quantize.
|
||||||
|
@ -111,8 +119,7 @@ def __run_quantize_script(script_path, f16_model_part_path):
|
||||||
|
|
||||||
new_quantized_model_path = f16_model_part_path.replace("f16", "q4_0")
|
new_quantized_model_path = f16_model_part_path.replace("f16", "q4_0")
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
[script_path, f16_model_part_path, new_quantized_model_path, "2"],
|
[script_path, f16_model_part_path, new_quantized_model_path, "2"], check=True
|
||||||
check=True
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue