Allow "quantizing" to f16 and f32 (#1787)
* Allow "quantizing" to f16 and f32. Fix an issue where quantizing didn't respect LLAMA_NO_K_QUANTS. Add brief help to the list of quantization types in the quantize tool. Ignore case for quantization type arguments in the quantize tool.
This commit is contained in:
parent
74a6d922f1
commit
74d4cfa343
4 changed files with 154 additions and 48 deletions
12
ggml.c
12
ggml.c
|
@@ -16301,6 +16301,18 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|||
result = ggml_quantize_q6_K(src + start, block, n, n, hist);
|
||||
} break;
|
||||
#endif
|
||||
case GGML_TYPE_F16:
|
||||
{
|
||||
int elemsize = sizeof(ggml_fp16_t);
|
||||
ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
|
||||
result = n * elemsize;
|
||||
} break;
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
int elemsize = sizeof(float);
|
||||
result = n * elemsize;
|
||||
memcpy((uint8_t *)dst + start * elemsize, src + start, result);
|
||||
} break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue