Mirror of https://github.com/jart/cosmopolitan.git, synced 2025-01-31 11:37:35 +00:00
Commit e7eb0b3070:

- Fix UX issues with llama.com
- Do housekeeping on libm code
- Add more vectorization to GGML
- Get GGJT quantizer programs working well
- Have the quantizer keep the output layer as f16c
- Prefetching improves performance 15% if you use fewer threads
24 lines · 674 B · C
#ifndef COSMOPOLITAN_THIRD_PARTY_GGML_F16_H_
#define COSMOPOLITAN_THIRD_PARTY_GGML_F16_H_
#if !(__ASSEMBLER__ + __LINKER__ + 0)
COSMOPOLITAN_C_START_

#ifdef __ARM_NEON
// we use the built-in 16-bit float type
typedef __fp16 ggml_fp16_t;
#else
// elsewhere, fp16 values are carried as raw IEEE 754 half-precision bits
typedef uint16_t ggml_fp16_t;
#endif

// one-time setup for the conversion routines below
void ggml_fp16_init(void);

// convert FP16 <-> FP32
float ggml_fp16_to_fp32(ggml_fp16_t x);
ggml_fp16_t ggml_fp32_to_fp16(float x);

// convert whole rows of n elements at a time
void ggml_fp16_to_fp32_row(const ggml_fp16_t* x, float* y, size_t n);
void ggml_fp32_to_fp16_row(const float* x, ggml_fp16_t* y, size_t n);

COSMOPOLITAN_C_END_
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */
#endif /* COSMOPOLITAN_THIRD_PARTY_GGML_F16_H_ */
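
Below is a minimal usage sketch of this API. It assumes the include path third_party/ggml/f16.h (inferred from the include guard), that the program links against the GGML objects in this tree, and that ggml_fp16_init() must run before any conversion; the header itself does not document that ordering.

#include <stdio.h>
#include "third_party/ggml/f16.h"

int main(void) {
  ggml_fp16_init();  // assumed one-time setup before converting

  // scalar round trip: fp32 -> fp16 -> fp32 loses low-order bits
  float pi = 3.14159265f;
  ggml_fp16_t h = ggml_fp32_to_fp16(pi);
  printf("%.7f -> %.7f\n", pi, ggml_fp16_to_fp32(h));  // ~3.1406250

  // bulk conversion of a row of values, e.g. a tensor row
  float row[4] = {0.1f, 0.2f, 0.3f, 0.4f};
  ggml_fp16_t packed[4];
  float back[4];
  ggml_fp32_to_fp16_row(row, packed, 4);
  ggml_fp16_to_fp32_row(packed, back, 4);
  for (int i = 0; i < 4; ++i) printf("%.7f\n", back[i]);
  return 0;
}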
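
For reference, here is one well-known portable way to widen IEEE 754 half-precision bits to a float, matching the uint16_t representation used on non-NEON targets above. This is an illustrative sketch, not the implementation in third_party/ggml, and half_bits_to_float is a hypothetical helper name.

#include <stdint.h>
#include <string.h>

// hypothetical helper, for illustration only
static float half_bits_to_float(uint16_t h) {
  uint32_t sign = (uint32_t)(h & 0x8000) << 16;  // sign moves to bit 31
  uint32_t exp = (h >> 10) & 0x1f;               // 5-bit exponent
  uint32_t mant = h & 0x3ff;                     // 10-bit mantissa
  uint32_t bits;
  if (exp == 0x1f) {
    bits = sign | 0x7f800000 | (mant << 13);     // Inf or NaN
  } else if (exp != 0) {
    // normal number: rebias exponent from 15 to 127
    bits = sign | ((exp - 15 + 127) << 23) | (mant << 13);
  } else if (mant == 0) {
    bits = sign;                                 // signed zero
  } else {
    // subnormal half: renormalize into a normal float
    exp = 127 - 15 + 1;
    while (!(mant & 0x400)) {
      mant <<= 1;
      --exp;
    }
    bits = sign | (exp << 23) | ((mant & 0x3ff) << 13);
  }
  float f;
  memcpy(&f, &bits, sizeof f);                   // safe type pun
  return f;
}

A table-based alternative (65536 precomputed floats, one per bit pattern) trades memory for skipping this branching on every element, which is presumably why an explicit init function exists in this API.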