sync : ggml (Metal F32 support + reduce ggml-alloc size) (#3192)
* sync : ggml (Metal F32 support + reduce ggml-alloc size) ggml-ci * llama-bench : fix ggml_cpu_has_metal() duplicate function ggml-ci
This commit is contained in:
		
							parent
							
								
									7e50d34be6
								
							
						
					
					
						commit
						8c00b7a6ff
					
				
					 6 changed files with 193 additions and 90 deletions
				
			
		|  | @ -74,14 +74,6 @@ static T stdev(const std::vector<T> & v) { | ||||||
|     return stdev; |     return stdev; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static bool ggml_cpu_has_metal() { |  | ||||||
| #if defined(GGML_USE_METAL) |  | ||||||
|     return true; |  | ||||||
| #else |  | ||||||
|     return false; |  | ||||||
| #endif |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static std::string get_cpu_info() { | static std::string get_cpu_info() { | ||||||
|     std::string id; |     std::string id; | ||||||
| #ifdef __linux__ | #ifdef __linux__ | ||||||
|  |  | ||||||
							
								
								
									
										12
									
								
								ggml-alloc.c
									
										
									
									
									
								
							
							
						
						
									
										12
									
								
								ggml-alloc.c
									
										
									
									
									
								
							|  | @ -131,6 +131,10 @@ static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_ten | ||||||
|     return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size; |     return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static bool ggml_is_view(struct ggml_tensor * t) { | ||||||
|  |     return t->view_src != NULL; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) { | void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) { | ||||||
| #ifdef GGML_ALLOCATOR_DEBUG | #ifdef GGML_ALLOCATOR_DEBUG | ||||||
|     GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
 |     GGML_ASSERT(!ggml_is_view(tensor)); // views generally get data pointer from one of their sources
 | ||||||
|  | @ -338,8 +342,8 @@ static void free_vmem(void * base_addr, size_t size) { | ||||||
| 
 | 
 | ||||||
| // allocate uncommitted virtual memory to measure the size of the graph
 | // allocate uncommitted virtual memory to measure the size of the graph
 | ||||||
| static void alloc_measure_vmem(void ** base_addr, size_t * size) { | static void alloc_measure_vmem(void ** base_addr, size_t * size) { | ||||||
|     // 1TB for 64-bit, 1GB for 32-bit
 |     // 128GB for 64-bit, 1GB for 32-bit
 | ||||||
|     *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<40; |     *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<37; | ||||||
|     do { |     do { | ||||||
|         *base_addr = alloc_vmem(*size); |         *base_addr = alloc_vmem(*size); | ||||||
|         if (*base_addr != NULL) { |         if (*base_addr != NULL) { | ||||||
|  | @ -399,10 +403,6 @@ bool ggml_allocr_is_measure(struct ggml_allocr * alloc) { | ||||||
| 
 | 
 | ||||||
| //////////// compute graph allocator
 | //////////// compute graph allocator
 | ||||||
| 
 | 
 | ||||||
| static bool ggml_is_view(struct ggml_tensor * t) { |  | ||||||
|     return t->view_src != NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) { | static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) { | ||||||
|     if (a->type != b->type) { |     if (a->type != b->type) { | ||||||
|         return false; |         return false; | ||||||
|  |  | ||||||
							
								
								
									
										12
									
								
								ggml-metal.m
									
										
									
									
									
								
							
							
						
						
									
										12
									
								
								ggml-metal.m
									
										
									
									
									
								
							|  | @ -78,6 +78,7 @@ struct ggml_metal_context { | ||||||
|     GGML_METAL_DECL_KERNEL(get_rows_q6_K); |     GGML_METAL_DECL_KERNEL(get_rows_q6_K); | ||||||
|     GGML_METAL_DECL_KERNEL(rms_norm); |     GGML_METAL_DECL_KERNEL(rms_norm); | ||||||
|     GGML_METAL_DECL_KERNEL(norm); |     GGML_METAL_DECL_KERNEL(norm); | ||||||
|  |     GGML_METAL_DECL_KERNEL(mul_mat_f32_f32); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mat_f16_f32); |     GGML_METAL_DECL_KERNEL(mul_mat_f16_f32); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mat_f16_f32_1row); |     GGML_METAL_DECL_KERNEL(mul_mat_f16_f32_1row); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mat_f16_f32_l4); |     GGML_METAL_DECL_KERNEL(mul_mat_f16_f32_l4); | ||||||
|  | @ -89,6 +90,7 @@ struct ggml_metal_context { | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mat_q4_K_f32); |     GGML_METAL_DECL_KERNEL(mul_mat_q4_K_f32); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mat_q5_K_f32); |     GGML_METAL_DECL_KERNEL(mul_mat_q5_K_f32); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mat_q6_K_f32); |     GGML_METAL_DECL_KERNEL(mul_mat_q6_K_f32); | ||||||
|  |     GGML_METAL_DECL_KERNEL(mul_mm_f32_f32); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mm_f16_f32); |     GGML_METAL_DECL_KERNEL(mul_mm_f16_f32); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mm_q4_0_f32); |     GGML_METAL_DECL_KERNEL(mul_mm_q4_0_f32); | ||||||
|     GGML_METAL_DECL_KERNEL(mul_mm_q4_1_f32); |     GGML_METAL_DECL_KERNEL(mul_mm_q4_1_f32); | ||||||
|  | @ -237,6 +239,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) { | ||||||
|         GGML_METAL_ADD_KERNEL(get_rows_q6_K); |         GGML_METAL_ADD_KERNEL(get_rows_q6_K); | ||||||
|         GGML_METAL_ADD_KERNEL(rms_norm); |         GGML_METAL_ADD_KERNEL(rms_norm); | ||||||
|         GGML_METAL_ADD_KERNEL(norm); |         GGML_METAL_ADD_KERNEL(norm); | ||||||
|  |         GGML_METAL_ADD_KERNEL(mul_mat_f32_f32); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mat_f16_f32); |         GGML_METAL_ADD_KERNEL(mul_mat_f16_f32); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mat_f16_f32_1row); |         GGML_METAL_ADD_KERNEL(mul_mat_f16_f32_1row); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mat_f16_f32_l4); |         GGML_METAL_ADD_KERNEL(mul_mat_f16_f32_l4); | ||||||
|  | @ -248,6 +251,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) { | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mat_q4_K_f32); |         GGML_METAL_ADD_KERNEL(mul_mat_q4_K_f32); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mat_q5_K_f32); |         GGML_METAL_ADD_KERNEL(mul_mat_q5_K_f32); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mat_q6_K_f32); |         GGML_METAL_ADD_KERNEL(mul_mat_q6_K_f32); | ||||||
|  |         GGML_METAL_ADD_KERNEL(mul_mm_f32_f32); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mm_f16_f32); |         GGML_METAL_ADD_KERNEL(mul_mm_f16_f32); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mm_q4_0_f32); |         GGML_METAL_ADD_KERNEL(mul_mm_q4_0_f32); | ||||||
|         GGML_METAL_ADD_KERNEL(mul_mm_q8_0_f32); |         GGML_METAL_ADD_KERNEL(mul_mm_q8_0_f32); | ||||||
|  | @ -309,6 +313,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) { | ||||||
|     GGML_METAL_DEL_KERNEL(get_rows_q6_K); |     GGML_METAL_DEL_KERNEL(get_rows_q6_K); | ||||||
|     GGML_METAL_DEL_KERNEL(rms_norm); |     GGML_METAL_DEL_KERNEL(rms_norm); | ||||||
|     GGML_METAL_DEL_KERNEL(norm); |     GGML_METAL_DEL_KERNEL(norm); | ||||||
|  |     GGML_METAL_DEL_KERNEL(mul_mat_f32_f32); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mat_f16_f32); |     GGML_METAL_DEL_KERNEL(mul_mat_f16_f32); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mat_f16_f32_1row); |     GGML_METAL_DEL_KERNEL(mul_mat_f16_f32_1row); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mat_f16_f32_l4); |     GGML_METAL_DEL_KERNEL(mul_mat_f16_f32_l4); | ||||||
|  | @ -320,6 +325,7 @@ void ggml_metal_free(struct ggml_metal_context * ctx) { | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mat_q4_K_f32); |     GGML_METAL_DEL_KERNEL(mul_mat_q4_K_f32); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mat_q5_K_f32); |     GGML_METAL_DEL_KERNEL(mul_mat_q5_K_f32); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mat_q6_K_f32); |     GGML_METAL_DEL_KERNEL(mul_mat_q6_K_f32); | ||||||
|  |     GGML_METAL_DEL_KERNEL(mul_mm_f32_f32); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mm_f16_f32); |     GGML_METAL_DEL_KERNEL(mul_mm_f16_f32); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mm_q4_0_f32); |     GGML_METAL_DEL_KERNEL(mul_mm_q4_0_f32); | ||||||
|     GGML_METAL_DEL_KERNEL(mul_mm_q8_0_f32); |     GGML_METAL_DEL_KERNEL(mul_mm_q8_0_f32); | ||||||
|  | @ -885,6 +891,7 @@ void ggml_metal_graph_compute( | ||||||
|                                 ne00%32 == 0 && |                                 ne00%32 == 0 && | ||||||
|                                 ne11 > 1) { |                                 ne11 > 1) { | ||||||
|                                 switch (src0->type) { |                                 switch (src0->type) { | ||||||
|  |                                     case GGML_TYPE_F32:  [encoder setComputePipelineState:ctx->pipeline_mul_mm_f32_f32];  break; | ||||||
|                                     case GGML_TYPE_F16:  [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32];  break; |                                     case GGML_TYPE_F16:  [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32];  break; | ||||||
|                                     case GGML_TYPE_Q4_0: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_0_f32]; break; |                                     case GGML_TYPE_Q4_0: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_0_f32]; break; | ||||||
|                                     case GGML_TYPE_Q4_1: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_1_f32]; break; |                                     case GGML_TYPE_Q4_1: [encoder setComputePipelineState:ctx->pipeline_mul_mm_q4_1_f32]; break; | ||||||
|  | @ -919,6 +926,11 @@ void ggml_metal_graph_compute( | ||||||
| 
 | 
 | ||||||
|                                 // use custom matrix x vector kernel |                                 // use custom matrix x vector kernel | ||||||
|                                 switch (src0t) { |                                 switch (src0t) { | ||||||
|  |                                     case GGML_TYPE_F32: | ||||||
|  |                                         { | ||||||
|  |                                             [encoder setComputePipelineState:ctx->pipeline_mul_mat_f32_f32]; | ||||||
|  |                                             nrows = 4; | ||||||
|  |                                         } break; | ||||||
|                                     case GGML_TYPE_F16: |                                     case GGML_TYPE_F16: | ||||||
|                                         { |                                         { | ||||||
|                                             nth0 = 32; |                                             nth0 = 32; | ||||||
|  |  | ||||||
|  | @ -523,6 +523,79 @@ kernel void kernel_mul_mat_q8_0_f32( | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #define N_F32_F32 4 | ||||||
|  | 
 | ||||||
|  | kernel void kernel_mul_mat_f32_f32( | ||||||
|  |         device const  char * src0, | ||||||
|  |         device const  char * src1, | ||||||
|  |         device       float * dst, | ||||||
|  |         constant   int64_t & ne00, | ||||||
|  |         constant   int64_t & ne01, | ||||||
|  |         constant   int64_t & ne02, | ||||||
|  |         constant  uint64_t & nb00, | ||||||
|  |         constant  uint64_t & nb01, | ||||||
|  |         constant  uint64_t & nb02, | ||||||
|  |         constant   int64_t & ne10, | ||||||
|  |         constant   int64_t & ne11, | ||||||
|  |         constant   int64_t & ne12, | ||||||
|  |         constant  uint64_t & nb10, | ||||||
|  |         constant  uint64_t & nb11, | ||||||
|  |         constant  uint64_t & nb12, | ||||||
|  |         constant   int64_t & ne0, | ||||||
|  |         constant   int64_t & ne1, | ||||||
|  |         uint3 tgpig[[threadgroup_position_in_grid]], | ||||||
|  |         uint tiisg[[thread_index_in_simdgroup]]) { | ||||||
|  | 
 | ||||||
|  |     const int64_t r0 = tgpig.x; | ||||||
|  |     const int64_t rb = tgpig.y*N_F32_F32; | ||||||
|  |     const int64_t im = tgpig.z; | ||||||
|  | 
 | ||||||
|  |     device const float * x = (device const float *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02); | ||||||
|  | 
 | ||||||
|  |     if (ne00 < 128) { | ||||||
|  |         for (int row = 0; row < N_F32_F32; ++row) { | ||||||
|  |             int r1 = rb + row; | ||||||
|  |             if (r1 >= ne11) { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             device const float * y = (device const float *) (src1 + r1*nb11 + im*nb12); | ||||||
|  | 
 | ||||||
|  |             float sumf = 0; | ||||||
|  |             for (int i = tiisg; i < ne00; i += 32) { | ||||||
|  |                 sumf += (float) x[i] * (float) y[i]; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             float all_sum = simd_sum(sumf); | ||||||
|  |             if (tiisg == 0) { | ||||||
|  |                 dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         device const float4 * x4 = (device const float4 *)x; | ||||||
|  |         for (int row = 0; row < N_F32_F32; ++row) { | ||||||
|  |             int r1 = rb + row; | ||||||
|  |             if (r1 >= ne11) { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             device const float  * y  = (device const float  *) (src1 + r1*nb11 + im*nb12); | ||||||
|  |             device const float4 * y4 = (device const float4 *) y; | ||||||
|  | 
 | ||||||
|  |             float sumf = 0; | ||||||
|  |             for (int i = tiisg; i < ne00/4; i += 32) { | ||||||
|  |                 for (int k = 0; k < 4; ++k) sumf += (float) x4[i][k] * y4[i][k]; | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             float all_sum = simd_sum(sumf); | ||||||
|  |             if (tiisg == 0) { | ||||||
|  |                 for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (float) x[i] * y[i]; | ||||||
|  |                 dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| kernel void kernel_mul_mat_f16_f32_1row( | kernel void kernel_mul_mat_f16_f32_1row( | ||||||
|         device const  char * src0, |         device const  char * src0, | ||||||
|         device const  char * src1, |         device const  char * src1, | ||||||
|  | @ -1399,13 +1472,13 @@ kernel void kernel_mul_mat_q4_K_f32( | ||||||
|         device const float * src1, |         device const float * src1, | ||||||
|         device       float * dst, |         device       float * dst, | ||||||
|         constant   int64_t & ne00, |         constant   int64_t & ne00, | ||||||
|         constant   int64_t & ne01[[buffer(4)]], |         constant   int64_t & ne01 [[buffer(4)]], | ||||||
|         constant   int64_t & ne02[[buffer(5)]], |         constant   int64_t & ne02 [[buffer(5)]], | ||||||
|         constant   int64_t & ne10[[buffer(9)]], |         constant   int64_t & ne10 [[buffer(9)]], | ||||||
|         constant   int64_t & ne12[[buffer(11)]], |         constant   int64_t & ne12 [[buffer(11)]], | ||||||
|         constant   int64_t & ne0[[buffer(15)]], |         constant   int64_t & ne0  [[buffer(15)]], | ||||||
|         constant   int64_t & ne1[[buffer(16)]], |         constant   int64_t & ne1  [[buffer(16)]], | ||||||
|         constant   uint    & gqa[[buffer(17)]], |         constant   uint    & gqa  [[buffer(17)]], | ||||||
|         uint3 tgpig[[threadgroup_position_in_grid]], |         uint3 tgpig[[threadgroup_position_in_grid]], | ||||||
|         uint tiisg[[thread_index_in_simdgroup]], |         uint tiisg[[thread_index_in_simdgroup]], | ||||||
|         uint sgitg[[simdgroup_index_in_threadgroup]]) { |         uint sgitg[[simdgroup_index_in_threadgroup]]) { | ||||||
|  | @ -2012,7 +2085,6 @@ void dequantize_q4_K(device const block_q4_K *xb, short il, thread type4x4 & reg | ||||||
|     for (int i = 0; i < 16; ++i) { |     for (int i = 0; i < 16; ++i) { | ||||||
|         reg[i/4][i%4] = dl * (q[i] & mask) - ml; |         reg[i/4][i%4] = dl * (q[i] & mask) - ml; | ||||||
|     } |     } | ||||||
| 
 |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| template <typename type4x4> | template <typename type4x4> | ||||||
|  | @ -2269,6 +2341,7 @@ typedef void (mat_mm_t)( | ||||||
|         constant       uint & gqa, |         constant       uint & gqa, | ||||||
|         threadgroup uchar *, uint3, uint, uint); |         threadgroup uchar *, uint3, uint, uint); | ||||||
| 
 | 
 | ||||||
|  | template [[host_name("kernel_mul_mm_f32_f32")]]  kernel mat_mm_t kernel_mul_mm<float4x4,   1,     dequantize_f32>; | ||||||
| template [[host_name("kernel_mul_mm_f16_f32")]]  kernel mat_mm_t kernel_mul_mm<half4x4,    1,     dequantize_f16>; | template [[host_name("kernel_mul_mm_f16_f32")]]  kernel mat_mm_t kernel_mul_mm<half4x4,    1,     dequantize_f16>; | ||||||
| template [[host_name("kernel_mul_mm_q4_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_0, 2,     dequantize_q4_0>; | template [[host_name("kernel_mul_mm_q4_0_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_0, 2,     dequantize_q4_0>; | ||||||
| template [[host_name("kernel_mul_mm_q4_1_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_1, 2,     dequantize_q4_1>; | template [[host_name("kernel_mul_mm_q4_1_f32")]] kernel mat_mm_t kernel_mul_mm<block_q4_1, 2,     dequantize_q4_1>; | ||||||
|  |  | ||||||
							
								
								
									
										88
									
								
								ggml.c
									
										
									
									
									
								
							
							
						
						
									
										88
									
								
								ggml.c
									
										
									
									
									
								
							|  | @ -17294,10 +17294,18 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { | ||||||
|         } else { |         } else { | ||||||
|             // wait for other threads to finish
 |             // wait for other threads to finish
 | ||||||
|             const int last = node_n; |             const int last = node_n; | ||||||
|             do { |             while (true) { | ||||||
|                 //sched_yield();
 |                 // TODO: this sched_yield can have significant impact on the performance - either positive or negative
 | ||||||
|  |                 //       depending on the workload and the operating system.
 | ||||||
|  |                 //       since it is not clear what is the best approach, it should potentially become user-configurable
 | ||||||
|  |                 //       ref: https://github.com/ggerganov/ggml/issues/291
 | ||||||
|  | #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) | ||||||
|  |                 sched_yield(); | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|                 node_n = atomic_load(&state->shared->node_n); |                 node_n = atomic_load(&state->shared->node_n); | ||||||
|             } while (node_n == last); |                 if (node_n != last) break; | ||||||
|  |             }; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // check if we should stop
 |         // check if we should stop
 | ||||||
|  | @ -18348,7 +18356,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) { | ||||||
|     for (int i = 0; i < cgraph->n_leafs; i++) { |     for (int i = 0; i < cgraph->n_leafs; i++) { | ||||||
|         struct ggml_tensor * node = cgraph->leafs[i]; |         struct ggml_tensor * node = cgraph->leafs[i]; | ||||||
| 
 | 
 | ||||||
|         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n", |         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n", | ||||||
|                 i, |                 i, | ||||||
|                 node->ne[0], node->ne[1], |                 node->ne[0], node->ne[1], | ||||||
|                 ggml_op_name(node->op), |                 ggml_op_name(node->op), | ||||||
|  | @ -20111,27 +20119,27 @@ const char * gguf_type_name(enum gguf_type type) { | ||||||
|     return GGUF_TYPE_NAME[type]; |     return GGUF_TYPE_NAME[type]; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int gguf_get_version(struct gguf_context * ctx) { | int gguf_get_version(const struct gguf_context * ctx) { | ||||||
|     return ctx->header.version; |     return ctx->header.version; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| size_t gguf_get_alignment(struct gguf_context * ctx) { | size_t gguf_get_alignment(const struct gguf_context * ctx) { | ||||||
|     return ctx->alignment; |     return ctx->alignment; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| size_t gguf_get_data_offset(struct gguf_context * ctx) { | size_t gguf_get_data_offset(const struct gguf_context * ctx) { | ||||||
|     return ctx->offset; |     return ctx->offset; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void * gguf_get_data(struct gguf_context * ctx) { | void * gguf_get_data(const struct gguf_context * ctx) { | ||||||
|     return ctx->data; |     return ctx->data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int gguf_get_n_kv(struct gguf_context * ctx) { | int gguf_get_n_kv(const struct gguf_context * ctx) { | ||||||
|     return ctx->header.n_kv; |     return ctx->header.n_kv; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int gguf_find_key(struct gguf_context * ctx, const char * key) { | int gguf_find_key(const struct gguf_context * ctx, const char * key) { | ||||||
|     // return -1 if key not found
 |     // return -1 if key not found
 | ||||||
|     int keyfound = -1; |     int keyfound = -1; | ||||||
| 
 | 
 | ||||||
|  | @ -20147,85 +20155,85 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) { | ||||||
|     return keyfound; |     return keyfound; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const char * gguf_get_key(struct gguf_context * ctx, int i) { | const char * gguf_get_key(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].key.data; |     return ctx->kv[i].key.data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) { | enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].type; |     return ctx->kv[i].type; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) { | enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.arr.type; |     return ctx->kv[i].value.arr.type; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const void * gguf_get_arr_data(struct gguf_context * ctx, int i) { | const void * gguf_get_arr_data(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.arr.data; |     return ctx->kv[i].value.arr.data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) { | const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) { | ||||||
|     struct gguf_kv * kv = &ctx->kv[key_id]; |     struct gguf_kv * kv = &ctx->kv[key_id]; | ||||||
|     struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; |     struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; | ||||||
|     return str->data; |     return str->data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int gguf_get_arr_n(struct gguf_context * ctx, int i) { | int gguf_get_arr_n(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.arr.n; |     return ctx->kv[i].value.arr.n; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) { | uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.uint8; |     return ctx->kv[i].value.uint8; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) { | int8_t gguf_get_val_i8(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.int8; |     return ctx->kv[i].value.int8; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) { | uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.uint16; |     return ctx->kv[i].value.uint16; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) { | int16_t gguf_get_val_i16(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.int16; |     return ctx->kv[i].value.int16; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) { | uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.uint32; |     return ctx->kv[i].value.uint32; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) { | int32_t gguf_get_val_i32(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.int32; |     return ctx->kv[i].value.int32; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| float gguf_get_val_f32(struct gguf_context * ctx, int i) { | float gguf_get_val_f32(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.float32; |     return ctx->kv[i].value.float32; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) { | uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.uint64; |     return ctx->kv[i].value.uint64; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) { | int64_t gguf_get_val_i64(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.int64; |     return ctx->kv[i].value.int64; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| double gguf_get_val_f64(struct gguf_context * ctx, int i) { | double gguf_get_val_f64(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.float64; |     return ctx->kv[i].value.float64; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool gguf_get_val_bool(struct gguf_context * ctx, int i) { | bool gguf_get_val_bool(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.bool_; |     return ctx->kv[i].value.bool_; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const char * gguf_get_val_str (struct gguf_context * ctx, int i) { | const char * gguf_get_val_str (const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->kv[i].value.str.data; |     return ctx->kv[i].value.str.data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int gguf_get_n_tensors(struct gguf_context * ctx) { | int gguf_get_n_tensors(const struct gguf_context * ctx) { | ||||||
|     return ctx->header.n_tensors; |     return ctx->header.n_tensors; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int gguf_find_tensor(struct gguf_context * ctx, const char * name) { | int gguf_find_tensor(const struct gguf_context * ctx, const char * name) { | ||||||
|     // return -1 if tensor not found
 |     // return -1 if tensor not found
 | ||||||
|     int tensorfound = -1; |     int tensorfound = -1; | ||||||
| 
 | 
 | ||||||
|  | @ -20241,11 +20249,11 @@ int gguf_find_tensor(struct gguf_context * ctx, const char * name) { | ||||||
|     return tensorfound; |     return tensorfound; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i) { | size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->infos[i].offset; |     return ctx->infos[i].offset; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| char * gguf_get_tensor_name(struct gguf_context * ctx, int i) { | char * gguf_get_tensor_name(const struct gguf_context * ctx, int i) { | ||||||
|     return ctx->infos[i].name.data; |     return ctx->infos[i].name.data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -20528,7 +20536,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si | ||||||
|     buf->offset += el_size; |     buf->offset += el_size; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) { | static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) { | ||||||
|     // write header
 |     // write header
 | ||||||
|     gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic)); |     gguf_bwrite_el(buf, &ctx->header.magic,     sizeof(ctx->header.magic)); | ||||||
|     gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version)); |     gguf_bwrite_el(buf, &ctx->header.version,   sizeof(ctx->header.version)); | ||||||
|  | @ -20643,7 +20651,7 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf, | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta) { | void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) { | ||||||
|     FILE * file = fopen(fname, "wb"); |     FILE * file = fopen(fname, "wb"); | ||||||
|     if (!file) { |     if (!file) { | ||||||
|         GGML_ASSERT(false && "failed to open file for writing"); |         GGML_ASSERT(false && "failed to open file for writing"); | ||||||
|  | @ -20660,7 +20668,7 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only | ||||||
|     fclose(file); |     fclose(file); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| size_t gguf_get_meta_size(struct gguf_context * ctx) { | size_t gguf_get_meta_size(const struct gguf_context * ctx) { | ||||||
|     // no allocs - only compute size
 |     // no allocs - only compute size
 | ||||||
|     struct gguf_buf buf = gguf_buf_init(0); |     struct gguf_buf buf = gguf_buf_init(0); | ||||||
| 
 | 
 | ||||||
|  | @ -20669,7 +20677,7 @@ size_t gguf_get_meta_size(struct gguf_context * ctx) { | ||||||
|     return buf.offset; |     return buf.offset; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void gguf_get_meta_data(struct gguf_context * ctx, void * data) { | void gguf_get_meta_data(const struct gguf_context * ctx, void * data) { | ||||||
|     struct gguf_buf buf = gguf_buf_init(16*1024); |     struct gguf_buf buf = gguf_buf_init(16*1024); | ||||||
| 
 | 
 | ||||||
|     gguf_write_to_buf(ctx, &buf, true); |     gguf_write_to_buf(ctx, &buf, true); | ||||||
|  | @ -20745,6 +20753,14 @@ int ggml_cpu_has_arm_fma(void) { | ||||||
| #endif | #endif | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | int ggml_cpu_has_metal(void) { | ||||||
|  | #if defined(GGML_USE_METAL) | ||||||
|  |     return 1; | ||||||
|  | #else | ||||||
|  |     return 0; | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  | 
 | ||||||
| int ggml_cpu_has_f16c(void) { | int ggml_cpu_has_f16c(void) { | ||||||
| #if defined(__F16C__) | #if defined(__F16C__) | ||||||
|     return 1; |     return 1; | ||||||
|  |  | ||||||
							
								
								
									
										74
									
								
								ggml.h
									
										
									
									
									
								
							
							
						
						
									
										74
									
								
								ggml.h
									
										
									
									
									
								
							|  | @ -195,6 +195,14 @@ | ||||||
| #    define GGML_DEPRECATED(func, hint) func | #    define GGML_DEPRECATED(func, hint) func | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | #ifndef __GNUC__ | ||||||
|  | #    define GGML_ATTRIBUTE_FORMAT(...) | ||||||
|  | #elif defined(__MINGW32__) | ||||||
|  | #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) | ||||||
|  | #else | ||||||
|  | #    define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| #include <stdint.h> | #include <stdint.h> | ||||||
| #include <stddef.h> | #include <stddef.h> | ||||||
| #include <stdbool.h> | #include <stdbool.h> | ||||||
|  | @ -270,7 +278,7 @@ extern "C" { | ||||||
| 
 | 
 | ||||||
| #if defined(__ARM_NEON) && defined(__CUDACC__) | #if defined(__ARM_NEON) && defined(__CUDACC__) | ||||||
|     typedef half ggml_fp16_t; |     typedef half ggml_fp16_t; | ||||||
| #elif defined(__ARM_NEON) && !defined(_MSC_VER) | #elif defined(__ARM_NEON) | ||||||
|     typedef __fp16 ggml_fp16_t; |     typedef __fp16 ggml_fp16_t; | ||||||
| #else | #else | ||||||
|     typedef uint16_t ggml_fp16_t; |     typedef uint16_t ggml_fp16_t; | ||||||
|  | @ -685,6 +693,7 @@ extern "C" { | ||||||
| 
 | 
 | ||||||
|     GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor); |     GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor); | ||||||
|     GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name); |     GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name); | ||||||
|  |     GGML_ATTRIBUTE_FORMAT(2, 3) | ||||||
|     GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...); |     GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...); | ||||||
| 
 | 
 | ||||||
|     //
 |     //
 | ||||||
|  | @ -1866,39 +1875,39 @@ extern "C" { | ||||||
| 
 | 
 | ||||||
|     GGML_API const char * gguf_type_name(enum gguf_type type); |     GGML_API const char * gguf_type_name(enum gguf_type type); | ||||||
| 
 | 
 | ||||||
|     GGML_API int    gguf_get_version    (struct gguf_context * ctx); |     GGML_API int    gguf_get_version    (const struct gguf_context * ctx); | ||||||
|     GGML_API size_t gguf_get_alignment  (struct gguf_context * ctx); |     GGML_API size_t gguf_get_alignment  (const struct gguf_context * ctx); | ||||||
|     GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx); |     GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx); | ||||||
|     GGML_API void * gguf_get_data       (struct gguf_context * ctx); |     GGML_API void * gguf_get_data       (const struct gguf_context * ctx); | ||||||
| 
 | 
 | ||||||
|     GGML_API int          gguf_get_n_kv(struct gguf_context * ctx); |     GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx); | ||||||
|     GGML_API int          gguf_find_key(struct gguf_context * ctx, const char * key); |     GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key); | ||||||
|     GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i); |     GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i); | ||||||
| 
 | 
 | ||||||
|     GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i); |     GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i); |     GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i); | ||||||
| 
 | 
 | ||||||
|     // results are undefined if the wrong type is used for the key
 |     // results are undefined if the wrong type is used for the key
 | ||||||
|     GGML_API uint8_t      gguf_get_val_u8  (struct gguf_context * ctx, int i); |     GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API int8_t       gguf_get_val_i8  (struct gguf_context * ctx, int i); |     GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API uint16_t     gguf_get_val_u16 (struct gguf_context * ctx, int i); |     GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API int16_t      gguf_get_val_i16 (struct gguf_context * ctx, int i); |     GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API uint32_t     gguf_get_val_u32 (struct gguf_context * ctx, int i); |     GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API int32_t      gguf_get_val_i32 (struct gguf_context * ctx, int i); |     GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API float        gguf_get_val_f32 (struct gguf_context * ctx, int i); |     GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API uint64_t     gguf_get_val_u64 (struct gguf_context * ctx, int i); |     GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API int64_t      gguf_get_val_i64 (struct gguf_context * ctx, int i); |     GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API double       gguf_get_val_f64 (struct gguf_context * ctx, int i); |     GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API bool         gguf_get_val_bool(struct gguf_context * ctx, int i); |     GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int i); | ||||||
|     GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i); |     GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API int          gguf_get_arr_n   (struct gguf_context * ctx, int i); |     GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int i); | ||||||
|     GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i); |     GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i); | ||||||
|     GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i); |     GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i); | ||||||
| 
 | 
 | ||||||
|     GGML_API int    gguf_get_n_tensors    (struct gguf_context * ctx); |     GGML_API int    gguf_get_n_tensors    (const struct gguf_context * ctx); | ||||||
|     GGML_API int    gguf_find_tensor      (struct gguf_context * ctx, const char * name); |     GGML_API int    gguf_find_tensor      (const struct gguf_context * ctx, const char * name); | ||||||
|     GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i); |     GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i); | ||||||
|     GGML_API char * gguf_get_tensor_name  (struct gguf_context * ctx, int i); |     GGML_API char * gguf_get_tensor_name  (const struct gguf_context * ctx, int i); | ||||||
| 
 | 
 | ||||||
|     // overrides existing values or adds a new one
 |     // overrides existing values or adds a new one
 | ||||||
|     GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val); |     GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val); | ||||||
|  | @ -1943,11 +1952,11 @@ extern "C" { | ||||||
|     //
 |     //
 | ||||||
| 
 | 
 | ||||||
|     // write the entire context to a binary file
 |     // write the entire context to a binary file
 | ||||||
|     GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta); |     GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta); | ||||||
| 
 | 
 | ||||||
|     // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
 |     // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
 | ||||||
|     GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx); |     GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx); | ||||||
|     GGML_API void   gguf_get_meta_data(struct gguf_context * ctx, void * data); |     GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data); | ||||||
| 
 | 
 | ||||||
|     //
 |     //
 | ||||||
|     // system info
 |     // system info
 | ||||||
|  | @ -1961,6 +1970,7 @@ extern "C" { | ||||||
|     GGML_API int ggml_cpu_has_fma        (void); |     GGML_API int ggml_cpu_has_fma        (void); | ||||||
|     GGML_API int ggml_cpu_has_neon       (void); |     GGML_API int ggml_cpu_has_neon       (void); | ||||||
|     GGML_API int ggml_cpu_has_arm_fma    (void); |     GGML_API int ggml_cpu_has_arm_fma    (void); | ||||||
|  |     GGML_API int ggml_cpu_has_metal      (void); | ||||||
|     GGML_API int ggml_cpu_has_f16c       (void); |     GGML_API int ggml_cpu_has_f16c       (void); | ||||||
|     GGML_API int ggml_cpu_has_fp16_va    (void); |     GGML_API int ggml_cpu_has_fp16_va    (void); | ||||||
|     GGML_API int ggml_cpu_has_wasm_simd  (void); |     GGML_API int ggml_cpu_has_wasm_simd  (void); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue