sync : ggml
This commit is contained in:
		
							parent
							
								
									3246fe84d7
								
							
						
					
					
						commit
						231cff5f6f
					
				
					 21 changed files with 1422 additions and 178 deletions
				
			
		|  | @ -9,8 +9,10 @@ | |||
| #include "ggml-cuda/binbcast.cuh" | ||||
| #include "ggml-cuda/clamp.cuh" | ||||
| #include "ggml-cuda/concat.cuh" | ||||
| #include "ggml-cuda/conv-transpose-1d.cuh" | ||||
| #include "ggml-cuda/convert.cuh" | ||||
| #include "ggml-cuda/cpy.cuh" | ||||
| #include "ggml-cuda/cross-entropy-loss.cuh" | ||||
| #include "ggml-cuda/diagmask.cuh" | ||||
| #include "ggml-cuda/dmmv.cuh" | ||||
| #include "ggml-cuda/fattn.cuh" | ||||
|  | @ -29,7 +31,6 @@ | |||
| #include "ggml-cuda/tsembd.cuh" | ||||
| #include "ggml-cuda/unary.cuh" | ||||
| #include "ggml-cuda/upscale.cuh" | ||||
| #include "ggml-cuda/conv-transpose-1d.cuh" | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
|  | @ -2181,6 +2182,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg | |||
|         case GGML_OP_ADD: | ||||
|             ggml_cuda_op_add(ctx, dst); | ||||
|             break; | ||||
|         case GGML_OP_SUB: | ||||
|             ggml_cuda_op_sub(ctx, dst); | ||||
|             break; | ||||
|         case GGML_OP_ACC: | ||||
|             ggml_cuda_op_acc(ctx, dst); | ||||
|             break; | ||||
|  | @ -2267,6 +2271,12 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg | |||
|         case GGML_OP_SQRT: | ||||
|             ggml_cuda_op_sqrt(ctx, dst); | ||||
|             break; | ||||
|         case GGML_OP_SIN: | ||||
|             ggml_cuda_op_sin(ctx, dst); | ||||
|             break; | ||||
|         case GGML_OP_COS: | ||||
|             ggml_cuda_op_cos(ctx, dst); | ||||
|             break; | ||||
|         case GGML_OP_CLAMP: | ||||
|             ggml_cuda_op_clamp(ctx, dst); | ||||
|             break; | ||||
|  | @ -2303,6 +2313,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg | |||
|         case GGML_OP_FLASH_ATTN_EXT: | ||||
|             ggml_cuda_flash_attn_ext(ctx, dst); | ||||
|             break; | ||||
|         case GGML_OP_CROSS_ENTROPY_LOSS: | ||||
|             ggml_cuda_cross_entropy_loss(ctx, dst); | ||||
|             break; | ||||
|         default: | ||||
|             return false; | ||||
|     } | ||||
|  | @ -2610,6 +2623,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t | |||
|                 assert(node->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device)); | ||||
|                 for (int j = 0; j < GGML_MAX_SRC; j++) { | ||||
|                     if (node->src[j] != nullptr) { | ||||
|                         assert(node->src[j]->buffer); | ||||
|                         assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) || ggml_backend_buffer_is_cuda_split(node->src[j]->buffer)); | ||||
|                     } | ||||
|                 } | ||||
|  | @ -2853,12 +2867,15 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons | |||
|         case GGML_OP_TRANSPOSE: | ||||
|         case GGML_OP_NORM: | ||||
|         case GGML_OP_ADD: | ||||
|         case GGML_OP_SUB: | ||||
|         case GGML_OP_MUL: | ||||
|         case GGML_OP_DIV: | ||||
|         case GGML_OP_RMS_NORM: | ||||
|         case GGML_OP_SCALE: | ||||
|         case GGML_OP_SQR: | ||||
|         case GGML_OP_SQRT: | ||||
|         case GGML_OP_SIN: | ||||
|         case GGML_OP_COS: | ||||
|         case GGML_OP_CLAMP: | ||||
|         case GGML_OP_CONT: | ||||
|         case GGML_OP_DIAG_MASK_INF: | ||||
|  | @ -2890,6 +2907,8 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons | |||
|             } | ||||
|             return ggml_cuda_info().devices[cuda_ctx->device].cc >= CC_VOLTA && | ||||
|                 op->src[1]->type == GGML_TYPE_F16 && op->src[2]->type == GGML_TYPE_F16; | ||||
|         case GGML_OP_CROSS_ENTROPY_LOSS: | ||||
|             return true; | ||||
| #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) | ||||
|         default: | ||||
|             return false; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue