cuda : fix jetson compile error (#4560)

* fix old jetson compile error

* Update Makefile

* update jetson detect and cuda version detect

* update cuda marco define

* update makefile and cuda,fix some issue

* Update README.md

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update Makefile

* Update README.md

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
FantasyGmm 2023-12-22 23:11:12 +08:00 committed by GitHub
parent 6724ef1657
commit a55876955b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 5 deletions

View file

@ -3677,7 +3677,7 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
const uint8x16_t mins = vshrq_n_u8(mins_and_scales, 4);
const ggml_int16x8x2_t q8sums = ggml_vld1q_s16_x2(y[i].bsums);
const ggml_int16x8x2_t mins16 = {vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mins))), vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mins)))};
const ggml_int16x8x2_t mins16 = {{vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(mins))), vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(mins)))}};
const int32x4_t s0 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[0]), vget_low_s16 (q8sums.val[0])),
vmull_s16(vget_high_s16(mins16.val[0]), vget_high_s16(q8sums.val[0])));
const int32x4_t s1 = vaddq_s32(vmull_s16(vget_low_s16 (mins16.val[1]), vget_low_s16 (q8sums.val[1])),
@ -6626,7 +6626,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
const ggml_int16x8x2_t q8sums = ggml_vld1q_s16_x2(y[i].bsums);
const int8x16_t scales = vld1q_s8(scale);
const ggml_int16x8x2_t q6scales = {vmovl_s8(vget_low_s8(scales)), vmovl_s8(vget_high_s8(scales))};
const ggml_int16x8x2_t q6scales = {{vmovl_s8(vget_low_s8(scales)), vmovl_s8(vget_high_s8(scales))}};
const int32x4_t prod = vaddq_s32(vaddq_s32(vmull_s16(vget_low_s16 (q8sums.val[0]), vget_low_s16 (q6scales.val[0])),
vmull_s16(vget_high_s16(q8sums.val[0]), vget_high_s16(q6scales.val[0]))),