diff --git a/common/train.h b/common/train.h index 0545be288..d86c93cc4 100644 --- a/common/train.h +++ b/common/train.h @@ -44,7 +44,7 @@ struct train_params_common { int n_batch; int n_gradient_accumulation; int n_epochs; - int n_gpu_layers; + int n_gpu_layers; bool custom_n_ctx; diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp index 29354db20..60c7faa79 100644 --- a/examples/finetune/finetune.cpp +++ b/examples/finetune/finetune.cpp @@ -1459,16 +1459,16 @@ static bool train_params_parse(int argc, char ** argv, struct train_params * par } params->n_rank_w3 = std::stoi(argv[i]); params->custom_n_rank_w3 = true; - } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") { - if (++i >= argc) { - invalid_param = true; - break; - } + } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") { + if (++i >= argc) { + invalid_param = true; + break; + } #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD - params->common.n_gpu_layers = std::stoi(argv[i]); + params->common.n_gpu_layers = std::stoi(argv[i]); #else - fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); - fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + fprintf(stderr, "warning: not compiled with GPU offload support, --n-gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); #endif } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); @@ -1556,7 +1556,7 @@ int main(int argc, char ** argv) { srand(params.common.seed); struct llama_model_params llama_mparams = llama_model_default_params(); - llama_mparams.n_gpu_layers = params.common.n_gpu_layers; + llama_mparams.n_gpu_layers = params.common.n_gpu_layers; llama_mparams.vocab_only = false; printf("%s: model base = '%s'\n", __func__, params.fn_model_base); diff --git a/examples/finetune/finetune.sh b/examples/finetune/finetune.sh index f567d4736..079bfa113 100644 --- a/examples/finetune/finetune.sh +++ b/examples/finetune/finetune.sh @@ -24,7 +24,7 @@ done $DEBUGGER $EXE \ --model-base $MODEL \ - $GPUARG \ + $GPUARG \ --checkpoint-in chk-ol3b-shakespeare-LATEST.gguf \ --checkpoint-out chk-ol3b-shakespeare-ITERATION.gguf \ --lora-out lora-ol3b-shakespeare-ITERATION.bin \ diff --git a/ggml.c b/ggml.c index a0501fbdd..96e0fad6f 100644 --- a/ggml.c +++ b/ggml.c @@ -1276,7 +1276,7 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int __riscv_vse8_v_i8m1(y[i].qs , vs, vl); } #else - UNUSED(nb); + UNUSED(nb); // scalar quantize_row_q8_0_reference(x, y, k); #endif @@ -1530,7 +1530,7 @@ static void quantize_row_q8_1(const float * restrict x, void * restrict vy, int y[i].s = sum*d; } #else - UNUSED(nb); + UNUSED(nb); // scalar quantize_row_q8_1_reference(x, y, k); #endif @@ -9359,13 +9359,13 @@ static void ggml_compute_forward_add_f16_f32( GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); - if (dst->type == GGML_TYPE_F32) { - GGML_ASSERT( nb0 == sizeof(float)); - } - else { - GGML_ASSERT(dst->type == GGML_TYPE_F16); - GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); - } + if (dst->type == GGML_TYPE_F32) { + GGML_ASSERT( nb0 == sizeof(float)); + } + else { + GGML_ASSERT(dst->type == GGML_TYPE_F16); + GGML_ASSERT( nb0 == sizeof(ggml_fp16_t)); + } GGML_ASSERT(nb00 == sizeof(ggml_fp16_t)); @@ -9377,37 +9377,37 @@ static void ggml_compute_forward_add_f16_f32( const int ir1 = MIN(ir0 + dr, nr); if (nb10 == sizeof(float)) { - if (dst->type == GGML_TYPE_F16) { - for (int ir = ir0; ir < ir1; ++ir) { - // src0, src1 and dst are same shape => same indices - const int i3 = ir/(ne2*ne1); - const int i2 = (ir - i3*ne2*ne1)/ne1; - const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + if (dst->type == GGML_TYPE_F16) { + for (int ir = ir0; ir < ir1; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); - ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); - ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); - float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); + ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); + ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); - for (int i = 0; i < ne0; i++) { - dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]); - } - } - } else { - for (int ir = ir0; ir < ir1; ++ir) { - // src0, src1 and dst are same shape => same indices - const int i3 = ir/(ne2*ne1); - const int i2 = (ir - i3*ne2*ne1)/ne1; - const int i1 = (ir - i3*ne2*ne1 - i2*ne1); + for (int i = 0; i < ne0; i++) { + dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]); + } + } + } else { + for (int ir = ir0; ir < ir1; ++ir) { + // src0, src1 and dst are same shape => same indices + const int i3 = ir/(ne2*ne1); + const int i2 = (ir - i3*ne2*ne1)/ne1; + const int i1 = (ir - i3*ne2*ne1 - i2*ne1); - float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); - ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); - float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); + float * dst_ptr = (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); + ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); + float * src1_ptr = (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11); - for (int i = 0; i < ne0; i++) { - dst_ptr[i] = GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]; - } - } - } + for (int i = 0; i < ne0; i++) { + dst_ptr[i] = GGML_FP16_TO_FP32(src0_ptr[i]) + src1_ptr[i]; + } + } + } } else { // src1 is not contiguous