diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 000000000..1a42b9abc --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,18 @@ +--- +Checks: > + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-narrowing-conversions, + readability-*, + -readability-avoid-unconditional-preprocessor-if, + -readability-function-cognitive-complexity, + -readability-identifier-length, + -readability-implicit-bool-conversion, + -readability-magic-numbers, + -readability-uppercase-literal-suffix, + clang-analyzer-*, + -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, + performance-*, + portability-*, +FormatStyle: none diff --git a/.github/workflows/tidy-post.yml b/.github/workflows/tidy-post.yml new file mode 100644 index 000000000..a58da0cd6 --- /dev/null +++ b/.github/workflows/tidy-post.yml @@ -0,0 +1,20 @@ +name: clang-tidy review post comments + +on: + workflow_run: + workflows: ["clang-tidy-review"] + types: + - completed + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: ZedThree/clang-tidy-review/post@v0.13.0 + # lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup + with: + # adjust options as necessary + lgtm_comment_body: '' + annotations: false + max_comments: 25 diff --git a/.github/workflows/tidy-review.yml b/.github/workflows/tidy-review.yml new file mode 100644 index 000000000..a4bc8d976 --- /dev/null +++ b/.github/workflows/tidy-review.yml @@ -0,0 +1,23 @@ +name: clang-tidy-review + +on: + pull_request: + branches: + - master + +jobs: + clang-tidy-review: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - uses: ZedThree/clang-tidy-review@v0.13.0 + id: review + with: + lgtm_comment_body: '' + build_dir: build + cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on + split_workflow: true + + - uses: ZedThree/clang-tidy-review/upload@v0.13.0 diff --git a/examples/common.cpp b/examples/common.cpp index f3085b08e..80e35d2e9 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -91,9 +91,13 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { bool escape_prompt = false; std::string arg; gpt_params default_params; + const std::string arg_prefix = "--"; for (int i = 1; i < argc; i++) { arg = argv[i]; + if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { + std::replace(arg.begin(), arg.end(), '_', '-'); + } if (arg == "-s" || arg == "--seed") { #if defined(GGML_USE_CUBLAS) @@ -141,27 +145,27 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { if (params.prompt.back() == '\n') { params.prompt.pop_back(); } - } else if (arg == "-n" || arg == "--n_predict") { + } else if (arg == "-n" || arg == "--n-predict") { if (++i >= argc) { invalid_param = true; break; } params.n_predict = std::stoi(argv[i]); - } else if (arg == "--top_k") { + } else if (arg == "--top-k") { if (++i >= argc) { invalid_param = true; break; } params.top_k = std::stoi(argv[i]); - } else if (arg == "-c" || arg == "--ctx_size") { + } else if (arg == "-c" || arg == "--ctx-size") { if (++i >= argc) { invalid_param = true; break; } params.n_ctx = std::stoi(argv[i]); - } else if (arg == "--memory_f32") { + } else if (arg == "--memory-f32") { params.memory_f16 = false; - } else if (arg == "--top_p") { + } else if (arg == "--top-p") { if (++i >= argc) { invalid_param = true; break; @@ -185,25 +189,25 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { break; } params.typical_p = std::stof(argv[i]); - } else if (arg == "--repeat_last_n") { + } else if (arg == "--repeat-last-n") { if (++i >= argc) { invalid_param = true; break; } params.repeat_last_n = std::stoi(argv[i]); - } else if (arg == "--repeat_penalty") { + } else if (arg == "--repeat-penalty") { if (++i >= argc) { invalid_param = true; break; } params.repeat_penalty = std::stof(argv[i]); - } else if (arg == "--frequency_penalty") { + } else if (arg == "--frequency-penalty") { if (++i >= argc) { invalid_param = true; break; } params.frequency_penalty = std::stof(argv[i]); - } else if (arg == "--presence_penalty") { + } else if (arg == "--presence-penalty") { if (++i >= argc) { invalid_param = true; break; @@ -215,19 +219,19 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { break; } params.mirostat = std::stoi(argv[i]); - } else if (arg == "--mirostat_lr") { + } else if (arg == "--mirostat-lr") { if (++i >= argc) { invalid_param = true; break; } params.mirostat_eta = std::stof(argv[i]); - } else if (arg == "--mirostat_ent") { + } else if (arg == "--mirostat-ent") { if (++i >= argc) { invalid_param = true; break; } params.mirostat_tau = std::stof(argv[i]); - } else if (arg == "-b" || arg == "--batch_size") { + } else if (arg == "-b" || arg == "--batch-size") { if (++i >= argc) { invalid_param = true; break; @@ -310,7 +314,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) { invalid_param = true; break; } - } else if (arg == "--n_parts") { + } else if (arg == "--n-parts") { if (++i >= argc) { invalid_param = true; break; @@ -384,31 +388,31 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n"); fprintf(stderr, " -f FNAME, --file FNAME\n"); fprintf(stderr, " prompt file to start generation.\n"); - fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict); - fprintf(stderr, " --top_k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); - fprintf(stderr, " --top_p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); + fprintf(stderr, " -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity)\n", params.n_predict); + fprintf(stderr, " --top-k N top-k sampling (default: %d, 0 = disabled)\n", params.top_k); + fprintf(stderr, " --top-p N top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p); fprintf(stderr, " --tfs N tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z); fprintf(stderr, " --typical N locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)params.typical_p); - fprintf(stderr, " --repeat_last_n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); - fprintf(stderr, " --repeat_penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); - fprintf(stderr, " --presence_penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); - fprintf(stderr, " --frequency_penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); + fprintf(stderr, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", params.repeat_last_n); + fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)params.repeat_penalty); + fprintf(stderr, " --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)params.presence_penalty); + fprintf(stderr, " --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)params.frequency_penalty); fprintf(stderr, " --mirostat N use Mirostat sampling.\n"); fprintf(stderr, " Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); fprintf(stderr, " (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", params.mirostat); - fprintf(stderr, " --mirostat_lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); - fprintf(stderr, " --mirostat_ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); + fprintf(stderr, " --mirostat-lr N Mirostat learning rate, parameter eta (default: %.1f)\n", (double)params.mirostat_eta); + fprintf(stderr, " --mirostat-ent N Mirostat target entropy, parameter tau (default: %.1f)\n", (double)params.mirostat_tau); fprintf(stderr, " -l TOKEN_ID(+/-)BIAS, --logit-bias TOKEN_ID(+/-)BIAS\n"); fprintf(stderr, " modifies the likelihood of token appearing in the completion,\n"); fprintf(stderr, " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"); fprintf(stderr, " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n"); - fprintf(stderr, " -c N, --ctx_size N size of the prompt context (default: %d)\n", params.n_ctx); + fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx); fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); fprintf(stderr, " --no-penalize-nl do not penalize newline token\n"); - fprintf(stderr, " --memory_f32 use f32 instead of f16 for memory key+value\n"); + fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value\n"); fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp); - fprintf(stderr, " --n_parts N number of model parts (default: -1 = determine from dimensions)\n"); - fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch); + fprintf(stderr, " --n-parts N number of model parts (default: -1 = determine from dimensions)\n"); + fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); fprintf(stderr, " --perplexity compute perplexity over the prompt\n"); fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); if (llama_mlock_supported()) { diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index e4b729128..bb3fd50a9 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -56,9 +56,6 @@ int main(int argc, char ** argv) { // tokenize the prompt auto embd_inp = ::llama_tokenize(ctx, params.prompt, true); - // determine newline token - auto llama_token_newline = ::llama_tokenize(ctx, "\n", false); - if (params.verbose_prompt) { fprintf(stderr, "\n"); fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str()); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index bd1c4ab55..8543414dd 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -121,7 +121,7 @@ int main(int argc, char ** argv) { // uncomment the "used_mem" line in llama.cpp to see the results if (params.mem_test) { { - const std::vector tmp(params.n_batch, 0); + const std::vector tmp(params.n_batch, llama_token_bos()); llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads); } diff --git a/ggml-opencl.c b/ggml-opencl.c index 74b0294db..31ab13b25 100644 --- a/ggml-opencl.c +++ b/ggml-opencl.c @@ -358,4 +358,4 @@ void ggml_cl_sgemm_wrapper( clWaitForEvents(1, &ev_c); clReleaseEvent(ev_sgemm); clReleaseEvent(ev_c); -} \ No newline at end of file +} diff --git a/llama.cpp b/llama.cpp index 8f600c77f..cf28ef82b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2466,8 +2466,8 @@ size_t llama_get_state_size(const struct llama_context * ctx) { } // Copies the state to the specified destination address -size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dest) { - uint8_t * out = dest; +size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) { + uint8_t * out = dst; // copy rng { @@ -2527,7 +2527,9 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dest) { if (kv_size) { const size_t elt_size = ggml_element_size(kv_self.k); + char buffer[4096]; + ggml_context * cpy_ctx = ggml_init({ sizeof(buffer), buffer, /* no_alloc */ true }); ggml_cgraph gf{}; gf.n_threads = 1; @@ -2551,10 +2553,12 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dest) { ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d)); ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d)); ggml_graph_compute(cpy_ctx, &gf); + + ggml_free(cpy_ctx); } } - const size_t written = out - dest; + const size_t written = out - dst; const size_t max_size = llama_get_state_size(ctx); LLAMA_ASSERT(written <= max_size); @@ -2564,15 +2568,15 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dest) { // Sets the state reading from the specified source address size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) { - const uint8_t * in = src; + const uint8_t * inp = src; // set rng { size_t rng_size; char rng_buf[LLAMA_MAX_RNG_STATE]; - memcpy(&rng_size, in, sizeof(rng_size)); in += sizeof(rng_size); - memcpy(&rng_buf[0], in, LLAMA_MAX_RNG_STATE); in += LLAMA_MAX_RNG_STATE; + memcpy(&rng_size, inp, sizeof(rng_size)); inp += sizeof(rng_size); + memcpy(&rng_buf[0], inp, LLAMA_MAX_RNG_STATE); inp += LLAMA_MAX_RNG_STATE; std::stringstream rng_ss; rng_ss.str(std::string(&rng_buf[0], rng_size)); @@ -2586,30 +2590,30 @@ size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) { size_t logits_cap; size_t logits_size; - memcpy(&logits_cap, in, sizeof(logits_cap)); in += sizeof(logits_cap); - memcpy(&logits_size, in, sizeof(logits_size)); in += sizeof(logits_size); + memcpy(&logits_cap, inp, sizeof(logits_cap)); inp += sizeof(logits_cap); + memcpy(&logits_size, inp, sizeof(logits_size)); inp += sizeof(logits_size); LLAMA_ASSERT(ctx->logits.capacity() == logits_cap); if (logits_size) { ctx->logits.resize(logits_size); - memcpy(ctx->logits.data(), in, logits_size * sizeof(float)); + memcpy(ctx->logits.data(), inp, logits_size * sizeof(float)); } - in += logits_cap * sizeof(float); + inp += logits_cap * sizeof(float); } // set embeddings { size_t embedding_size; - memcpy(&embedding_size, in, sizeof(embedding_size)); in += sizeof(embedding_size); + memcpy(&embedding_size, inp, sizeof(embedding_size)); inp += sizeof(embedding_size); LLAMA_ASSERT(ctx->embedding.capacity() == embedding_size); if (embedding_size) { - memcpy(ctx->embedding.data(), in, embedding_size * sizeof(float)); - in += embedding_size * sizeof(float); + memcpy(ctx->embedding.data(), inp, embedding_size * sizeof(float)); + inp += embedding_size * sizeof(float); } } @@ -2624,25 +2628,27 @@ size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) { size_t kv_size; int kv_ntok; - memcpy(&kv_size, in, sizeof(kv_size)); in += sizeof(kv_size); - memcpy(&kv_ntok, in, sizeof(kv_ntok)); in += sizeof(kv_ntok); + memcpy(&kv_size, inp, sizeof(kv_size)); inp += sizeof(kv_size); + memcpy(&kv_ntok, inp, sizeof(kv_ntok)); inp += sizeof(kv_ntok); if (kv_size) { LLAMA_ASSERT(kv_self.buf.size == kv_size); const size_t elt_size = ggml_element_size(kv_self.k); + char buffer[4096]; + ggml_context * cpy_ctx = ggml_init({ sizeof(buffer), buffer, /* no_alloc */ true }); ggml_cgraph gf{}; gf.n_threads = 1; ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_ntok, n_layer); - kin3d->data = (void *) in; - in += ggml_nbytes(kin3d); + kin3d->data = (void *) inp; + inp += ggml_nbytes(kin3d); ggml_tensor * vin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, kv_ntok, n_embd, n_layer); - vin3d->data = (void *) in; - in += ggml_nbytes(vin3d); + vin3d->data = (void *) inp; + inp += ggml_nbytes(vin3d); ggml_tensor * k3d = ggml_view_3d(cpy_ctx, kv_self.k, n_embd, kv_ntok, n_layer, @@ -2655,12 +2661,14 @@ size_t llama_set_state_data(struct llama_context * ctx, const uint8_t * src) { ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d)); ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d)); ggml_graph_compute(cpy_ctx, &gf); + + ggml_free(cpy_ctx); } ctx->model.kv_self.n = kv_ntok; } - const size_t nread = in - src; + const size_t nread = inp - src; const size_t max_size = llama_get_state_size(ctx); LLAMA_ASSERT(nread <= max_size); diff --git a/llama.h b/llama.h index 195f4e9fd..8299e9485 100644 --- a/llama.h +++ b/llama.h @@ -134,7 +134,7 @@ extern "C" { // Copies the state to the specified destination address. // Destination needs to have allocated enough memory. // Returns the number of bytes copied - LLAMA_API size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dest); + LLAMA_API size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst); // Set the state reading from the specified address // Returns the number of bytes read