cmake : enable warnings in llama (#10474)

* cmake : enable warnings in llama

ggml-ci

* cmake : add llama_get_flags and respect LLAMA_FATAL_WARNINGS

* cmake : get_flags -> ggml_get_flags

* speculative-simple : fix warnings

* cmake : reuse ggml_get_flags

ggml-ci

* speculative-simple : fix compile warning

ggml-ci
2024-11-26 14:18:08 +02:00 · 2024-11-26 14:18:08 +02:00 · ab96610b1e
commit ab96610b1e
parent 7db3846a94
8 changed files with 49 additions and 6 deletions
--- a/examples/speculative-simple/speculative-simple.cpp
+++ b/examples/speculative-simple/speculative-simple.cpp
@@ -70,13 +70,13 @@ int main(int argc, char ** argv) {
    std::vector<llama_token> inp;
    inp = common_tokenize(ctx_tgt, params.prompt, true, true);

-    if (llama_n_ctx(ctx_tgt) < (int) inp.size()) {
+    if (llama_n_ctx(ctx_tgt) < (uint32_t) inp.size()) {
        LOG_ERR("%s: the prompt exceeds the context size (%d tokens, ctx %d)\n", __func__, (int) inp.size(), llama_n_ctx(ctx_tgt));

        return 1;
    }

-    if (llama_n_batch(ctx_tgt) < (int) inp.size()) {
+    if (llama_n_batch(ctx_tgt) < (uint32_t) inp.size()) {
        LOG_ERR("%s: the prompt exceeds the batch size (%d tokens, batch %d)\n", __func__, (int) inp.size(), llama_n_batch(ctx_tgt));

        return 1;
@@ -155,7 +155,7 @@ int main(int argc, char ** argv) {
        // evaluate the target model on [id_last, draft0, draft1, ..., draftN-1]
        {
            // do not waste time on small drafts
-            if (draft.size() < n_draft_min) {
+            if (draft.size() < (size_t) n_draft_min) {
                draft.clear();
            }