cmake : enable warnings in llama (#10474)

* cmake : enable warnings in llama

ggml-ci

* cmake : add llama_get_flags and respect LLAMA_FATAL_WARNINGS

* cmake : get_flags -> ggml_get_flags

* speculative-simple : fix warnings

* cmake : reuse ggml_get_flags

ggml-ci

* speculative-simple : fix compile warning

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-11-26 14:18:08 +02:00 committed by GitHub
parent 7db3846a94
commit ab96610b1e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 49 additions and 6 deletions

View file

@ -70,13 +70,13 @@ int main(int argc, char ** argv) {
std::vector<llama_token> inp;
inp = common_tokenize(ctx_tgt, params.prompt, true, true);
if (llama_n_ctx(ctx_tgt) < (int) inp.size()) {
if (llama_n_ctx(ctx_tgt) < (uint32_t) inp.size()) {
LOG_ERR("%s: the prompt exceeds the context size (%d tokens, ctx %d)\n", __func__, (int) inp.size(), llama_n_ctx(ctx_tgt));
return 1;
}
if (llama_n_batch(ctx_tgt) < (int) inp.size()) {
if (llama_n_batch(ctx_tgt) < (uint32_t) inp.size()) {
LOG_ERR("%s: the prompt exceeds the batch size (%d tokens, batch %d)\n", __func__, (int) inp.size(), llama_n_batch(ctx_tgt));
return 1;
@ -155,7 +155,7 @@ int main(int argc, char ** argv) {
// evaluate the target model on [id_last, draft0, draft1, ..., draftN-1]
{
// do not waste time on small drafts
if (draft.size() < n_draft_min) {
if (draft.size() < (size_t) n_draft_min) {
draft.clear();
}