diff --git a/common/common.cpp b/common/common.cpp
index 459d352ae..e2f6656a6 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1056,7 +1056,7 @@ std::string get_system_info(const gpt_params & params) {
 }
 
 std::string gpt_random_prompt(std::mt19937 & rng) {
-    const int r = static_cast<int>(rng() % 10);
+    const int r = int(rng() % 10);
     switch (r) {
         case 0: return "So";
         case 1: return "Once upon a time";
diff --git a/common/console.cpp b/common/console.cpp
index 957416fd2..f48748a1a 100644
--- a/common/console.cpp
+++ b/common/console.cpp
@@ -250,7 +250,7 @@ namespace console {
                 return expectedWidth;
             }
             COORD initialPosition = bufferInfo.dwCursorPosition;
-            DWORD nNumberOfChars = static_cast<DWORD>(length);
+            DWORD nNumberOfChars = DWORD(length);
             WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);
 
             CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
@@ -404,7 +404,7 @@ namespace console {
                 } while (count == 0 && !widths.empty());
             }
         } else {
-            int offset = static_cast<int>(line.length());
+            int offset = int(line.length());
             append_utf8(input_char, line);
             int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
             if (width < 0) {
diff --git a/common/sampling.cpp b/common/sampling.cpp
index 62e636669..8ff2009af 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -73,7 +73,7 @@ llama_token llama_sampling_last(llama_sampling_context * ctx) {
 }
 
 std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama_context * ctx_main, int n) {
-    const int size = static_cast<int>(ctx_sampling->prev.size());
+    const int size = int(ctx_sampling->prev.size());
 
     n = std::min(n, size);
 
diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp
index 87b32ddf2..e523961fb 100644
--- a/examples/batched/batched.cpp
+++ b/examples/batched/batched.cpp
@@ -70,7 +70,7 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> tokens_list;
     tokens_list = ::llama_tokenize(model, params.prompt, true);
 
-    const int n_kv_req = static_cast<int>(tokens_list.size() + (n_len - tokens_list.size())*n_parallel);
+    const int n_kv_req = int(tokens_list.size() + (n_len - tokens_list.size())*n_parallel);
 
     // initialize the context
 
@@ -112,11 +112,11 @@ int main(int argc, char ** argv) {
     // create a llama_batch
     // we use this object to submit token data for decoding
-    llama_batch batch = llama_batch_init(std::max(static_cast<int32_t>(tokens_list.size()), n_parallel), 0, 1);
+    llama_batch batch = llama_batch_init(std::max(int32_t(tokens_list.size()), n_parallel), 0, 1);
 
     // evaluate the initial prompt
     for (size_t i = 0; i < tokens_list.size(); ++i) {
-        llama_batch_add(batch, tokens_list[i], static_cast<llama_pos>(i), { 0 }, false);
+        llama_batch_add(batch, tokens_list[i], llama_pos(i), { 0 }, false);
     }
 
     GGML_ASSERT(batch.n_tokens == (int) tokens_list.size());
diff --git a/examples/beam-search/beam-search.cpp b/examples/beam-search/beam-search.cpp
index d3335abb5..799b345e8 100644
--- a/examples/beam-search/beam-search.cpp
+++ b/examples/beam-search/beam-search.cpp
@@ -160,12 +160,12 @@ int main(int argc, char ** argv)
 
     int n_past = 0;
 
-    if (llama_decode(ctx, llama_batch_get_one(tokens_list.data(), static_cast<int32_t>(tokens_list.size()), n_past, 0)))
+    if (llama_decode(ctx, llama_batch_get_one(tokens_list.data(), int32_t(tokens_list.size()), n_past, 0)))
     {
         fprintf(stderr, "%s : failed to eval prompt.\n" , __func__ );
         return 1;
     }
-    n_past += static_cast<int>(tokens_list.size());
+    n_past += int(tokens_list.size());
 
     beam_search_callback_data callback_data{ctx, {}};
     size_t const beam_width = static_cast<size_t>(params.n_beams);
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 35a0cb912..efc15e676 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -64,8 +64,8 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "\n");
         fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
         fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-        for (int i : embd_inp) {
-            fprintf(stderr, "%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
+        for (int embd : embd_inp) {
+            fprintf(stderr, "%6d -> '%s'\n", embd, llama_token_to_piece(ctx, embd).c_str());
         }
         fprintf(stderr, "\n");
     }
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 4cf05f7c8..bfb7b4579 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -217,7 +217,7 @@ static std::vector<float> softmax(const std::vector<float>& logits) {
         probs[i] = exp_logit;
     }
     for (float& prob : probs) {
-        prob /= static_cast<float>(sum_exp);
+        prob /= float(sum_exp);
     }
     return probs;
 }
diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp
index bebed4433..5889164b0 100644
--- a/examples/infill/infill.cpp
+++ b/examples/infill/infill.cpp
@@ -313,16 +313,16 @@ int main(int argc, char ** argv) {
         LOG_TEE("\n");
         LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
         LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-        for (int i : embd_inp) {
-            LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
+        for (int embd : embd_inp) {
+            LOG_TEE("%6d -> '%s'\n", embd, llama_token_to_piece(ctx, embd).c_str());
         }
 
         if (ctx_guidance) {
            LOG_TEE("\n");
            LOG_TEE("%s: negative prompt: '%s'\n", __func__, sparams.cfg_negative_prompt.c_str());
            LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
-            for (int i : guidance_inp) {
-                LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
+            for (int inp : guidance_inp) {
+                LOG_TEE("%6d -> '%s'\n", inp, llama_token_to_piece(ctx, inp).c_str());
             }
         }
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 4cf432496..a6437ac16 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -76,7 +76,7 @@ static T stdev(const std::vector<T> & v) {
     }
     T mean = avg(v);
     T sq_sum = std::inner_product(v.begin(), v.end(), v.begin(), T(0));
-    T stdev = static_cast<T>(std::sqrt(sq_sum / (T)(v.size() - 1) - mean * mean * (T)v.size() / (T)(v.size() - 1)));
+    T stdev = T(std::sqrt(sq_sum / (T)(v.size() - 1) - mean * mean * (T)v.size() / (T)(v.size() - 1)));
     return stdev;
 }
 
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index c4a18748f..461663991 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -381,7 +381,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
     //const int n_intermediate = hparams.n_intermediate;
     //const int projection_dim = hparams.projection_dim;
     const float eps = hparams.eps;
-    int batch_size = static_cast<int>(imgs->size);
+    int batch_size = int(imgs->size);
     if (ctx->has_llava_projector) {
         GGML_ASSERT(batch_size == 1);
     }
@@ -607,8 +607,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             // hardswish
             struct ggml_tensor * block_1_hw = ggml_hardswish(ctx0, block_1);
 
-            block_1 = ggml_pool_2d(ctx0, block_1_hw, GGML_OP_POOL_AVG, static_cast<int>(block_1_hw->ne[0]), static_cast<int>(block_1_hw->ne[1]),
-                                   static_cast<int>(block_1_hw->ne[0]), static_cast<int>(block_1_hw->ne[1]), 0, 0);
+            block_1 = ggml_pool_2d(ctx0, block_1_hw, GGML_OP_POOL_AVG, int(block_1_hw->ne[0]), int(block_1_hw->ne[1]),
+                                   int(block_1_hw->ne[0]), int(block_1_hw->ne[1]), 0, 0);
             // block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
             // pointwise conv
             block_1 = ggml_reshape_2d(ctx0, block_1, block_1->ne[0]*block_1->ne[1]*block_1->ne[2], block_1->ne[3]);
@@ -622,8 +622,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             block_1 = ggml_reshape_4d(ctx0, block_1, 1, 1, block_1->ne[0], block_1->ne[1]);
             block_1 = ggml_mul(ctx0, block_1_hw, block_1);
 
-            int w = static_cast<int>(block_1->ne[0]);
-            int h = static_cast<int>(block_1->ne[1]);
+            int w = int(block_1->ne[0]);
+            int h = int(block_1->ne[1]);
             block_1 = ggml_reshape_3d(ctx0, block_1, w*h, block_1->ne[2], block_1->ne[3]);
             block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));
 
@@ -657,8 +657,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             struct ggml_tensor * block_1_hw = ggml_hardswish(ctx0, block_1);
 
             // not sure the parameters is right for globalAvgPooling
-            block_1 = ggml_pool_2d(ctx0, block_1_hw, GGML_OP_POOL_AVG, static_cast<int>(block_1_hw->ne[0]), static_cast<int>(block_1_hw->ne[1]),
-                                   static_cast<int>(block_1_hw->ne[0]), static_cast<int>(block_1_hw->ne[1]), 0, 0);
+            block_1 = ggml_pool_2d(ctx0, block_1_hw, GGML_OP_POOL_AVG, int(block_1_hw->ne[0]), int(block_1_hw->ne[1]),
+                                   int(block_1_hw->ne[0]), int(block_1_hw->ne[1]), 0, 0);
             // block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
             // pointwise conv
             block_1 = ggml_reshape_2d(ctx0, block_1, block_1->ne[0]*block_1->ne[1]*block_1->ne[2], block_1->ne[3]);
@@ -673,8 +673,8 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             block_1 = ggml_reshape_4d(ctx0, block_1, 1, 1, block_1->ne[0], block_1->ne[1]);
             block_1 = ggml_mul(ctx0, block_1_hw, block_1);
 
-            int w = static_cast<int>(block_1->ne[0]);
-            int h = static_cast<int>(block_1->ne[1]);
+            int w = int(block_1->ne[0]);
+            int h = int(block_1->ne[1]);
             block_1 = ggml_reshape_3d(ctx0, block_1, w*h, block_1->ne[2], block_1->ne[3]);
             block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));
             // block_1 shape = [1, 24*24, 2048], ne = [24*24, 2048, 1]
@@ -906,7 +906,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
                 clip_free(new_clip);
                 return nullptr;
             }
-            int num_bytes = static_cast<int>(ggml_nbytes(cur));
+            int num_bytes = int(ggml_nbytes(cur));
             if (ggml_backend_buffer_is_host(new_clip->params_buffer)) {
                 // for the CPU and Metal backend, we can read directly into the tensor
                 fin.read(reinterpret_cast<char *>(cur->data), num_bytes);
@@ -1074,7 +1074,7 @@ bool clip_image_load_from_file(const char * fname, clip_image_u8 * img) {
 
 bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img) {
     int nx, ny, nc;
-    auto * data = stbi_load_from_memory(bytes, static_cast<int>(bytes_length), &nx, &ny, &nc, 3);
+    auto * data = stbi_load_from_memory(bytes, int(bytes_length), &nx, &ny, &nc, 3);
     if (!data) {
         fprintf(stderr, "%s: failed to decode image bytes\n", __func__);
         return false;
@@ -1174,7 +1174,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
                 const float v = v0 * (1.0f - dy) + v1 * dy;
 
-                const uint8_t v2 = static_cast<uint8_t>(std::min(std::max(std::round(v), 0.0f), 255.0f));
+                const uint8_t v2 = std::uint8_t(std::min(std::max(std::round(v), 0.0f), 255.0f));
 
                 const int i = 3 * (y * nx3 + x) + c;
 
@@ -1212,7 +1212,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
         return false;
     }
 
-    int batch_size = static_cast<int>(imgs->size);
+    int batch_size = int(imgs->size);
     if(ctx->has_llava_projector) {
         GGML_ASSERT(batch_size == 1); // TODO: support multiple images
     }
@@ -1342,34 +1342,34 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
         switch (new_type) {
             case GGML_TYPE_Q4_0: {
-                new_size = ggml_quantize_q4_0(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q4_0(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q4_1: {
-                new_size = ggml_quantize_q4_1(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q4_1(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q5_0: {
-                new_size = ggml_quantize_q5_0(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q5_0(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q5_1: {
-                new_size = ggml_quantize_q5_1(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q5_1(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q8_0: {
-                new_size = ggml_quantize_q8_0(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q8_0(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q2_K: {
-                new_size = ggml_quantize_q2_K(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q2_K(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q3_K: {
-                new_size = ggml_quantize_q3_K(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q3_K(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q4_K: {
-                new_size = ggml_quantize_q4_K(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q4_K(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q5_K: {
-                new_size = ggml_quantize_q5_K(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q5_K(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             case GGML_TYPE_Q6_K: {
-                new_size = ggml_quantize_q6_K(f32_data, new_data, static_cast<int>(n_elms), static_cast<int>(cur->ne[0]), hist_cur.data());
+                new_size = ggml_quantize_q6_K(f32_data, new_data, int(n_elms), int(cur->ne[0]), hist_cur.data());
             } break;
             default: {
                 fprintf(stderr, "%s: unsupported quantization type %d\n", __func__, new_type);
@@ -1432,10 +1432,10 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
 
 int clip_n_mmproj_embd(const struct clip_ctx * ctx) {
     if (ctx->proj_type == PROJECTOR_TYPE_LDP) {
-        return static_cast<int>(ctx->vision_model.mm_model_block_1_block_2_1_b->ne[0]);
+        return int(ctx->vision_model.mm_model_block_1_block_2_1_b->ne[0]);
     }
     else if (ctx->proj_type == PROJECTOR_TYPE_MLP) {
-        return static_cast<int>(ctx->vision_model.mm_2_b->ne[0]);
+        return int(ctx->vision_model.mm_2_b->ne[0]);
     }
     else {
         std::string proj_type = PROJECTOR_TYPE_NAMES[ctx->proj_type];
diff --git a/examples/llava/llava-cli.cpp b/examples/llava/llava-cli.cpp
index 98dbc1098..a32ec1c03 100644
--- a/examples/llava/llava-cli.cpp
+++ b/examples/llava/llava-cli.cpp
@@ -85,7 +85,7 @@ static llava_image_embed * llava_image_embed_make_with_prompt_base64(struct clip
     auto img_bytes = std::vector<unsigned char>(required_bytes);
     base64::decode(base64_str.begin(), base64_str.end(), img_bytes.begin());
 
-    auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, img_bytes.data(), static_cast<int>(img_bytes.size()));
+    auto embed = llava_image_embed_make_with_bytes(ctx_clip, n_threads, img_bytes.data(), int(img_bytes.size()));
     if (!embed) {
         fprintf(stderr, "%s: could not load image from base64 string.\n", __func__);
         return NULL;
diff --git a/examples/lookahead/lookahead.cpp b/examples/lookahead/lookahead.cpp
index d0adfec12..f5488eded 100644
--- a/examples/lookahead/lookahead.cpp
+++ b/examples/lookahead/lookahead.cpp
@@ -88,7 +88,7 @@ int main(int argc, char ** argv) {
 
     fflush(stderr);
 
-    const int n_input = static_cast<int>(inp.size());
+    const int n_input = int(inp.size());
 
     const auto t_enc_start = ggml_time_us();
 
@@ -105,7 +105,7 @@ int main(int argc, char ** argv) {
     int n_predict = 0;
     int n_accept = 0;
 
-    int n_past = static_cast<int>(inp.size());
+    int n_past = int(inp.size());
 
     llama_token id = 0;
 
@@ -362,7 +362,7 @@ int main(int argc, char ** argv) {
                 if (v == 0) {
                     // sample from the last level
                     for (int i = 0; i < W; i++) {
-                        tokens_j[N - 2][i] = llama_sampling_sample(ctx_sampling, ctx, NULL, static_cast<int>(ngrams_cur.size()*(N-1) + W*(N - 2) + i));
+                        tokens_j[N - 2][i] = llama_sampling_sample(ctx_sampling, ctx, NULL, int(ngrams_cur.size()*(N-1) + W*(N - 2) + i));
                     }
                 } else {
                     for (int i = 0; i < W; i++) {
diff --git a/examples/lookup/lookup.cpp b/examples/lookup/lookup.cpp
index b19c35c0b..bdab52498 100644
--- a/examples/lookup/lookup.cpp
+++ b/examples/lookup/lookup.cpp
@@ -60,7 +60,7 @@ int main(int argc, char ** argv){
 
     fflush(stderr);
 
-    const int n_input = static_cast<int>(inp.size());
+    const int n_input = int(inp.size());
 
     const auto t_enc_start = ggml_time_us();
 
@@ -73,7 +73,7 @@ int main(int argc, char ** argv){
     int n_drafted = 0;
     int n_accept = 0;
 
-    int n_past = static_cast<int>(inp.size());
+    int n_past = int(inp.size());
 
     bool has_eos = false;
 
@@ -160,7 +160,7 @@ int main(int argc, char ** argv){
 
         // generate n_pred tokens through prompt lookup
         auto prompt_lookup = [&]() -> void {
-            int inp_size = static_cast<int>(inp.size());
+            int inp_size = int(inp.size());
             for (int ngram_size = ngram_max ; ngram_size > ngram_min; --ngram_size){
                 const llama_token * ngram = &inp[inp_size - ngram_size];
 
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 4d52cf284..900483b67 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -361,16 +361,16 @@ int main(int argc, char ** argv) {
         LOG_TEE("\n");
         LOG_TEE("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
         LOG_TEE("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-        for (int i : embd_inp) {
-            LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
+        for (int embd : embd_inp) {
+            LOG_TEE("%6d -> '%s'\n", embd, llama_token_to_piece(ctx, embd).c_str());
         }
 
         if (ctx_guidance) {
             LOG_TEE("\n");
             LOG_TEE("%s: negative prompt: '%s'\n", __func__, sparams.cfg_negative_prompt.c_str());
             LOG_TEE("%s: number of tokens in negative prompt = %zu\n", __func__, guidance_inp.size());
-            for (int i : guidance_inp) {
-                LOG_TEE("%6d -> '%s'\n", i, llama_token_to_piece(ctx, i).c_str());
+            for (int inp : guidance_inp) {
+                LOG_TEE("%6d -> '%s'\n", inp, llama_token_to_piece(ctx, inp).c_str());
             }
         }
diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp
index 3b1290229..1468cfdc6 100644
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -156,13 +156,13 @@ int main(int argc, char ** argv) {
     std::vector<client> clients(n_clients);
     for (size_t i = 0; i < clients.size(); ++i) {
         auto & client = clients[i];
-        client.id = static_cast<int32_t>(i);
+        client.id = int32_t(i);
         client.ctx_sampling = llama_sampling_init(params.sparams);
     }
 
     std::vector<llama_token> tokens_system;
     tokens_system = ::llama_tokenize(ctx, k_system, true);
-    const int32_t n_tokens_system = static_cast<int32_t>(tokens_system.size());
+    const int32_t n_tokens_system = int32_t(tokens_system.size());
 
     llama_seq_id g_seq_id = 0;
 
@@ -254,7 +254,7 @@ int main(int argc, char ** argv) {
             tokens_prompt = ::llama_tokenize(ctx, client.prompt, false);
 
             for (size_t i = 0; i < tokens_prompt.size(); ++i) {
-                llama_batch_add(batch, tokens_prompt[i], static_cast<llama_pos>(i + n_tokens_system), { client.id }, false);
+                llama_batch_add(batch, tokens_prompt[i], llama_pos(i + n_tokens_system), { client.id }, false);
             }
 
             // extract the logits only for the last token
@@ -262,7 +262,7 @@ int main(int argc, char ** argv) {
                 batch.logits[batch.n_tokens - 1] = true;
             }
 
-            client.n_prompt = static_cast<int32_t>(tokens_prompt.size());
+            client.n_prompt = int32_t(tokens_prompt.size());
             client.n_decoded = 0;
             client.i_batch = batch.n_tokens - 1;
 
diff --git a/examples/passkey/passkey.cpp b/examples/passkey/passkey.cpp
index c4c527269..96f3ca166 100644
--- a/examples/passkey/passkey.cpp
+++ b/examples/passkey/passkey.cpp
@@ -42,7 +42,7 @@ int main(int argc, char ** argv) {
     }
 
     if (seed == -1) {
-        seed = static_cast<int>(time(NULL));
+        seed = int(time(NULL));
     }
 
     srand(seed);
@@ -110,9 +110,9 @@ int main(int argc, char ** argv) {
     tokens_list = ::llama_tokenize(ctx, params.prompt, true);
 
     // tokenize the prefix and use it as a sink
-    const int n_tokens_prefix = static_cast<int>(::llama_tokenize(ctx, prompt_prefix, true).size());
+    const int n_tokens_prefix = int(::llama_tokenize(ctx, prompt_prefix, true).size());
 
-    const int n_tokens_all = static_cast<int>(tokens_list.size());
+    const int n_tokens_all = int(tokens_list.size());
 
     // we leave a margin of 16 tokens for the generated text - it should contain just the passkey
     const int n_predict = 16;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index 2c0ff9fbd..9389fc41e 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -95,7 +95,7 @@ static std::vector<float> softmax(const std::vector<float>& logits) {
         probs[i] = exp_logit;
     }
     for (float& prob : probs) {
-        prob /= static_cast<float>(sum_exp);
+        prob /= float(sum_exp);
     }
     return probs;
 }
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index ba21a50c0..436f73814 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -39,8 +39,8 @@ int main(int argc, char ** argv) {
     auto tokens = llama_tokenize(ctx, params.prompt, true);
 
     // evaluate prompt
-    llama_decode(ctx, llama_batch_get_one(tokens.data(), static_cast<llama_pos>(tokens.size()), n_past, 0));
-    n_past += static_cast<int>(tokens.size());
+    llama_decode(ctx, llama_batch_get_one(tokens.data(), llama_pos(tokens.size()), n_past, 0));
+    n_past += int(tokens.size());
 
     // save state (rng, logits, embedding and kv_cache) to file
     {
diff --git a/examples/server/httplib.h b/examples/server/httplib.h
index 49f9e1241..f8a93d95b 100644
--- a/examples/server/httplib.h
+++ b/examples/server/httplib.h
@@ -3495,7 +3495,7 @@ inline bool read_content_with_length(Stream &strm, uint64_t len,
   uint64_t r = 0;
   while (r < len) {
-    auto read_len = static_cast<size_t>(len - r);
+    auto read_len = size_t(len - r);
     auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
     if (n <= 0) { return false; }
 
@@ -3514,7 +3514,7 @@ inline void skip_content_with_length(Stream &strm, uint64_t len) {
   char buf[CPPHTTPLIB_RECV_BUFSIZ];
   uint64_t r = 0;
   while (r < len) {
-    auto read_len = static_cast<size_t>(len - r);
+    auto read_len = size_t(len - r);
     auto n = strm.read(buf, (std::min)(read_len, CPPHTTPLIB_RECV_BUFSIZ));
     if (n <= 0) { return; }
     r += static_cast<uint64_t>(n);
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index b7bd9d4df..bd67c04b6 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -636,7 +636,7 @@ struct llama_server_context
                     const std::vector<uint8_t> image_buffer = base64_decode(img["data"].get<std::string>());
 
                     slot_image img_sl;
-                    img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : static_cast<int>(slot->images.size());
+                    img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : int(slot->images.size());
                     img_sl.img_data = clip_image_u8_init();
                     if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
                     {
@@ -736,7 +736,7 @@ struct llama_server_context
             // assign the system KV cache to all parallel sequences
             for (int32_t i = 1; i < params.n_parallel; ++i)
             {
-                llama_kv_cache_seq_cp(ctx, 0, i, 0, static_cast<llama_pos>(system_tokens.size()));
+                llama_kv_cache_seq_cp(ctx, 0, i, 0, llama_pos(system_tokens.size()));
             }
 
             LOG_TEE("system prompt updated\n");
@@ -1401,7 +1401,7 @@ struct llama_server_context
                 slot.i_batch = batch.n_tokens;
 
-                llama_batch_add(batch, slot.sampled, static_cast<llama_pos>(system_tokens.size() + slot.n_past), { slot.id }, true);
+                llama_batch_add(batch, slot.sampled, llama_pos(system_tokens.size() + slot.n_past), { slot.id }, true);
 
                 slot.n_past += 1;
             }
 
@@ -1463,7 +1463,7 @@ struct llama_server_context
                     prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token);  // add BOS if there isn't system prompt
                 }
 
-                slot.num_prompt_tokens = static_cast<int32_t>(prompt_tokens.size());
+                slot.num_prompt_tokens = int32_t(prompt_tokens.size());
 
                 if (slot.params.n_keep < 0)
                 {
@@ -1490,7 +1490,7 @@ struct llama_server_context
                     slot.truncated = true;
                     prompt_tokens = new_tokens;
 
-                    slot.num_prompt_tokens = static_cast<int32_t>(prompt_tokens.size());
+                    slot.num_prompt_tokens = int32_t(prompt_tokens.size());
                     GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx);
                 }
 
@@ -1509,7 +1509,7 @@ struct llama_server_context
                         llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
                     }
 
-                    slot.n_past = static_cast<int32_t>(common_part(slot.cache_tokens, prompt_tokens));
+                    slot.n_past = int32_t(common_part(slot.cache_tokens, prompt_tokens));
                     slot.num_prompt_tokens_processed = slot.num_prompt_tokens - slot.n_past;
 
                     LOG_TEE("slot %d : in cache: %i tokens | to process: %i tokens\n", slot.id, slot.n_past, slot.num_prompt_tokens_processed);
@@ -1517,7 +1517,7 @@ struct llama_server_context
 
                 LOG_TEE("slot %d : kv cache rm - [%d, end)\n", slot.id, (int) system_tokens.size() + slot.n_past);
 
-                llama_kv_cache_seq_rm(ctx, slot.id, static_cast<llama_pos>(system_tokens.size() + slot.n_past), -1);
+                llama_kv_cache_seq_rm(ctx, slot.id, llama_pos(system_tokens.size() + slot.n_past), -1);
 
                 slot.cache_tokens = prompt_tokens;
 
@@ -1540,7 +1540,7 @@ struct llama_server_context
                 std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
                 for (; slot.n_past < (int) prefix_tokens.size(); ++slot.n_past)
                 {
-                    llama_batch_add(batch, prefix_tokens[slot.n_past], static_cast<llama_pos>(system_tokens.size() + slot.n_past), { slot.id }, false);
+                    llama_batch_add(batch, prefix_tokens[slot.n_past], llama_pos(system_tokens.size() + slot.n_past), { slot.id }, false);
                 }
 
                 if (has_images && !ingest_images(slot, n_batch))
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index b65bc007f..40c909589 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -441,7 +441,7 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
         {
             for (i = 0; i <4; i++)
             {
-                char_array_4[i] = static_cast<uint8_t>(base64_chars.find(char_array_4[i]));
+                char_array_4[i] = uint8_t(base64_chars.find(char_array_4[i]));
             }
 
             char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
@@ -465,7 +465,7 @@ static inline std::vector<uint8_t> base64_decode(const std::string & encoded_str
 
         for (j = 0; j <4; j++)
         {
-            char_array_4[j] = static_cast<uint8_t>(base64_chars.find(char_array_4[j]));
+            char_array_4[j] = uint8_t(base64_chars.find(char_array_4[j]));
        }
 
         char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index af7f1b706..3e6882023 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -67,8 +67,8 @@ int main(int argc, char ** argv) {
     std::vector<llama_token> tokens_list;
     tokens_list = ::llama_tokenize(ctx, params.prompt, true);
 
-    const int n_ctx = static_cast<int>(llama_n_ctx(ctx));
-    const int n_kv_req = static_cast<int>(tokens_list.size() + (n_len - tokens_list.size()));
+    const int n_ctx = int(llama_n_ctx(ctx));
+    const int n_kv_req = int(tokens_list.size() + (n_len - tokens_list.size()));
 
     LOG_TEE("\n%s: n_len = %d, n_ctx = %d, n_kv_req = %d\n", __func__, n_len, n_ctx, n_kv_req);
 
@@ -96,7 +96,7 @@ int main(int argc, char ** argv) {
     // evaluate the initial prompt
     for (size_t i = 0; i < tokens_list.size(); i++) {
-        llama_batch_add(batch, tokens_list[i], static_cast<llama_pos>(i), { 0 }, false);
+        llama_batch_add(batch, tokens_list[i], llama_pos(i), { 0 }, false);
     }
 
     // llama_decode will output logits only for the last token of the prompt
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index 188805b53..7c7c5ba41 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -131,7 +131,7 @@ int main(int argc, char ** argv) {
 
     fflush(stderr);
 
-    const int n_input = static_cast<int>(inp.size());
+    const int n_input = int(inp.size());
 
     const auto t_enc_start = ggml_time_us();
 
@@ -152,8 +152,8 @@ int main(int argc, char ** argv) {
     int n_drafted = 0;
     int n_accept = 0;
 
-    int n_past_tgt = static_cast<int>(inp.size());
-    int n_past_dft = static_cast<int>(inp.size());
+    int n_past_tgt = int(inp.size());
+    int n_past_dft = int(inp.size());
 
     // used to determine end of generation
     bool has_eos = false;
diff --git a/llama.cpp b/llama.cpp
index 4ba83d40b..f5eb57f34 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -10992,15 +10992,15 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
         if (llama_is_normal_token(model->vocab, token)) {
             std::string result = model->vocab.id_to_token[token].text;
             llama_unescape_whitespace(result);
-            if (length < static_cast<int32_t>(result.length())) {
-                return -static_cast<int32_t>(result.length());
+            if (length < int32_t(result.length())) {
+                return -int32_t(result.length());
             }
             memcpy(buf, result.c_str(), result.length());
             return result.length();
         } else if (llama_is_user_defined_token(model->vocab, token)) {
             std::string result = model->vocab.id_to_token[token].text;
-            if (length < static_cast<int32_t>(result.length())) {
-                return -static_cast<int32_t>(result.length());
+            if (length < int32_t(result.length())) {
+                return -int32_t(result.length());
             }
             memcpy(buf, result.c_str(), result.length());
             return result.length();
@@ -11027,15 +11027,15 @@ int32_t llama_token_to_piece(const struct llama_model * model, llama_token token
         if (llama_is_normal_token(model->vocab, token)) {
             std::string result = model->vocab.id_to_token[token].text;
             result = llama_decode_text(result);
-            if (length < static_cast<int32_t>(result.length())) {
-                return -static_cast<int32_t>(result.length());
+            if (length < int32_t(result.length())) {
+                return -int32_t(result.length());
             }
             memcpy(buf, result.c_str(), result.length());
             return result.length();
         } else if (llama_is_user_defined_token(model->vocab, token)) {
             std::string result = model->vocab.id_to_token[token].text;
-            if (length < static_cast<int32_t>(result.length())) {
-                return -static_cast<int32_t>(result.length());
+            if (length < int32_t(result.length())) {
+                return -int32_t(result.length());
             }
             memcpy(buf, result.c_str(), result.length());
             return result.length();
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index a2f5c62aa..0f5b91562 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -63,7 +63,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
                 im = nullptr;
             }
         }
-        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, static_cast<int>(size/tensor->ne[0]),
+        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, int(size/tensor->ne[0]),
                             static_cast<int>(tensor->ne[0]), hist, im);
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
@@ -553,7 +553,7 @@ struct test_case {
         // duplicate the op
         size_t target_size = ggml_backend_is_cpu(backend) ? 1ULL << 33 : 1ULL << 35; // 8 GB CPU, 32 GB GPU
-        int n_runs = static_cast<int>(std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1);
+        int n_runs = int(std::min((size_t)gf->size - gf->n_nodes, target_size / op_size(out)) + 1);
         for (int i = 1; i < n_runs; i++) {
             gf->nodes[gf->n_nodes++] = out;
         }
@@ -584,7 +584,7 @@ struct test_case {
         ggml_backend_graph_compute(backend, gf);
         ggml_backend_synchronize(backend);
         int64_t end_time = ggml_time_us();
-        double time_us = static_cast<double>(end_time - start_time);
+        double time_us = double(end_time - start_time);
 
         printf("    %5d runs - %8.2f us/run - %8zu kB/run - \033[1;34m%7.2f GB/s\033[0m\n",
                n_runs,
@@ -714,8 +714,7 @@ struct test_dup : public test_case {
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
         if (_use_permute) {
-            src = ggml_permute(ctx, src, static_cast<int>(permute[0]), static_cast<int>(permute[1]),
-                               static_cast<int>(permute[2]), static_cast<int>(permute[3]));
+            src = ggml_permute(ctx, src, int(permute[0]), int(permute[1]), int(permute[2]), int(permute[3]));
         }
         ggml_tensor * out = ggml_dup(ctx, src);
         return out;
    }
@@ -1241,7 +1240,7 @@ struct test_argsort : public test_case {
         for (int64_t r = 0; r < ggml_nrows(t); r++) {
             std::vector<float> data(t->ne[0]);
             for (int i = 0; i < t->ne[0]; i++) {
-                data[i] = static_cast<float>(i);
+                data[i] = float(i);
             }
             std::shuffle(data.begin(), data.end(), rng);
             ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
@@ -1423,7 +1422,7 @@ struct test_moe : public test_case {
         ggml_tensor * cur = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_tokens);
 
         ggml_tensor * logits = ggml_mul_mat(ctx, ffn_gate_inp, cur);
-        ggml_tensor * probs = ggml_soft_max_ext(ctx, logits, nullptr, 1.0f/sqrtf(static_cast<float>(n_embd)));
+        ggml_tensor * probs = ggml_soft_max_ext(ctx, logits, nullptr, 1.0f/sqrtf(float(n_embd)));
 
         // select experts
         ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_experts_per_tok);