llama : fix platforms without mmap (#4578)

* llama : fix platforms without mmap
* win32 : limit prefetch size to the file size
* fix win32 error clobber, unnecessary std::string in std::runtime_error
2023-12-22 12:12:53 +01:00 · 2023-12-22 12:12:53 +01:00 · 48b7ff193e
commit 48b7ff193e
parent 48b24b170e
3 changed files with 24 additions and 21 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7702,7 +7702,8 @@ inline void ggml_cuda_op_scale(
    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);

-    const float scale = ((float *) dst->op_params)[0];
+    float scale;
+    memcpy(&scale, dst->op_params, sizeof(float));

    scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
    CUDA_CHECK(cudaGetLastError());