diff --git a/llama.cpp b/llama.cpp
index 857e90a88..beebb2992 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4639,8 +4639,14 @@ void llama_beam_search(llama_context * ctx,
 // quantization
 //
 
+template <typename T>
+struct no_init {
+    T value;
+    no_init() { /* do nothing */ }
+};
+
 static void llama_convert_tensor_internal(
-    struct ggml_tensor * tensor, std::vector<float> & output, std::vector<std::thread> & workers,
+    struct ggml_tensor * tensor, std::vector<no_init<float>> & output, std::vector<std::thread> & workers,
     const size_t nelements, const int nthread
 ) {
     if (output.size() < nelements) {
@@ -4895,9 +4901,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 
     int idx = 0;
 
-    std::vector<uint8_t> read_data;
-    std::vector<uint8_t> work;
-    std::vector<float> f32_conv_buf;
+    std::vector<no_init<uint8_t>> read_data;
+    std::vector<no_init<uint8_t>> work;
+    std::vector<no_init<float>> f32_conv_buf;
 
     // populate the original tensors so we get an initial meta data
     for (int i = 0; i < ml->n_tensors; ++i) {
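
The change above applies a well-known C++ trick: std::vector<T>::resize() value-initializes every new element, so growing the large read/work/conversion buffers used during quantization pays for a full memset of memory that is about to be overwritten anyway. Wrapping the element type in no_init<T>, whose user-provided default constructor deliberately does nothing, makes resize() leave the storage uninitialized. Below is a minimal, self-contained sketch of the effect; it is not part of the patch, and the 256 MiB buffer size and the timing harness are illustrative only:

// Sketch (not from the patch): resize() on a vector of no_init<uint8_t>
// skips the element-wise zeroing that a plain std::vector<uint8_t> performs.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <vector>

template <typename T>
struct no_init {
    T value;
    no_init() { /* leave `value` uninitialized on purpose */ }
};

int main() {
    const size_t n = 256u * 1024 * 1024; // 256 MiB, arbitrary demo size

    std::vector<uint8_t>          plain;
    std::vector<no_init<uint8_t>> raw;

    // Time a callable in milliseconds.
    auto ms = [](auto && fn) {
        auto t0 = std::chrono::steady_clock::now();
        fn();
        auto t1 = std::chrono::steady_clock::now();
        return std::chrono::duration<double, std::milli>(t1 - t0).count();
    };

    // plain: allocation + memset to zero; raw: allocation + no-op constructors.
    std::printf("std::vector<uint8_t>          resize: %8.2f ms\n", ms([&] { plain.resize(n); }));
    std::printf("std::vector<no_init<uint8_t>> resize: %8.2f ms\n", ms([&] { raw.resize(n); }));

    // Elements of `raw` hold indeterminate values until written.
    raw[0].value = 42;
    return plain[0] + raw[0].value; // keep both buffers observably used
}

In practice no_init<T> has the same size and alignment as T, so the buffer's data() pointer can still be handed (with a cast) to code expecting raw T storage; the trade-off is that every element must now be written before it is read, which the quantization path is expected to guarantee since these buffers are filled before use.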