Apply suggested fixes to build on Windows
Issue: https://github.com/ggerganov/llama.cpp/issues/22
This commit is contained in:
parent
da1a4ff01f
commit
2d29d4b89e
4 changed files with 15 additions and 12 deletions
20
ggml.c
20
ggml.c
|
@ -407,8 +407,8 @@ void quantize_row_q4_0(const float * restrict x, void * restrict y, int k) {
|
|||
const int nb = k / QK;
|
||||
const size_t bs = sizeof(float) + QK/2;
|
||||
|
||||
uint8_t * restrict pd = (uint8_t *) (y + 0*bs);
|
||||
uint8_t * restrict pb = (uint8_t *) (y + 0*bs + sizeof(float));
|
||||
uint8_t * restrict pd = ((uint8_t *)y + 0*bs);
|
||||
uint8_t * restrict pb = ((uint8_t *)y + 0*bs + sizeof(float));
|
||||
|
||||
uint8_t pp[QK/2];
|
||||
|
||||
|
@ -654,8 +654,8 @@ void dequantize_row_q4_0(const void * restrict x, float * restrict y, int k) {
|
|||
const int nb = k / QK;
|
||||
const size_t bs = sizeof(float) + QK/2;
|
||||
|
||||
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
|
||||
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
|
||||
const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
|
||||
const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
|
||||
|
||||
// scalar
|
||||
for (int i = 0; i < nb; i++) {
|
||||
|
@ -1301,11 +1301,11 @@ inline static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void
|
|||
|
||||
const size_t bs = sizeof(float) + QK/2;
|
||||
|
||||
const uint8_t * restrict pd0 = (const uint8_t *) (x + 0*bs);
|
||||
const uint8_t * restrict pd1 = (const uint8_t *) (y + 0*bs);
|
||||
const uint8_t * restrict pd0 = ((const uint8_t *)x + 0*bs);
|
||||
const uint8_t * restrict pd1 = ((const uint8_t *)y + 0*bs);
|
||||
|
||||
const uint8_t * restrict pb0 = (const uint8_t *) (x + 0*bs + sizeof(float));
|
||||
const uint8_t * restrict pb1 = (const uint8_t *) (y + 0*bs + sizeof(float));
|
||||
const uint8_t * restrict pb0 = ((const uint8_t *)x + 0*bs + sizeof(float));
|
||||
const uint8_t * restrict pb1 = ((const uint8_t *)y + 0*bs + sizeof(float));
|
||||
|
||||
float sumf = 0.0;
|
||||
|
||||
|
@ -1731,8 +1731,8 @@ inline static void ggml_vec_mad_q4_0(const int n, float * restrict y, void * res
|
|||
const int nb = n / QK;
|
||||
const size_t bs = sizeof(float) + QK/2;
|
||||
|
||||
const uint8_t * restrict pd = (const uint8_t *) (x + 0*bs);
|
||||
const uint8_t * restrict pb = (const uint8_t *) (x + 0*bs + sizeof(float));
|
||||
const uint8_t * restrict pd = ((const uint8_t *)x + 0*bs);
|
||||
const uint8_t * restrict pb = ((const uint8_t *)x + 0*bs + sizeof(float));
|
||||
|
||||
#if __ARM_NEON
|
||||
#if QK == 32
|
||||
|
|
1
main.cpp
1
main.cpp
|
@ -733,6 +733,7 @@ bool llama_eval(
|
|||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
ggml_time_init();
|
||||
const int64_t t_main_start_us = ggml_time_us();
|
||||
|
||||
gpt_params params;
|
||||
|
|
|
@ -289,6 +289,7 @@ bool llama_model_quantize(const std::string & fname_inp, const std::string & fna
|
|||
// ./llama-quantize models/llama/ggml-model.bin models/llama/ggml-model-quant.bin type
|
||||
//
|
||||
int main(int argc, char ** argv) {
|
||||
ggml_time_init();
|
||||
if (argc != 4) {
|
||||
fprintf(stderr, "usage: %s model-f32.bin model-quant.bin type\n", argv[0]);
|
||||
fprintf(stderr, " type = 2 - q4_0\n");
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#include "utils.h"
|
||||
#define QK 32
|
||||
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
@ -453,7 +454,7 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
|
|||
|
||||
assert(k % qk == 0);
|
||||
|
||||
uint8_t pp[qk/2];
|
||||
uint8_t pp[QK/2];
|
||||
|
||||
char * pdst = (char *) dst;
|
||||
|
||||
|
@ -507,7 +508,7 @@ size_t ggml_quantize_q4_1(float * src, void * dst, int n, int k, int qk, int64_t
|
|||
|
||||
assert(k % qk == 0);
|
||||
|
||||
uint8_t pp[qk/2];
|
||||
uint8_t pp[QK/2];
|
||||
|
||||
char * pdst = (char *) dst;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue