sync : ggml

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-03-27 19:03:43 +02:00
parent e5b89a441a
commit a1968c2e63
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
5 changed files with 20 additions and 9 deletions

View file

@ -705,8 +705,13 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
struct ggml_tensor * leaf = graph->leafs[i]; struct ggml_tensor * leaf = graph->leafs[i];
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf); struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
galloc->leaf_allocs[i].buffer_id = hn->buffer_id; galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
galloc->leaf_allocs[i].leaf.offset = hn->offset; if (leaf->view_src || leaf->data) {
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf); galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
galloc->leaf_allocs[i].leaf.size_max = 0;
} else {
galloc->leaf_allocs[i].leaf.offset = hn->offset;
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
}
} }
// reallocate buffers if needed // reallocate buffers if needed

View file

@ -1,7 +1,8 @@
#pragma once #pragma once
#include "../ggml.h" #include "ggml.h"
#include "../ggml-cuda.h" #include "ggml-cuda.h"
#include <memory> #include <memory>
#if defined(GGML_USE_HIPBLAS) #if defined(GGML_USE_HIPBLAS)
@ -11,7 +12,7 @@
#define GGML_COMMON_DECL_CUDA #define GGML_COMMON_DECL_CUDA
#define GGML_COMMON_IMPL_CUDA #define GGML_COMMON_IMPL_CUDA
#endif #endif
#include "../ggml-common.h" #include "ggml-common.h"
#include <cstdio> #include <cstdio>
#include <array> #include <array>
@ -230,6 +231,12 @@ typedef float dfloat; // dequantize float
typedef float2 dfloat2; typedef float2 dfloat2;
#endif //GGML_CUDA_F16 #endif //GGML_CUDA_F16
// dmmv = dequantize_mul_mat_vec
// TODO: remove this?
#ifndef GGML_CUDA_DMMV_X
#define GGML_CUDA_DMMV_X 32
#endif
[[noreturn]] [[noreturn]]
static __device__ void no_device_code( static __device__ void no_device_code(
const char * file_name, const int line, const char * function_name, const int arch, const char * arch_list) { const char * file_name, const int line, const char * function_name, const int arch, const char * arch_list) {

View file

@ -2,10 +2,6 @@
#include "dequantize.cuh" #include "dequantize.cuh"
#include "convert.cuh" #include "convert.cuh"
// dmmv = dequantize_mul_mat_vec
#ifndef GGML_CUDA_DMMV_X
#define GGML_CUDA_DMMV_X 32
#endif
#ifndef GGML_CUDA_MMV_Y #ifndef GGML_CUDA_MMV_Y
#define GGML_CUDA_MMV_Y 1 #define GGML_CUDA_MMV_Y 1
#endif #endif

View file

@ -95,6 +95,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
# src/ggml-backend-impl.h -> ggml-backend-impl.h # src/ggml-backend-impl.h -> ggml-backend-impl.h
# src/ggml-backend.c -> ggml-backend.c # src/ggml-backend.c -> ggml-backend.c
# src/ggml-common.h -> ggml-common.h # src/ggml-common.h -> ggml-common.h
# src/ggml-cuda/* -> ggml-cuda/
# src/ggml-cuda.cu -> ggml-cuda.cu # src/ggml-cuda.cu -> ggml-cuda.cu
# src/ggml-cuda.h -> ggml-cuda.h # src/ggml-cuda.h -> ggml-cuda.h
# src/ggml-impl.h -> ggml-impl.h # src/ggml-impl.h -> ggml-impl.h
@ -128,6 +129,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \ -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \ -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
-e 's/src\/ggml-common\.h/ggml-common.h/g' \ -e 's/src\/ggml-common\.h/ggml-common.h/g' \
-e 's/src\/ggml-cuda\//ggml-cuda\//g' \
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \ -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \ -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \ -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \

View file

@ -5,6 +5,7 @@ cp -rpv ../ggml/src/ggml-alloc.c ./ggml-alloc.c
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml-backend-impl.h
cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c cp -rpv ../ggml/src/ggml-backend.c ./ggml-backend.c
cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h cp -rpv ../ggml/src/ggml-common.h ./ggml-common.h
cp -rpv ../ggml/src/ggml-cuda/* ./ggml-cuda/
cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h cp -rpv ../ggml/src/ggml-impl.h ./ggml-impl.h