cmake : respect LLAMA_QKK_64 option
This commit is contained in:
parent
049a32fffa
commit
a8b9bb4566
2 changed files with 11 additions and 6 deletions
|
@ -182,10 +182,15 @@ if (LLAMA_METAL)
|
||||||
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
|
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
|
||||||
# disabling fast math is needed in order to pass tests/test-backend-ops
|
# disabling fast math is needed in order to pass tests/test-backend-ops
|
||||||
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
|
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
|
||||||
|
set(XC_FLAGS -fno-fast-math -fno-inline -g)
|
||||||
|
if (LLAMA_QKK_64)
|
||||||
|
set(XC_FLAGS ${XC_FLAGS} -DQK_K=64)
|
||||||
|
endif()
|
||||||
|
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib
|
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib
|
||||||
COMMAND xcrun -sdk macosx metal -fno-fast-math -fno-inline -g -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||||
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib
|
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib
|
||||||
DEPENDS ggml-metal.metal
|
DEPENDS ggml-metal.metal
|
||||||
COMMENT "Compiling Metal kernels"
|
COMMENT "Compiling Metal kernels"
|
||||||
)
|
)
|
||||||
|
|
|
@ -15,19 +15,18 @@
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
||||||
size_t size = ggml_nelements(tensor);
|
size_t size = ggml_nelements(tensor);
|
||||||
std::vector<float> data(size);
|
std::vector<float> data(size);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
std::default_random_engine generator(rd());
|
static std::default_random_engine generator(1234);
|
||||||
std::uniform_real_distribution<float> distribution(min, max);
|
std::uniform_real_distribution<float> distribution(min, max);
|
||||||
|
|
||||||
for (size_t i = 0; i < size; i++) {
|
for (size_t i = 0; i < size; i++) {
|
||||||
data[i] = distribution(generator);
|
data[i] = distribution(generator);
|
||||||
}
|
}
|
||||||
#endif
|
#else
|
||||||
auto init_thread = [&](size_t start, size_t end) {
|
auto init_thread = [&](size_t start, size_t end) {
|
||||||
std::random_device rd;
|
std::random_device rd;
|
||||||
std::default_random_engine generator(rd());
|
std::default_random_engine generator(rd());
|
||||||
|
@ -49,6 +48,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
||||||
for (auto & t : threads) {
|
for (auto & t : threads) {
|
||||||
t.join();
|
t.join();
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
|
if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
|
||||||
ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
|
ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
|
||||||
|
@ -437,7 +437,7 @@ struct test_case {
|
||||||
double err = nmse(f1.data(), f2.data(), f1.size());
|
double err = nmse(f1.data(), f2.data(), f1.size());
|
||||||
if (err > ud->max_err) {
|
if (err > ud->max_err) {
|
||||||
printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
|
printf("[%s] NMSE = %f ", ggml_op_desc(t1), err);
|
||||||
//for (int i = 0; i < f1.size(); i++) {
|
//for (int i = 0; i < (int) f1.size(); i++) {
|
||||||
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
||||||
//}
|
//}
|
||||||
//printf("\n");
|
//printf("\n");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue