Merge branch 'ggerganov:master' into master

Authored by WangHaoranRobin on 2023-06-25 08:51:59 -07:00, committed by GitHub
commit af058cf820
7 changed files with 466 additions and 81 deletions

README.md

@@ -5,12 +5,15 @@
 [![Actions Status](https://github.com/ggerganov/llama.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/llama.cpp/actions)
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)

+[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
+
 Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++

 **Hot topics:**

+- New roadmap: https://github.com/users/ggerganov/projects/7
+- Azure CI brainstorming: https://github.com/ggerganov/llama.cpp/discussions/1985
 - p1 : LLM-based code completion engine at the edge : https://github.com/ggml-org/p1/discussions/1
-- Roadmap June 2023: https://github.com/ggerganov/llama.cpp/discussions/1729

 <details>
   <summary>Table of Contents</summary>

build.zig

@@ -1,61 +1,58 @@
 const std = @import("std");
+// Zig Version: 0.11.0-dev.3379+629f0d23b
 pub fn build(b: *std.build.Builder) void {
     const target = b.standardTargetOptions(.{});
-    const optimize = b.standardReleaseOptions();
-    const want_lto = b.option(bool, "lto", "Want -fLTO");
-
-    const lib = b.addStaticLibrary("llama", null);
-    lib.want_lto = want_lto;
-    lib.setTarget(target);
-    lib.setBuildMode(optimize);
+    const optimize = b.standardOptimizeOption(.{});
+    const lib = b.addStaticLibrary(.{
+        .name = "llama",
+        .target = target,
+        .optimize = optimize,
+    });
+    lib.linkLibC();
     lib.linkLibCpp();
     lib.addIncludePath(".");
-    lib.addIncludePath("examples");
+    lib.addIncludePath("./examples");
     lib.addCSourceFiles(&.{
         "ggml.c",
     }, &.{"-std=c11"});
     lib.addCSourceFiles(&.{
         "llama.cpp",
     }, &.{"-std=c++11"});
-    lib.install();
-
-    const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize, .want_lto = want_lto };
-
-    const exe = build_example("main", build_args);
-    _ = build_example("quantize", build_args);
-    _ = build_example("perplexity", build_args);
-    _ = build_example("embedding", build_args);
-
-    // create "zig build run" command for ./main
-
-    const run_cmd = exe.run();
-    run_cmd.step.dependOn(b.getInstallStep());
-    if (b.args) |args| {
-        run_cmd.addArgs(args);
-    }
-
-    const run_step = b.step("run", "Run the app");
-    run_step.dependOn(&run_cmd.step);
-}
-
-fn build_example(comptime name: []const u8, args: anytype) *std.build.LibExeObjStep {
-    const b = args.b;
-    const lib = args.lib;
-    const want_lto = args.want_lto;
-
-    const exe = b.addExecutable(name, null);
-    exe.want_lto = want_lto;
-    lib.setTarget(args.target);
-    lib.setBuildMode(args.optimize);
-    exe.addIncludePath(".");
-    exe.addIncludePath("examples");
-    exe.addCSourceFiles(&.{
-        std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{name, name}),
-        "examples/common.cpp",
-    }, &.{"-std=c++11"});
-    exe.linkLibrary(lib);
-    exe.install();
-    return exe;
-}
+    b.installArtifact(lib);
+
+    const examples = .{
+        "main",
+        "baby-llama",
+        "embedding",
+        // "metal",
+        "perplexity",
+        "quantize",
+        "quantize-stats",
+        "save-load-state",
+        // "server",
+        "simple",
+        "train-text-from-scratch",
+    };
+
+    inline for (examples) |example_name| {
+        const exe = b.addExecutable(.{
+            .name = example_name,
+            .target = target,
+            .optimize = optimize,
+        });
+        exe.addIncludePath(".");
+        exe.addIncludePath("./examples");
+        exe.addCSourceFiles(&.{
+            std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{example_name, example_name}),
+            "examples/common.cpp",
+        }, &.{"-std=c++11"});
+        exe.linkLibrary(lib);
+        b.installArtifact(exe);
+
+        const run_cmd = b.addRunArtifact(exe);
+        run_cmd.step.dependOn(b.getInstallStep());
+        if (b.args) |args| run_cmd.addArgs(args);
+
+        const run_step = b.step("run_" ++ example_name, "Run the app");
+        run_step.dependOn(&run_cmd.step);
+    }
+}

examples/server/server.cpp

@@ -374,10 +374,10 @@ struct llama_server_context {
            result.tok = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
        } else {
            // Temperature sampling
+           llama_sample_top_k(ctx, &candidates_p, top_k, 1);
            llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
            llama_sample_typical(ctx, &candidates_p, typical_p, 1);
            llama_sample_top_p(ctx, &candidates_p, top_p, 1);
-           llama_sample_top_k(ctx, &candidates_p, top_k, 1);
            llama_sample_temperature(ctx, &candidates_p, temp);
            result.tok = llama_sample_token(ctx, &candidates_p);
        }
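For reference, a minimal sketch of this temperature-sampling path using the llama.h sampling API of this period. The helper function and its parameter plumbing are illustrative only, and the calls are shown in the conventional order used by the main example (top-k before the other truncation samplers):

    #include "llama.h"

    // Illustrative sketch, not taken from server.cpp: apply the truncation
    // samplers, then temperature, then draw the token.
    static llama_token sample_next_token(struct llama_context * ctx,
                                         llama_token_data_array * candidates,
                                         int top_k, float tfs_z, float typical_p,
                                         float top_p, float temp) {
        llama_sample_top_k      (ctx, candidates, top_k,     1);
        llama_sample_tail_free  (ctx, candidates, tfs_z,     1);
        llama_sample_typical    (ctx, candidates, typical_p, 1);
        llama_sample_top_p      (ctx, candidates, top_p,     1);
        llama_sample_temperature(ctx, candidates, temp);
        return llama_sample_token(ctx, candidates);
    }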

ggml-cuda.cu

@@ -2635,7 +2635,7 @@ void ggml_cuda_free_scratch() {
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
    ggml_cuda_func_t func;
    const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
-       || tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT
+       || (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
        || (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);

    switch (tensor->op) {

ggml.c

@ -1,5 +1,5 @@
// Defines CLOCK_MONOTONIC on Linux #define _GNU_SOURCE // Defines CLOCK_MONOTONIC on Linux
#define _GNU_SOURCE #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
#include "ggml.h" #include "ggml.h"
@@ -131,6 +131,34 @@ typedef void* thread_ret_t;
#define GGML_MEM_ALIGN 16
#endif
//
// logging
//
#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif
#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif
#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif
#define GGML_PRINT(...) printf(__VA_ARGS__)
//
// end of logging block
//
#if defined(_MSC_VER) || defined(__MINGW32__)
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
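These logging levels are compile-time switches: unless ggml.c is compiled with GGML_DEBUG set to 1 or higher, the GGML_PRINT_DEBUG* variants expand to nothing, while GGML_PRINT always prints. A hypothetical usage snippet (not part of the diff), assuming it sits inside ggml.c where these macros are visible:

    // Enable the first two lines with e.g. -DGGML_DEBUG=5 at compile time.
    static void print_node_info(const struct ggml_tensor * t) {
        GGML_PRINT_DEBUG  ("%s: op = %d\n", __func__, t->op);                     // GGML_DEBUG >= 1
        GGML_PRINT_DEBUG_5("%s: ne[0] = %lld\n", __func__, (long long) t->ne[0]); // GGML_DEBUG >= 5
        GGML_PRINT        ("node has %d dimensions\n", t->n_dims);                // always printed
    }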
@@ -144,6 +172,17 @@ inline static void* ggml_aligned_malloc(size_t size) {
#endif
if (result != 0) {
// Handle allocation failure
const char *error_desc = "unknown allocation error";
switch (result) {
case EINVAL:
error_desc = "invalid alignment value";
break;
case ENOMEM:
error_desc = "insufficient memory";
break;
}
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
__func__, error_desc, size/(1024.0*1024.0));
return NULL;
}
return aligned_memory;
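The switch above works because posix_memalign reports failure through its return value rather than through errno: EINVAL means the requested alignment is not a power-of-two multiple of sizeof(void *), and ENOMEM means the allocation itself failed. A small self-contained illustration of that convention (hypothetical example, not from the diff):

    #define _POSIX_C_SOURCE 200112L
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        void * ptr = NULL;
        // 24 is not a power of two, so this returns EINVAL instead of setting errno.
        int rc = posix_memalign(&ptr, 24, 1024);
        if (rc == EINVAL) { printf("invalid alignment value\n"); }
        if (rc == ENOMEM) { printf("insufficient memory\n"); }
        if (rc == 0)      { free(ptr); }
        return 0;
    }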
@@ -3530,30 +3569,6 @@ inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x
*s = 1.f/(*s);
}
-//
-// logging
-//
-#if (GGML_DEBUG >= 1)
-#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG(...)
-#endif
-#if (GGML_DEBUG >= 5)
-#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_5(...)
-#endif
-#if (GGML_DEBUG >= 10)
-#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_10(...)
-#endif
-#define GGML_PRINT(...) printf(__VA_ARGS__)
//
// data types
//
@@ -3713,11 +3728,15 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"MAP_UNARY",
"MAP_BINARY",
"MAP_CUSTOM1",
"MAP_CUSTOM2",
"MAP_CUSTOM3",
"CROSS_ENTROPY_LOSS", "CROSS_ENTROPY_LOSS",
"CROSS_ENTROPY_LOSS_BACK", "CROSS_ENTROPY_LOSS_BACK",
}; };
static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61"); static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none", "none",
@@ -3785,11 +3804,15 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"f(x)",
"f(x,y)",
"custom(x)",
"custom(x,y)",
"custom(x,y,z)",
"cross_entropy_loss(x,y)", "cross_entropy_loss(x,y)",
"cross_entropy_loss_back(x,y)", "cross_entropy_loss_back(x,y)",
}; };
static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61"); static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -7094,9 +7117,14 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
is_node = true;
}

struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

ggml_scratch_save(ctx);

struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;

-struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

ggml_scratch_load(ctx);

result->op = GGML_OP_MAP_UNARY;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7136,9 +7164,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
is_node = true;
}

struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

ggml_scratch_save(ctx);

struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;

-struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

ggml_scratch_load(ctx);

result->op = GGML_OP_MAP_BINARY;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
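The ggml_scratch_save/ggml_scratch_load pair matters when the caller has installed a scratch buffer with ggml_set_scratch: without it, the one-element tensor holding the callback address would be carved out of the scratch region and could be clobbered once later nodes reuse that space. A hedged sketch of the calling pattern this protects, using the existing ggml_map_binary_f32 API (the callback and buffer size are made up for illustration):

    #include "ggml.h"

    // Made-up element-wise callback matching ggml_binary_op_f32_t.
    static void add_f32(const int n, float * dst, const float * x, const float * y) {
        for (int i = 0; i < n; ++i) { dst[i] = x[i] + y[i]; }
    }

    static char scratch_buf[16*1024*1024];

    void build_with_scratch(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
        // With a scratch buffer installed, ordinary tensor data is placed inside scratch_buf.
        ggml_set_scratch(ctx, (struct ggml_scratch) { .offs = 0, .size = sizeof(scratch_buf), .data = scratch_buf });
        // The small tensor that ggml_map_binary_f32 creates to hold the function pointer
        // must not live in the scratch buffer; the save/load pair above ensures that.
        struct ggml_tensor * y = ggml_map_binary_f32(ctx, a, b, add_f32);
        (void) y;
    }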
@@ -7165,6 +7198,150 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
}
// ggml_map_custom1
struct ggml_tensor * ggml_map_custom1_impl_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_f32_t fun,
bool inplace) {
bool is_node = false;
if (!inplace && a->grad) {
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_CUSTOM1;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->opt[0] = addr_tensor;
return result;
}
struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_f32_t fun) {
return ggml_map_custom1_impl_f32(ctx, a, fun, false);
}
struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_f32_t fun) {
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
}
// ggml_map_custom2
struct ggml_tensor * ggml_map_custom2_impl_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_f32_t fun,
bool inplace) {
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_CUSTOM2;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = b;
result->opt[0] = addr_tensor;
return result;
}
struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_f32_t fun) {
return ggml_map_custom2_impl_f32(ctx, a, b, fun, false);
}
struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_f32_t fun) {
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
}
// ggml_map_custom3
struct ggml_tensor * ggml_map_custom3_impl_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_f32_t fun,
bool inplace) {
bool is_node = false;
if (!inplace && (a->grad || b->grad || c->grad)) {
is_node = true;
}
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
ggml_scratch_save(ctx);
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
ggml_scratch_load(ctx);
result->op = GGML_OP_MAP_CUSTOM3;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = b;
result->opt[0] = addr_tensor;
result->opt[1] = c;
return result;
}
struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_f32_t fun) {
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, false);
}
struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_f32_t fun) {
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
}
// ggml_cross_entropy_loss

struct ggml_tensor * ggml_cross_entropy_loss(
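Unlike ggml_map_unary_f32 and ggml_map_binary_f32, whose callbacks receive raw float arrays, these custom variants hand the callback whole ggml_tensor structs, so it can inspect shapes and strides itself. A hypothetical ggml_custom1_op_f32_t implementation, assuming contiguous GGML_TYPE_F32 tensors:

    // Hypothetical callback for ggml_map_custom1_f32: squares every element.
    static void square_f32(struct ggml_tensor * dst, const struct ggml_tensor * a) {
        const int64_t n = ggml_nelements(dst);
        const float * x = (const float *) a->data;
        float       * y = (float       *) dst->data;
        for (int64_t i = 0; i < n; ++i) {
            y[i] = x[i]*x[i];
        }
    }

It would be attached to a graph with a call such as ggml_map_custom1_f32(ctx, a, square_f32).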
@@ -14621,6 +14798,114 @@ static void ggml_compute_forward_map_binary(
}
}
// ggml_compute_forward_map_custom1
static void ggml_compute_forward_map_custom1_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst,
const ggml_custom1_op_f32_t fun) {
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
fun(dst, a);
}
static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst,
const ggml_custom1_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_map_custom2
static void ggml_compute_forward_map_custom2_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst,
const ggml_custom2_op_f32_t fun) {
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
fun(dst, a, b);
}
static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst,
const ggml_custom2_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_map_custom3
static void ggml_compute_forward_map_custom3_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
const struct ggml_tensor * c,
struct ggml_tensor * dst,
const ggml_custom3_op_f32_t fun) {
assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
fun(dst, a, b, c);
}
static void ggml_compute_forward_map_custom3(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
const struct ggml_tensor * c,
struct ggml_tensor * dst,
const ggml_custom3_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_cross_entropy_loss

static void ggml_compute_forward_cross_entropy_loss_f32(
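These forward functions assert params->ith == 0, and further down the planner schedules the custom ops with n_tasks = 1, so a callback always runs on a single thread and has to cover the whole output itself. A hypothetical ggml_custom2_op_f32_t callback written with that in mind (again assuming contiguous f32 tensors):

    // Hypothetical callback for ggml_map_custom2_f32: element-wise maximum of a and b.
    static void max_f32(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b) {
        const int64_t n = ggml_nelements(dst);
        const float * x = (const float *) a->data;
        const float * y = (const float *) b->data;
        float       * z = (float       *) dst->data;
        for (int64_t i = 0; i < n; ++i) {
            z[i] = x[i] > y[i] ? x[i] : y[i];
        }
    }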
@@ -14911,7 +15196,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
if (skip_cpu) {
return;
}
-GGML_ASSERT(tensor->src0->backend == GGML_BACKEND_CPU);
+GGML_ASSERT(tensor->src0 == NULL || tensor->src0->backend == GGML_BACKEND_CPU);
GGML_ASSERT(tensor->src1 == NULL || tensor->src1->backend == GGML_BACKEND_CPU);
#endif // GGML_USE_CUBLAS
@@ -15158,6 +15443,24 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1:
{
const ggml_custom1_op_f32_t fun = *((ggml_custom1_op_f32_t *)tensor->opt[0]->data);
ggml_compute_forward_map_custom1(params, tensor->src0, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM2:
{
const ggml_custom2_op_f32_t fun = *((ggml_custom2_op_f32_t *)tensor->opt[0]->data);
ggml_compute_forward_map_custom2(params, tensor->src0, tensor->src1, tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM3:
{
const ggml_custom3_op_f32_t fun = *((ggml_custom3_op_f32_t *)tensor->opt[0]->data);
ggml_compute_forward_map_custom3(params, tensor->src0, tensor->src1, tensor->opt[1], tensor, fun);
}
break;
case GGML_OP_CROSS_ENTROPY_LOSS:
{
ggml_compute_forward_cross_entropy_loss(params, tensor->src0, tensor->src1, tensor);
@@ -15964,6 +16267,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
{
GGML_ASSERT(false); // not supported
} break;
@@ -16605,6 +16911,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
{
node->n_tasks = 1;
} break;

ggml.h

@@ -345,6 +345,10 @@ extern "C" {
GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY,
GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3,
GGML_OP_CROSS_ENTROPY_LOSS,
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
@@ -1167,21 +1171,73 @@ extern "C" {
int h0,
int w);
-// Mapping operations
+// custom operators

-typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
+typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_unary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_unary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_binary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_binary_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_f32_t fun);
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_f32_t fun);
// loss function

GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
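Putting the new public API together, a minimal end-to-end sketch (hypothetical example code, not part of the diff; error handling is omitted and the graph runs with the default thread count of this era's API):

    #include "ggml.h"
    #include <stdio.h>

    // Hypothetical ggml_custom2_op_f32_t callback: element-wise product.
    static void mul_f32(struct ggml_tensor * dst, const struct ggml_tensor * a, const struct ggml_tensor * b) {
        const int64_t n = ggml_nelements(dst);
        for (int64_t i = 0; i < n; ++i) {
            ((float *) dst->data)[i] = ((const float *) a->data)[i] * ((const float *) b->data)[i];
        }
    }

    int main(void) {
        struct ggml_init_params params = { .mem_size = 16*1024*1024, .mem_buffer = NULL, .no_alloc = false };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        for (int i = 0; i < 4; ++i) {
            ggml_set_f32_1d(a, i, (float) i);
            ggml_set_f32_1d(b, i, 2.0f);
        }

        struct ggml_tensor * c = ggml_map_custom2_f32(ctx, a, b, mul_f32);

        struct ggml_cgraph gf = ggml_build_forward(c);
        ggml_graph_compute(ctx, &gf);

        for (int i = 0; i < 4; ++i) {
            printf("c[%d] = %f\n", i, ggml_get_f32_1d(c, i));
        }

        ggml_free(ctx);
        return 0;
    }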

tests/test-grad0.c

@@ -1,3 +1,4 @@
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
#include "ggml.h"

#include <math.h>
@@ -5,6 +6,10 @@
#include <stdlib.h>
#include <assert.h>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
#define MAX_NARGS 3

#undef MIN
@@ -197,8 +202,23 @@ bool check_gradient(
float max_error_abs,
float max_error_rel) {
static int n_threads = -1;
if (n_threads < 0) {
n_threads = GGML_DEFAULT_N_THREADS;
const char *env = getenv("GGML_N_THREADS");
if (env) {
n_threads = atoi(env);
}
printf("GGML_N_THREADS = %d\n", n_threads);
}
struct ggml_cgraph gf = ggml_build_forward (f);
gf.n_threads = n_threads;

struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
gb.n_threads = n_threads;

ggml_graph_compute(ctx0, &gf);
ggml_graph_reset (&gf);