Merge branch 'ggerganov:master' into master
This commit is contained in:
commit
af058cf820
7 changed files with 466 additions and 81 deletions
|
@ -5,12 +5,15 @@
|
|||
[](https://github.com/ggerganov/llama.cpp/actions)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
|
||||
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
|
||||
|
||||
Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
|
||||
|
||||
**Hot topics:**
|
||||
|
||||
- New roadmap: https://github.com/users/ggerganov/projects/7
|
||||
- Azure CI brainstorming: https://github.com/ggerganov/llama.cpp/discussions/1985
|
||||
- p1 : LLM-based code completion engine at the edge : https://github.com/ggml-org/p1/discussions/1
|
||||
- Roadmap June 2023: https://github.com/ggerganov/llama.cpp/discussions/1729
|
||||
|
||||
<details>
|
||||
<summary>Table of Contents</summary>
|
||||
|
|
79
build.zig
79
build.zig
|
@ -1,61 +1,58 @@
|
|||
const std = @import("std");
|
||||
|
||||
// Zig Version: 0.11.0-dev.3379+629f0d23b
|
||||
pub fn build(b: *std.build.Builder) void {
|
||||
const target = b.standardTargetOptions(.{});
|
||||
const optimize = b.standardReleaseOptions();
|
||||
const want_lto = b.option(bool, "lto", "Want -fLTO");
|
||||
|
||||
const lib = b.addStaticLibrary("llama", null);
|
||||
lib.want_lto = want_lto;
|
||||
lib.setTarget(target);
|
||||
lib.setBuildMode(optimize);
|
||||
const optimize = b.standardOptimizeOption(.{});
|
||||
const lib = b.addStaticLibrary(.{
|
||||
.name = "llama",
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
});
|
||||
lib.linkLibC();
|
||||
lib.linkLibCpp();
|
||||
lib.addIncludePath(".");
|
||||
lib.addIncludePath("examples");
|
||||
lib.addIncludePath("./examples");
|
||||
lib.addCSourceFiles(&.{
|
||||
"ggml.c",
|
||||
}, &.{"-std=c11"});
|
||||
lib.addCSourceFiles(&.{
|
||||
"llama.cpp",
|
||||
}, &.{"-std=c++11"});
|
||||
lib.install();
|
||||
b.installArtifact(lib);
|
||||
|
||||
const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize, .want_lto = want_lto };
|
||||
const examples = .{
|
||||
"main",
|
||||
"baby-llama",
|
||||
"embedding",
|
||||
// "metal",
|
||||
"perplexity",
|
||||
"quantize",
|
||||
"quantize-stats",
|
||||
"save-load-state",
|
||||
// "server",
|
||||
"simple",
|
||||
"train-text-from-scratch",
|
||||
};
|
||||
|
||||
const exe = build_example("main", build_args);
|
||||
_ = build_example("quantize", build_args);
|
||||
_ = build_example("perplexity", build_args);
|
||||
_ = build_example("embedding", build_args);
|
||||
|
||||
// create "zig build run" command for ./main
|
||||
|
||||
const run_cmd = exe.run();
|
||||
run_cmd.step.dependOn(b.getInstallStep());
|
||||
if (b.args) |args| {
|
||||
run_cmd.addArgs(args);
|
||||
}
|
||||
|
||||
const run_step = b.step("run", "Run the app");
|
||||
run_step.dependOn(&run_cmd.step);
|
||||
}
|
||||
|
||||
fn build_example(comptime name: []const u8, args: anytype) *std.build.LibExeObjStep {
|
||||
const b = args.b;
|
||||
const lib = args.lib;
|
||||
const want_lto = args.want_lto;
|
||||
|
||||
const exe = b.addExecutable(name, null);
|
||||
exe.want_lto = want_lto;
|
||||
lib.setTarget(args.target);
|
||||
lib.setBuildMode(args.optimize);
|
||||
inline for (examples) |example_name| {
|
||||
const exe = b.addExecutable(.{
|
||||
.name = example_name,
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
});
|
||||
exe.addIncludePath(".");
|
||||
exe.addIncludePath("examples");
|
||||
exe.addIncludePath("./examples");
|
||||
exe.addCSourceFiles(&.{
|
||||
std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{name, name}),
|
||||
std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{example_name, example_name}),
|
||||
"examples/common.cpp",
|
||||
}, &.{"-std=c++11"});
|
||||
exe.linkLibrary(lib);
|
||||
exe.install();
|
||||
|
||||
return exe;
|
||||
b.installArtifact(exe);
|
||||
const run_cmd = b.addRunArtifact(exe);
|
||||
run_cmd.step.dependOn(b.getInstallStep());
|
||||
if (b.args) |args| run_cmd.addArgs(args);
|
||||
const run_step = b.step("run_" ++ example_name, "Run the app");
|
||||
run_step.dependOn(&run_cmd.step);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -374,10 +374,10 @@ struct llama_server_context {
|
|||
result.tok = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
|
||||
} else {
|
||||
// Temperature sampling
|
||||
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
|
||||
llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
|
||||
llama_sample_typical(ctx, &candidates_p, typical_p, 1);
|
||||
llama_sample_top_p(ctx, &candidates_p, top_p, 1);
|
||||
llama_sample_top_k(ctx, &candidates_p, top_k, 1);
|
||||
llama_sample_temperature(ctx, &candidates_p, temp);
|
||||
result.tok = llama_sample_token(ctx, &candidates_p);
|
||||
}
|
||||
|
|
|
@ -2635,7 +2635,7 @@ void ggml_cuda_free_scratch() {
|
|||
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
|
||||
ggml_cuda_func_t func;
|
||||
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|
||||
|| tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT
|
||||
|| (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
|
||||
|| (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
|
||||
|
||||
switch (tensor->op) {
|
||||
|
|
371
ggml.c
371
ggml.c
|
@ -1,5 +1,5 @@
|
|||
// Defines CLOCK_MONOTONIC on Linux
|
||||
#define _GNU_SOURCE
|
||||
#define _GNU_SOURCE // Defines CLOCK_MONOTONIC on Linux
|
||||
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
|
@ -131,6 +131,34 @@ typedef void* thread_ret_t;
|
|||
#define GGML_MEM_ALIGN 16
|
||||
#endif
|
||||
|
||||
//
|
||||
// logging
|
||||
//
|
||||
|
||||
#if (GGML_DEBUG >= 1)
|
||||
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define GGML_PRINT_DEBUG(...)
|
||||
#endif
|
||||
|
||||
#if (GGML_DEBUG >= 5)
|
||||
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define GGML_PRINT_DEBUG_5(...)
|
||||
#endif
|
||||
|
||||
#if (GGML_DEBUG >= 10)
|
||||
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define GGML_PRINT_DEBUG_10(...)
|
||||
#endif
|
||||
|
||||
#define GGML_PRINT(...) printf(__VA_ARGS__)
|
||||
|
||||
//
|
||||
// end of logging block
|
||||
//
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
|
||||
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
|
||||
|
@ -144,6 +172,17 @@ inline static void* ggml_aligned_malloc(size_t size) {
|
|||
#endif
|
||||
if (result != 0) {
|
||||
// Handle allocation failure
|
||||
const char *error_desc = "unknown allocation error";
|
||||
switch (result) {
|
||||
case EINVAL:
|
||||
error_desc = "invalid alignment value";
|
||||
break;
|
||||
case ENOMEM:
|
||||
error_desc = "insufficient memory";
|
||||
break;
|
||||
}
|
||||
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
|
||||
__func__, error_desc, size/(1024.0*1024.0));
|
||||
return NULL;
|
||||
}
|
||||
return aligned_memory;
|
||||
|
@ -3530,30 +3569,6 @@ inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x
|
|||
*s = 1.f/(*s);
|
||||
}
|
||||
|
||||
//
|
||||
// logging
|
||||
//
|
||||
|
||||
#if (GGML_DEBUG >= 1)
|
||||
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define GGML_PRINT_DEBUG(...)
|
||||
#endif
|
||||
|
||||
#if (GGML_DEBUG >= 5)
|
||||
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define GGML_PRINT_DEBUG_5(...)
|
||||
#endif
|
||||
|
||||
#if (GGML_DEBUG >= 10)
|
||||
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
|
||||
#else
|
||||
#define GGML_PRINT_DEBUG_10(...)
|
||||
#endif
|
||||
|
||||
#define GGML_PRINT(...) printf(__VA_ARGS__)
|
||||
|
||||
//
|
||||
// data types
|
||||
//
|
||||
|
@ -3713,11 +3728,15 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|||
"MAP_UNARY",
|
||||
"MAP_BINARY",
|
||||
|
||||
"MAP_CUSTOM1",
|
||||
"MAP_CUSTOM2",
|
||||
"MAP_CUSTOM3",
|
||||
|
||||
"CROSS_ENTROPY_LOSS",
|
||||
"CROSS_ENTROPY_LOSS_BACK",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61");
|
||||
static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
|
||||
|
||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||
"none",
|
||||
|
@ -3785,11 +3804,15 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|||
"f(x)",
|
||||
"f(x,y)",
|
||||
|
||||
"custom(x)",
|
||||
"custom(x,y)",
|
||||
"custom(x,y,z)",
|
||||
|
||||
"cross_entropy_loss(x,y)",
|
||||
"cross_entropy_loss_back(x,y)",
|
||||
};
|
||||
|
||||
static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61");
|
||||
static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
|
||||
|
||||
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
||||
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
|
||||
|
@ -7094,9 +7117,14 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
|
|||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_scratch_save(ctx);
|
||||
|
||||
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
||||
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
||||
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_scratch_load(ctx);
|
||||
|
||||
result->op = GGML_OP_MAP_UNARY;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
|
@ -7136,9 +7164,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
|
|||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_scratch_save(ctx);
|
||||
|
||||
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
||||
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
||||
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_scratch_load(ctx);
|
||||
|
||||
result->op = GGML_OP_MAP_BINARY;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
|
@ -7165,6 +7198,150 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|||
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom1
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_f32_t fun,
|
||||
bool inplace) {
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && a->grad) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_scratch_save(ctx);
|
||||
|
||||
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
||||
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
||||
|
||||
ggml_scratch_load(ctx);
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM1;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src0 = a;
|
||||
result->opt[0] = addr_tensor;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_f32_t fun) {
|
||||
return ggml_map_custom1_impl_f32(ctx, a, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
const ggml_custom1_op_f32_t fun) {
|
||||
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom2
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_f32_t fun,
|
||||
bool inplace) {
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && (a->grad || b->grad)) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_scratch_save(ctx);
|
||||
|
||||
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
||||
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
||||
|
||||
ggml_scratch_load(ctx);
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM2;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src0 = a;
|
||||
result->src1 = b;
|
||||
result->opt[0] = addr_tensor;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_f32_t fun) {
|
||||
return ggml_map_custom2_impl_f32(ctx, a, b, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
const ggml_custom2_op_f32_t fun) {
|
||||
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
|
||||
}
|
||||
|
||||
// ggml_map_custom3
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3_impl_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_f32_t fun,
|
||||
bool inplace) {
|
||||
bool is_node = false;
|
||||
|
||||
if (!inplace && (a->grad || b->grad || c->grad)) {
|
||||
is_node = true;
|
||||
}
|
||||
|
||||
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||
|
||||
ggml_scratch_save(ctx);
|
||||
|
||||
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
||||
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
||||
|
||||
ggml_scratch_load(ctx);
|
||||
|
||||
result->op = GGML_OP_MAP_CUSTOM3;
|
||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||
result->src0 = a;
|
||||
result->src1 = b;
|
||||
result->opt[0] = addr_tensor;
|
||||
result->opt[1] = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, false);
|
||||
}
|
||||
|
||||
struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
|
||||
}
|
||||
|
||||
// ggml_cross_entropy_loss
|
||||
|
||||
struct ggml_tensor * ggml_cross_entropy_loss(
|
||||
|
@ -14621,6 +14798,114 @@ static void ggml_compute_forward_map_binary(
|
|||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom1
|
||||
|
||||
static void ggml_compute_forward_map_custom1_f32(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom1_op_f32_t fun) {
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
fun(dst, a);
|
||||
}
|
||||
|
||||
|
||||
static void ggml_compute_forward_map_custom1(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom1_op_f32_t fun) {
|
||||
switch (a->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom2
|
||||
|
||||
static void ggml_compute_forward_map_custom2_f32(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom2_op_f32_t fun) {
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
fun(dst, a, b);
|
||||
}
|
||||
|
||||
|
||||
static void ggml_compute_forward_map_custom2(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom2_op_f32_t fun) {
|
||||
switch (a->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_map_custom3
|
||||
|
||||
static void ggml_compute_forward_map_custom3_f32(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
const struct ggml_tensor * c,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
|
||||
fun(dst, a, b, c);
|
||||
}
|
||||
|
||||
|
||||
static void ggml_compute_forward_map_custom3(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * a,
|
||||
const struct ggml_tensor * b,
|
||||
const struct ggml_tensor * c,
|
||||
struct ggml_tensor * dst,
|
||||
const ggml_custom3_op_f32_t fun) {
|
||||
switch (a->type) {
|
||||
case GGML_TYPE_F32:
|
||||
{
|
||||
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
|
||||
} break;
|
||||
default:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
} break;
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_cross_entropy_loss
|
||||
|
||||
static void ggml_compute_forward_cross_entropy_loss_f32(
|
||||
|
@ -14911,7 +15196,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||
if (skip_cpu) {
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(tensor->src0->backend == GGML_BACKEND_CPU);
|
||||
GGML_ASSERT(tensor->src0 == NULL || tensor->src0->backend == GGML_BACKEND_CPU);
|
||||
GGML_ASSERT(tensor->src1 == NULL || tensor->src1->backend == GGML_BACKEND_CPU);
|
||||
#endif // GGML_USE_CUBLAS
|
||||
|
||||
|
@ -15158,6 +15443,24 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||
ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
{
|
||||
const ggml_custom1_op_f32_t fun = *((ggml_custom1_op_f32_t *)tensor->opt[0]->data);
|
||||
ggml_compute_forward_map_custom1(params, tensor->src0, tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
{
|
||||
const ggml_custom2_op_f32_t fun = *((ggml_custom2_op_f32_t *)tensor->opt[0]->data);
|
||||
ggml_compute_forward_map_custom2(params, tensor->src0, tensor->src1, tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
{
|
||||
const ggml_custom3_op_f32_t fun = *((ggml_custom3_op_f32_t *)tensor->opt[0]->data);
|
||||
ggml_compute_forward_map_custom3(params, tensor->src0, tensor->src1, tensor->opt[1], tensor, fun);
|
||||
}
|
||||
break;
|
||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||
{
|
||||
ggml_compute_forward_cross_entropy_loss(params, tensor->src0, tensor->src1, tensor);
|
||||
|
@ -15964,6 +16267,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|||
case GGML_OP_WIN_UNPART:
|
||||
case GGML_OP_MAP_UNARY:
|
||||
case GGML_OP_MAP_BINARY:
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
{
|
||||
GGML_ASSERT(false); // not supported
|
||||
} break;
|
||||
|
@ -16605,6 +16911,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
|
|||
case GGML_OP_WIN_UNPART:
|
||||
case GGML_OP_MAP_UNARY:
|
||||
case GGML_OP_MAP_BINARY:
|
||||
case GGML_OP_MAP_CUSTOM1:
|
||||
case GGML_OP_MAP_CUSTOM2:
|
||||
case GGML_OP_MAP_CUSTOM3:
|
||||
{
|
||||
node->n_tasks = 1;
|
||||
} break;
|
||||
|
|
60
ggml.h
60
ggml.h
|
@ -345,6 +345,10 @@ extern "C" {
|
|||
GGML_OP_MAP_UNARY,
|
||||
GGML_OP_MAP_BINARY,
|
||||
|
||||
GGML_OP_MAP_CUSTOM1,
|
||||
GGML_OP_MAP_CUSTOM2,
|
||||
GGML_OP_MAP_CUSTOM3,
|
||||
|
||||
GGML_OP_CROSS_ENTROPY_LOSS,
|
||||
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
||||
|
||||
|
@ -1167,21 +1171,73 @@ extern "C" {
|
|||
int h0,
|
||||
int w);
|
||||
|
||||
// Mapping operations
|
||||
typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
|
||||
// custom operators
|
||||
|
||||
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
||||
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
||||
|
||||
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
||||
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_unary_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_unary_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_binary_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_binary_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_custom1_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
ggml_custom1_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_custom2_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
ggml_custom2_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
ggml_custom3_op_f32_t fun);
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
||||
struct ggml_context * ctx,
|
||||
struct ggml_tensor * a,
|
||||
struct ggml_tensor * b,
|
||||
struct ggml_tensor * c,
|
||||
ggml_custom3_op_f32_t fun);
|
||||
|
||||
// loss function
|
||||
|
||||
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
|
||||
#include "ggml.h"
|
||||
|
||||
#include <math.h>
|
||||
|
@ -5,6 +6,10 @@
|
|||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
#endif
|
||||
|
||||
#define MAX_NARGS 3
|
||||
|
||||
#undef MIN
|
||||
|
@ -197,8 +202,23 @@ bool check_gradient(
|
|||
float max_error_abs,
|
||||
float max_error_rel) {
|
||||
|
||||
static int n_threads = -1;
|
||||
if (n_threads < 0) {
|
||||
n_threads = GGML_DEFAULT_N_THREADS;
|
||||
|
||||
const char *env = getenv("GGML_N_THREADS");
|
||||
if (env) {
|
||||
n_threads = atoi(env);
|
||||
}
|
||||
|
||||
printf("GGML_N_THREADS = %d\n", n_threads);
|
||||
}
|
||||
|
||||
struct ggml_cgraph gf = ggml_build_forward (f);
|
||||
gf.n_threads = n_threads;
|
||||
|
||||
struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
|
||||
gb.n_threads = n_threads;
|
||||
|
||||
ggml_graph_compute(ctx0, &gf);
|
||||
ggml_graph_reset (&gf);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue