Merge branch 'ggerganov:master' into master
commit af058cf820
7 changed files with 466 additions and 81 deletions
@@ -5,12 +5,15 @@
 [](https://github.com/ggerganov/llama.cpp/actions)
 [](https://opensource.org/licenses/MIT)
 
+[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
 
 Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
 
 **Hot topics:**
 
+- New roadmap: https://github.com/users/ggerganov/projects/7
+- Azure CI brainstorming: https://github.com/ggerganov/llama.cpp/discussions/1985
 - p1 : LLM-based code completion engine at the edge : https://github.com/ggml-org/p1/discussions/1
-- Roadmap June 2023: https://github.com/ggerganov/llama.cpp/discussions/1729
 
 <details>
 <summary>Table of Contents</summary>
build.zig (79 changes)
@@ -1,61 +1,58 @@
 const std = @import("std");
 
+// Zig Version: 0.11.0-dev.3379+629f0d23b
 pub fn build(b: *std.build.Builder) void {
     const target = b.standardTargetOptions(.{});
-    const optimize = b.standardReleaseOptions();
-    const want_lto = b.option(bool, "lto", "Want -fLTO");
-    const lib = b.addStaticLibrary("llama", null);
-    lib.want_lto = want_lto;
-    lib.setTarget(target);
-    lib.setBuildMode(optimize);
+    const optimize = b.standardOptimizeOption(.{});
+    const lib = b.addStaticLibrary(.{
+        .name = "llama",
+        .target = target,
+        .optimize = optimize,
+    });
+    lib.linkLibC();
     lib.linkLibCpp();
     lib.addIncludePath(".");
-    lib.addIncludePath("examples");
+    lib.addIncludePath("./examples");
     lib.addCSourceFiles(&.{
         "ggml.c",
     }, &.{"-std=c11"});
     lib.addCSourceFiles(&.{
         "llama.cpp",
     }, &.{"-std=c++11"});
-    lib.install();
-
-    const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize, .want_lto = want_lto };
-
-    const exe = build_example("main", build_args);
-    _ = build_example("quantize", build_args);
-    _ = build_example("perplexity", build_args);
-    _ = build_example("embedding", build_args);
-
-    // create "zig build run" command for ./main
-
-    const run_cmd = exe.run();
-    run_cmd.step.dependOn(b.getInstallStep());
-    if (b.args) |args| {
-        run_cmd.addArgs(args);
-    }
-
-    const run_step = b.step("run", "Run the app");
-    run_step.dependOn(&run_cmd.step);
-}
-
-fn build_example(comptime name: []const u8, args: anytype) *std.build.LibExeObjStep {
-    const b = args.b;
-    const lib = args.lib;
-    const want_lto = args.want_lto;
-
-    const exe = b.addExecutable(name, null);
-    exe.want_lto = want_lto;
-    lib.setTarget(args.target);
-    lib.setBuildMode(args.optimize);
-    exe.addIncludePath(".");
-    exe.addIncludePath("examples");
-    exe.addCSourceFiles(&.{
-        std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{name, name}),
-        "examples/common.cpp",
-    }, &.{"-std=c++11"});
-    exe.linkLibrary(lib);
-    exe.install();
-
-    return exe;
+    b.installArtifact(lib);
+
+    const examples = .{
+        "main",
+        "baby-llama",
+        "embedding",
+        // "metal",
+        "perplexity",
+        "quantize",
+        "quantize-stats",
+        "save-load-state",
+        // "server",
+        "simple",
+        "train-text-from-scratch",
+    };
+
+    inline for (examples) |example_name| {
+        const exe = b.addExecutable(.{
+            .name = example_name,
+            .target = target,
+            .optimize = optimize,
+        });
+        exe.addIncludePath(".");
+        exe.addIncludePath("./examples");
+        exe.addCSourceFiles(&.{
+            std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{example_name, example_name}),
+            "examples/common.cpp",
+        }, &.{"-std=c++11"});
+        exe.linkLibrary(lib);
+        b.installArtifact(exe);
+
+        const run_cmd = b.addRunArtifact(exe);
+        run_cmd.step.dependOn(b.getInstallStep());
+        if (b.args) |args| run_cmd.addArgs(args);
+
+        const run_step = b.step("run_" ++ example_name, "Run the app");
+        run_step.dependOn(&run_cmd.step);
+    }
 }
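Note on the rewritten build script: with the example list and the inline loop, each example gets its own installed artifact and a dedicated run step named `run_<example>` (from `"run_" ++ example_name`). Assuming a Zig nightly matching the version noted at the top of the file, something like `zig build run_main -- <args>` should build and run the main example, with trailing arguments forwarded through `b.args`.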
@@ -374,10 +374,10 @@ struct llama_server_context {
             result.tok = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
         } else {
             // Temperature sampling
+            llama_sample_top_k(ctx, &candidates_p, top_k, 1);
             llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
             llama_sample_typical(ctx, &candidates_p, typical_p, 1);
             llama_sample_top_p(ctx, &candidates_p, top_p, 1);
-            llama_sample_top_k(ctx, &candidates_p, top_k, 1);
             llama_sample_temperature(ctx, &candidates_p, temp);
             result.tok = llama_sample_token(ctx, &candidates_p);
         }
@@ -2635,7 +2635,7 @@ void ggml_cuda_free_scratch() {
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
-        || tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT
+        || (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
         || (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
 
     switch (tensor->op) {
ggml.c (371 changes)
@@ -1,5 +1,5 @@
-// Defines CLOCK_MONOTONIC on Linux
-#define _GNU_SOURCE
+#define _GNU_SOURCE // Defines CLOCK_MONOTONIC on Linux
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 
 #include "ggml.h"
 
@@ -131,6 +131,34 @@ typedef void* thread_ret_t;
 #define GGML_MEM_ALIGN 16
 #endif
 
+//
+// logging
+//
+
+#if (GGML_DEBUG >= 1)
+#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG(...)
+#endif
+
+#if (GGML_DEBUG >= 5)
+#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG_5(...)
+#endif
+
+#if (GGML_DEBUG >= 10)
+#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG_10(...)
+#endif
+
+#define GGML_PRINT(...) printf(__VA_ARGS__)
+
+//
+// end of logging block
+//
+
 #if defined(_MSC_VER) || defined(__MINGW32__)
 #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
 #define GGML_ALIGNED_FREE(ptr)    _aligned_free(ptr)
@@ -144,6 +172,17 @@ inline static void* ggml_aligned_malloc(size_t size) {
 #endif
     if (result != 0) {
         // Handle allocation failure
+        const char *error_desc = "unknown allocation error";
+        switch (result) {
+            case EINVAL:
+                error_desc = "invalid alignment value";
+                break;
+            case ENOMEM:
+                error_desc = "insufficient memory";
+                break;
+        }
+        GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
+            __func__, error_desc, size/(1024.0*1024.0));
         return NULL;
     }
     return aligned_memory;
@@ -3530,30 +3569,6 @@ inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x
     *s = 1.f/(*s);
 }
 
-//
-// logging
-//
-
-#if (GGML_DEBUG >= 1)
-#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG(...)
-#endif
-
-#if (GGML_DEBUG >= 5)
-#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_5(...)
-#endif
-
-#if (GGML_DEBUG >= 10)
-#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_10(...)
-#endif
-
-#define GGML_PRINT(...) printf(__VA_ARGS__)
-
 //
 // data types
 //
@@ -3713,11 +3728,15 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "MAP_UNARY",
     "MAP_BINARY",
 
+    "MAP_CUSTOM1",
+    "MAP_CUSTOM2",
+    "MAP_CUSTOM3",
+
     "CROSS_ENTROPY_LOSS",
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61");
+static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -3785,11 +3804,15 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "f(x)",
     "f(x,y)",
 
+    "custom(x)",
+    "custom(x,y)",
+    "custom(x,y,z)",
+
     "cross_entropy_loss(x,y)",
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 61, "GGML_OP_COUNT != 61");
+static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
 
 static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
 static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -7094,9 +7117,14 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
         is_node = true;
     }
 
+    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_scratch_save(ctx);
+
     struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
     *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
-    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_scratch_load(ctx);
 
     result->op = GGML_OP_MAP_UNARY;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7136,9 +7164,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
         is_node = true;
     }
 
+    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_scratch_save(ctx);
+
     struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
     *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
-    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_scratch_load(ctx);
 
     result->op = GGML_OP_MAP_BINARY;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
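In both ggml_map_unary_impl_f32 and ggml_map_binary_impl_f32 the result tensor is now created before ggml_scratch_save, while the small I32 tensor that stores the callback address is allocated between ggml_scratch_save and ggml_scratch_load. The apparent intent (not stated in the diff) is to keep the function-pointer holder out of the scratch buffer, so it is not overwritten when scratch memory is reused, while the output tensor is allocated exactly as before.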
@@ -7165,6 +7198,150 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
     return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
 }
 
+// ggml_map_custom1
+
+struct ggml_tensor * ggml_map_custom1_impl_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        const ggml_custom1_op_f32_t fun,
+        bool inplace) {
+    bool is_node = false;
+
+    if (!inplace && a->grad) {
+        is_node = true;
+    }
+
+    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_scratch_save(ctx);
+
+    struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+    *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+
+    ggml_scratch_load(ctx);
+
+    result->op = GGML_OP_MAP_CUSTOM1;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->opt[0] = addr_tensor;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_custom1_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        const ggml_custom1_op_f32_t fun) {
+    return ggml_map_custom1_impl_f32(ctx, a, fun, false);
+}
+
+struct ggml_tensor * ggml_map_custom1_inplace_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        const ggml_custom1_op_f32_t fun) {
+    return ggml_map_custom1_impl_f32(ctx, a, fun, true);
+}
+
+// ggml_map_custom2
+
+struct ggml_tensor * ggml_map_custom2_impl_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        const ggml_custom2_op_f32_t fun,
+        bool inplace) {
+    bool is_node = false;
+
+    if (!inplace && (a->grad || b->grad)) {
+        is_node = true;
+    }
+
+    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_scratch_save(ctx);
+
+    struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+    *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+
+    ggml_scratch_load(ctx);
+
+    result->op = GGML_OP_MAP_CUSTOM2;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->src1 = b;
+    result->opt[0] = addr_tensor;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_custom2_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        const ggml_custom2_op_f32_t fun) {
+    return ggml_map_custom2_impl_f32(ctx, a, b, fun, false);
+}
+
+struct ggml_tensor * ggml_map_custom2_inplace_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        const ggml_custom2_op_f32_t fun) {
+    return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
+}
+
+// ggml_map_custom3
+
+struct ggml_tensor * ggml_map_custom3_impl_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        struct ggml_tensor * c,
+        const ggml_custom3_op_f32_t fun,
+        bool inplace) {
+    bool is_node = false;
+
+    if (!inplace && (a->grad || b->grad || c->grad)) {
+        is_node = true;
+    }
+
+    struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_scratch_save(ctx);
+
+    struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
+    *((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
+
+    ggml_scratch_load(ctx);
+
+    result->op = GGML_OP_MAP_CUSTOM3;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src0 = a;
+    result->src1 = b;
+    result->opt[0] = addr_tensor;
+    result->opt[1] = c;
+
+    return result;
+}
+
+struct ggml_tensor * ggml_map_custom3_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        struct ggml_tensor * c,
+        const ggml_custom3_op_f32_t fun) {
+    return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, false);
+}
+
+struct ggml_tensor * ggml_map_custom3_inplace_f32(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        struct ggml_tensor * c,
+        const ggml_custom3_op_f32_t fun) {
+    return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
+}
+
 // ggml_cross_entropy_loss
 
 struct ggml_tensor * ggml_cross_entropy_loss(
@@ -14621,6 +14798,114 @@ static void ggml_compute_forward_map_binary(
     }
 }
 
+// ggml_compute_forward_map_custom1
+
+static void ggml_compute_forward_map_custom1_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+        struct ggml_tensor * dst,
+        const ggml_custom1_op_f32_t fun) {
+    assert(params->ith == 0);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    fun(dst, a);
+}
+
+
+static void ggml_compute_forward_map_custom1(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+        struct ggml_tensor * dst,
+        const ggml_custom1_op_f32_t fun) {
+    switch (a->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
+// ggml_compute_forward_map_custom2
+
+static void ggml_compute_forward_map_custom2_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+        const struct ggml_tensor * b,
+        struct ggml_tensor * dst,
+        const ggml_custom2_op_f32_t fun) {
+    assert(params->ith == 0);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    fun(dst, a, b);
+}
+
+
+static void ggml_compute_forward_map_custom2(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+        const struct ggml_tensor * b,
+        struct ggml_tensor * dst,
+        const ggml_custom2_op_f32_t fun) {
+    switch (a->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
+// ggml_compute_forward_map_custom3
+
+static void ggml_compute_forward_map_custom3_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+        const struct ggml_tensor * b,
+        const struct ggml_tensor * c,
+        struct ggml_tensor * dst,
+        const ggml_custom3_op_f32_t fun) {
+    assert(params->ith == 0);
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    fun(dst, a, b, c);
+}
+
+
+static void ggml_compute_forward_map_custom3(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * a,
+        const struct ggml_tensor * b,
+        const struct ggml_tensor * c,
+        struct ggml_tensor * dst,
+        const ggml_custom3_op_f32_t fun) {
+    switch (a->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
 // ggml_compute_forward_cross_entropy_loss
 
 static void ggml_compute_forward_cross_entropy_loss_f32(
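For orientation, here is a minimal sketch (not part of the diff) of a callback in the ggml_custom2_op_f32_t shape consumed by the dispatchers above: the destination tensor comes first and the callback fills dst->data itself, since the forward functions simply invoke fun(dst, a, b) on a single thread. The callback name and operation are hypothetical.

```c
#include "ggml.h"

// Hypothetical callback: element-wise maximum of two same-shape F32 tensors,
// written against the ggml_custom2_op_f32_t signature introduced in this change.
static void my_elementwise_max(struct ggml_tensor * dst,
                               const struct ggml_tensor * a,
                               const struct ggml_tensor * b) {
    const int64_t n = ggml_nelements(a);
    const float * xa = (const float *) a->data;
    const float * xb = (const float *) b->data;
    float       * y  = (float *)       dst->data;
    for (int64_t i = 0; i < n; ++i) {
        y[i] = xa[i] > xb[i] ? xa[i] : xb[i];
    }
}
```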
@@ -14911,7 +15196,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     if (skip_cpu) {
         return;
     }
-    GGML_ASSERT(tensor->src0->backend == GGML_BACKEND_CPU);
+    GGML_ASSERT(tensor->src0 == NULL || tensor->src0->backend == GGML_BACKEND_CPU);
     GGML_ASSERT(tensor->src1 == NULL || tensor->src1->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_CUBLAS
 
@@ -15158,6 +15443,24 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
                 ggml_compute_forward_map_binary(params, tensor->src0, tensor->src1, tensor, fun);
             }
             break;
+        case GGML_OP_MAP_CUSTOM1:
+            {
+                const ggml_custom1_op_f32_t fun = *((ggml_custom1_op_f32_t *)tensor->opt[0]->data);
+                ggml_compute_forward_map_custom1(params, tensor->src0, tensor, fun);
+            }
+            break;
+        case GGML_OP_MAP_CUSTOM2:
+            {
+                const ggml_custom2_op_f32_t fun = *((ggml_custom2_op_f32_t *)tensor->opt[0]->data);
+                ggml_compute_forward_map_custom2(params, tensor->src0, tensor->src1, tensor, fun);
+            }
+            break;
+        case GGML_OP_MAP_CUSTOM3:
+            {
+                const ggml_custom3_op_f32_t fun = *((ggml_custom3_op_f32_t *)tensor->opt[0]->data);
+                ggml_compute_forward_map_custom3(params, tensor->src0, tensor->src1, tensor->opt[1], tensor, fun);
+            }
+            break;
         case GGML_OP_CROSS_ENTROPY_LOSS:
             {
                 ggml_compute_forward_cross_entropy_loss(params, tensor->src0, tensor->src1, tensor);
@@ -15964,6 +16267,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
         case GGML_OP_WIN_UNPART:
         case GGML_OP_MAP_UNARY:
         case GGML_OP_MAP_BINARY:
+        case GGML_OP_MAP_CUSTOM1:
+        case GGML_OP_MAP_CUSTOM2:
+        case GGML_OP_MAP_CUSTOM3:
             {
                 GGML_ASSERT(false); // not supported
             } break;
@@ -16605,6 +16911,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
             case GGML_OP_WIN_UNPART:
             case GGML_OP_MAP_UNARY:
             case GGML_OP_MAP_BINARY:
+            case GGML_OP_MAP_CUSTOM1:
+            case GGML_OP_MAP_CUSTOM2:
+            case GGML_OP_MAP_CUSTOM3:
                 {
                     node->n_tasks = 1;
                 } break;
ggml.h (58 changes)
@@ -345,6 +345,10 @@ extern "C" {
     GGML_OP_MAP_UNARY,
     GGML_OP_MAP_BINARY,
 
+    GGML_OP_MAP_CUSTOM1,
+    GGML_OP_MAP_CUSTOM2,
+    GGML_OP_MAP_CUSTOM3,
+
     GGML_OP_CROSS_ENTROPY_LOSS,
     GGML_OP_CROSS_ENTROPY_LOSS_BACK,
 
@@ -1167,21 +1171,73 @@ extern "C" {
             int h0,
             int w);
 
-    // Mapping operations
+    // custom operators
 
     typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
     typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
 
+    typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+    typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
+
     GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             ggml_unary_op_f32_t fun);
 
+    GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            ggml_unary_op_f32_t fun);
+
     GGML_API struct ggml_tensor * ggml_map_binary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b,
             ggml_binary_op_f32_t fun);
 
+    GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            ggml_binary_op_f32_t fun);
+
+    GGML_API struct ggml_tensor * ggml_map_custom1_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            ggml_custom1_op_f32_t fun);
+
+    GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            ggml_custom1_op_f32_t fun);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            ggml_custom2_op_f32_t fun);
+
+    GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            ggml_custom2_op_f32_t fun);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            struct ggml_tensor * c,
+            ggml_custom3_op_f32_t fun);
+
+    GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b,
+            struct ggml_tensor * c,
+            ggml_custom3_op_f32_t fun);
+
     // loss function
 
     GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
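As a usage sketch of the new public API (assumptions: an existing `ggml_context * ctx` and an F32 input tensor `x` created elsewhere; the callback name and body are hypothetical, not part of this commit):

```c
#include "ggml.h"
#include <math.h>

// Hypothetical unary callback: element-wise tanh. ggml_map_custom1_f32 creates
// dst as a duplicate of the input, so the callback only has to fill dst->data.
static void my_tanh(struct ggml_tensor * dst, const struct ggml_tensor * src) {
    const int64_t n = ggml_nelements(src);
    const float * xs = (const float *) src->data;
    float       * yd = (float *)       dst->data;
    for (int64_t i = 0; i < n; ++i) {
        yd[i] = tanhf(xs[i]);
    }
}

// During graph construction (ctx and x assumed to exist):
//     struct ggml_tensor * y = ggml_map_custom1_f32(ctx, x, my_tanh);
// y is then evaluated together with the rest of the graph by ggml_graph_compute.
```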
@@ -1,3 +1,4 @@
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
 #include "ggml.h"
 
 #include <math.h>
@@ -5,6 +6,10 @@
 #include <stdlib.h>
 #include <assert.h>
 
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
 #define MAX_NARGS 3
 
 #undef MIN
@@ -197,8 +202,23 @@ bool check_gradient(
         float max_error_abs,
         float max_error_rel) {
 
+    static int n_threads = -1;
+    if (n_threads < 0) {
+        n_threads = GGML_DEFAULT_N_THREADS;
+
+        const char *env = getenv("GGML_N_THREADS");
+        if (env) {
+            n_threads = atoi(env);
+        }
+
+        printf("GGML_N_THREADS = %d\n", n_threads);
+    }
+
     struct ggml_cgraph gf = ggml_build_forward (f);
+    gf.n_threads = n_threads;
+
     struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
+    gb.n_threads = n_threads;
+
     ggml_graph_compute(ctx0, &gf);
     ggml_graph_reset  (&gf);
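With this change the thread count used by check_gradient can presumably be overridden at runtime by setting the GGML_N_THREADS environment variable before running the test binary; when it is unset, GGML_DEFAULT_N_THREADS is used.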