Merge 'origin/master' into hipblas

This commit is contained in:
Henri Vasserman 2023-08-09 00:37:01 +03:00
commit ab6212864c
No known key found for this signature in database
GPG key ID: 2995FC0F58B1A986
38 changed files with 4227 additions and 2795 deletions

View file

@ -73,7 +73,7 @@ set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA ke
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
option(LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF) option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)
option(LLAMA_CLBLAST "llama: use CLBlast" OFF) option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
@ -266,8 +266,8 @@ if (LLAMA_CUBLAS)
if (DEFINED LLAMA_CUDA_DMMV_Y) if (DEFINED LLAMA_CUDA_DMMV_Y)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
endif() endif()
if (LLAMA_CUDA_DMMV_F16) if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
add_compile_definitions(GGML_CUDA_DMMV_F16) add_compile_definitions(GGML_CUDA_F16)
endif() endif()
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
@ -281,8 +281,8 @@ if (LLAMA_CUBLAS)
# 52 == lowest CUDA 12 standard # 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics # 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics # 61 == integer CUDA intrinsics
# 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (LLAMA_CUDA_DMMV_F16) if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
else() else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics

View file

@ -142,6 +142,28 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
#CXXFLAGS += -mssse3 #CXXFLAGS += -mssse3
endif endif
ifneq ($(filter aarch64%,$(UNAME_M)),)
# Apple M1, M2, etc.
# Raspberry Pi 3, 4, Zero 2 (64-bit)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
endif
ifneq ($(filter armv6%,$(UNAME_M)),)
# Raspberry Pi 1, Zero
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq ($(filter armv7%,$(UNAME_M)),)
# Raspberry Pi 2
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 3, 4, Zero 2 (32-bit)
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif
ifneq ($(filter ppc64%,$(UNAME_M)),) ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M))) ifneq (,$(findstring POWER9,$(POWER9_M)))
@ -243,7 +265,7 @@ ifdef LLAMA_CUDA_CCBIN
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif endif
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ $(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) -Wno-pedantic -c $< -o $@
endif # LLAMA_CUBLAS endif # LLAMA_CUBLAS
ifdef LLAMA_CLBLAST ifdef LLAMA_CLBLAST
@ -293,28 +315,6 @@ ifdef LLAMA_METAL
OBJS += ggml-metal.o OBJS += ggml-metal.o
endif # LLAMA_METAL endif # LLAMA_METAL
ifneq ($(filter aarch64%,$(UNAME_M)),)
# Apple M1, M2, etc.
# Raspberry Pi 3, 4, Zero 2 (64-bit)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
endif
ifneq ($(filter armv6%,$(UNAME_M)),)
# Raspberry Pi 1, Zero
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq ($(filter armv7%,$(UNAME_M)),)
# Raspberry Pi 2
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 3, 4, Zero 2 (32-bit)
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif
ifdef LLAMA_METAL ifdef LLAMA_METAL
ggml-metal.o: ggml-metal.m ggml-metal.h ggml-metal.o: ggml-metal.m ggml-metal.h
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -c $< -o $@
@ -363,6 +363,9 @@ llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-ut
common.o: examples/common.cpp examples/common.h common.o: examples/common.cpp examples/common.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
console.o: examples/console.cpp examples/console.h
$(CXX) $(CXXFLAGS) -c $< -o $@
grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
@ -376,7 +379,7 @@ clean:
# Examples # Examples
# #
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS) main: examples/main/main.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@echo @echo
@echo '==== Run ./main -h for help. ====' @echo '==== Run ./main -h for help. ===='
@ -400,7 +403,7 @@ embedding: examples/embedding/embedding.cpp build-info.h ggml.
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS) save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o common.o $(OBJS) server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2)
$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS)
@ -434,13 +437,13 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
tests/test-double-float: tests/test-double-float.c build-info.h ggml.o llama.o common.o $(OBJS) tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
tests/test-grad0: tests/test-grad0.c build-info.h ggml.o llama.o common.o $(OBJS) tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
tests/test-opt: tests/test-opt.c build-info.h ggml.o llama.o common.o $(OBJS) tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS) tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS)

View file

@ -80,7 +80,7 @@ as the main playground for developing new features for the [ggml](https://github
- [x] LLaMA 2 🦙🦙 - [x] LLaMA 2 🦙🦙
- [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca) - [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca)
- [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all) - [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all)
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne) - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
- [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894) - [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894)
- [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
@ -88,6 +88,7 @@ as the main playground for developing new features for the [ggml](https://github
- [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b) - [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM) - [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft)) - [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft))
- [X] [Aquila-7B](https://huggingface.co/BAAI/Aquila-7B) / [AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
**Bindings:** **Bindings:**
@ -522,6 +523,9 @@ Building the program with BLAS support may lead to some performance improvements
# obtain the original LLaMA model weights and place them in ./models # obtain the original LLaMA model weights and place them in ./models
ls ./models ls ./models
65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model 65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model
# [Optional] for models using BPE tokenizers
ls ./models
65B 30B 13B 7B vocab.json
# install Python dependencies # install Python dependencies
python3 -m pip install -r requirements.txt python3 -m pip install -r requirements.txt
@ -529,6 +533,9 @@ python3 -m pip install -r requirements.txt
# convert the 7B model to ggml FP16 format # convert the 7B model to ggml FP16 format
python3 convert.py models/7B/ python3 convert.py models/7B/
# [Optional] for models using BPE tokenizers
python convert.py models/7B/ --vocabtype bpe
# quantize the model to 4-bits (using q4_0 method) # quantize the model to 4-bits (using q4_0 method)
./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0 ./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0

139
build.zig
View file

@ -1,68 +1,87 @@
// Compatible with Zig Version 0.11.0
const std = @import("std"); const std = @import("std");
const commit_hash = @embedFile(".git/refs/heads/master"); const Compile = std.Build.Step.Compile;
const ConfigHeader = std.Build.Step.ConfigHeader;
const Mode = std.builtin.Mode;
const CrossTarget = std.zig.CrossTarget;
const Maker = struct {
builder: *std.build.Builder,
target: CrossTarget,
optimize: Mode,
config_header: *ConfigHeader,
const cflags = .{"-std=c11"};
const cxxflags = .{"-std=c++11"};
fn init(builder: *std.build.Builder) Maker {
const commit_hash = @embedFile(".git/refs/heads/master");
const config_header = builder.addConfigHeader(
.{ .style = .blank, .include_path = "build-info.h" },
.{
.BUILD_NUMBER = 0,
.BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline
},
);
return Maker{
.builder = builder,
.target = builder.standardTargetOptions(.{}),
.optimize = builder.standardOptimizeOption(.{}),
.config_header = config_header,
};
}
fn obj(m: *const Maker, name: []const u8, src: []const u8) *Compile {
const o = m.builder.addObject(.{ .name = name, .target = m.target, .optimize = m.optimize });
if (std.mem.endsWith(u8, src, ".c")) {
o.addCSourceFiles(&.{src}, &cflags);
o.linkLibC();
} else {
o.addCSourceFiles(&.{src}, &cxxflags);
o.linkLibCpp();
}
o.addIncludePath(.{ .path = "." });
o.addIncludePath(.{ .path = "./examples" });
return o;
}
fn exe(m: *const Maker, name: []const u8, src: []const u8, deps: []const *Compile) *Compile {
const e = m.builder.addExecutable(.{ .name = name, .target = m.target, .optimize = m.optimize });
e.addIncludePath(.{ .path = "." });
e.addIncludePath(.{ .path = "./examples" });
e.addCSourceFiles(&.{src}, &cxxflags);
for (deps) |d| e.addObject(d);
e.linkLibC();
e.linkLibCpp();
e.addConfigHeader(m.config_header);
m.builder.installArtifact(e);
// Currently a bug is preventing correct linking for optimized builds for Windows:
// https://github.com/ziglang/zig/issues/15958
if (e.target.isWindows()) {
e.want_lto = false;
}
return e;
}
};
// Zig Version: 0.11.0-dev.3986+e05c242cd
pub fn build(b: *std.build.Builder) void { pub fn build(b: *std.build.Builder) void {
const target = b.standardTargetOptions(.{}); const make = Maker.init(b);
const optimize = b.standardOptimizeOption(.{});
const config_header = b.addConfigHeader( const ggml = make.obj("ggml", "ggml.c");
.{ .style = .blank, .include_path = "build-info.h" }, const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
.{ const llama = make.obj("llama", "llama.cpp");
.BUILD_NUMBER = 0, const common = make.obj("common", "examples/common.cpp");
.BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline const grammar_parser = make.obj("grammar-parser", "examples/grammar-parser.cpp");
},
);
const lib = b.addStaticLibrary(.{ _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, llama, common, grammar_parser });
.name = "llama", _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, llama });
.target = target, _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, llama, common });
.optimize = optimize, _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, llama, common });
}); _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, llama });
lib.linkLibC();
lib.linkLibCpp();
lib.addIncludePath(".");
lib.addIncludePath("./examples");
lib.addConfigHeader(config_header);
lib.addCSourceFiles(&.{"ggml.c"}, &.{"-std=c11"});
lib.addCSourceFiles(&.{"llama.cpp"}, &.{"-std=c++11"});
b.installArtifact(lib);
const examples = .{ const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, llama, common, grammar_parser });
"main", if (server.target.isWindows()) {
"baby-llama", server.linkSystemLibrary("ws2_32");
"embedding",
"metal",
"perplexity",
"quantize",
"quantize-stats",
"save-load-state",
"server",
"simple",
"train-text-from-scratch",
};
inline for (examples) |example_name| {
const exe = b.addExecutable(.{
.name = example_name,
.target = target,
.optimize = optimize,
});
exe.addIncludePath(".");
exe.addIncludePath("./examples");
exe.addConfigHeader(config_header);
exe.addCSourceFiles(&.{
std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{ example_name, example_name }),
"examples/common.cpp",
}, &.{"-std=c++11"});
exe.linkLibrary(lib);
b.installArtifact(exe);
const run_cmd = b.addRunArtifact(exe);
run_cmd.step.dependOn(b.getInstallStep());
if (b.args) |args| run_cmd.addArgs(args);
const run_step = b.step("run-" ++ example_name, "Run the app");
run_step.dependOn(&run_cmd.step);
} }
} }

View file

@ -465,6 +465,13 @@ class GGMLQuantizedTensor(Tensor):
def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'GGMLQuantizedTensor': def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'GGMLQuantizedTensor':
return GGMLQuantizedTensor(permute(self.ndarray, n_head, n_kv_head), self.shape, self.data_type) return GGMLQuantizedTensor(permute(self.ndarray, n_head, n_kv_head), self.shape, self.data_type)
def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
r = self.ndarray.shape[0] // 3
return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
def part(self, n_part: int) -> 'UnquantizedTensor':
r = self.ndarray.shape[0] // 3
return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
GGMLCompatibleTensor = Union[UnquantizedTensor, GGMLQuantizedTensor] GGMLCompatibleTensor = Union[UnquantizedTensor, GGMLQuantizedTensor]

View file

@ -13,6 +13,8 @@ set(TARGET common)
add_library(${TARGET} OBJECT add_library(${TARGET} OBJECT
common.h common.h
common.cpp common.cpp
console.h
console.cpp
grammar-parser.h grammar-parser.h
grammar-parser.cpp grammar-parser.cpp
) )

View file

@ -25,7 +25,6 @@
#else #else
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <unistd.h> #include <unistd.h>
#include <wchar.h>
#endif #endif
#if defined(_MSC_VER) #if defined(_MSC_VER)
@ -195,6 +194,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
break; break;
} }
params.rope_freq_scale = std::stof(argv[i]); params.rope_freq_scale = std::stof(argv[i]);
} else if (arg == "--rope-scale") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.rope_freq_scale = 1.0f/std::stof(argv[i]);
} else if (arg == "--memory-f32") { } else if (arg == "--memory-f32") {
params.memory_f16 = false; params.memory_f16 = false;
} else if (arg == "--top-p") { } else if (arg == "--top-p") {
@ -329,6 +334,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
params.instruct = true; params.instruct = true;
} else if (arg == "--multiline-input") { } else if (arg == "--multiline-input") {
params.multiline_input = true; params.multiline_input = true;
} else if (arg == "--simple-io") {
params.simple_io = true;
} else if (arg == "--color") { } else if (arg == "--color") {
params.use_color = true; params.use_color = true;
} else if (arg == "--mlock") { } else if (arg == "--mlock") {
@ -563,8 +570,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stdout, " --cfg-negative-prompt PROMPT \n"); fprintf(stdout, " --cfg-negative-prompt PROMPT \n");
fprintf(stdout, " negative prompt to use for guidance. (default: empty)\n"); fprintf(stdout, " negative prompt to use for guidance. (default: empty)\n");
fprintf(stdout, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale); fprintf(stdout, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
fprintf(stdout, " --rope-freq-base N RoPE base frequency (default: %.1f)\n", params.rope_freq_base); fprintf(stdout, " --rope-scale N RoPE context linear scaling factor, inverse of --rope-freq-scale (default: %g)\n", 1.0f/params.rope_freq_scale);
fprintf(stdout, " --rope-freq-scale N RoPE frequency scaling factor (default: %g)\n", params.rope_freq_scale); fprintf(stdout, " --rope-freq-base N RoPE base frequency, used by NTK-aware scaling (default: %.1f)\n", params.rope_freq_base);
fprintf(stdout, " --rope-freq-scale N RoPE frequency linear scaling factor, inverse of --rope-scale (default: %g)\n", params.rope_freq_scale);
fprintf(stdout, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n"); fprintf(stdout, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
fprintf(stdout, " --no-penalize-nl do not penalize newline token\n"); fprintf(stdout, " --no-penalize-nl do not penalize newline token\n");
fprintf(stdout, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); fprintf(stdout, " --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
@ -572,7 +580,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp); fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp);
fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n"); fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n");
fprintf(stdout, " --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n"); fprintf(stdout, " --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n");
fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %d)\n", params.hellaswag_tasks); fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
fprintf(stdout, " --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); fprintf(stdout, " --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
if (llama_mlock_supported()) { if (llama_mlock_supported()) {
@ -598,6 +606,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stdout, " --mtest compute maximum memory usage\n"); fprintf(stdout, " --mtest compute maximum memory usage\n");
fprintf(stdout, " --export export the computation graph to 'llama.ggml'\n"); fprintf(stdout, " --export export the computation graph to 'llama.ggml'\n");
fprintf(stdout, " --verbose-prompt print prompt before generation\n"); fprintf(stdout, " --verbose-prompt print prompt before generation\n");
fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles\n");
fprintf(stdout, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n"); fprintf(stdout, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
fprintf(stdout, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n"); fprintf(stdout, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
fprintf(stdout, " -m FNAME, --model FNAME\n"); fprintf(stdout, " -m FNAME, --model FNAME\n");
@ -690,376 +699,3 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
return std::make_tuple(model, lctx); return std::make_tuple(model, lctx);
} }
void console_init(console_state & con_st) {
#if defined(_WIN32)
// Windows-specific console initialization
DWORD dwMode = 0;
con_st.hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
if (con_st.hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(con_st.hConsole, &dwMode)) {
con_st.hConsole = GetStdHandle(STD_ERROR_HANDLE);
if (con_st.hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(con_st.hConsole, &dwMode))) {
con_st.hConsole = NULL;
}
}
if (con_st.hConsole) {
// Enable ANSI colors on Windows 10+
if (con_st.use_color && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
SetConsoleMode(con_st.hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
}
// Set console output codepage to UTF8
SetConsoleOutputCP(CP_UTF8);
}
HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
// Set console input codepage to UTF16
_setmode(_fileno(stdin), _O_WTEXT);
// Turn off ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
SetConsoleMode(hConIn, dwMode);
}
#else
// POSIX-specific console initialization
struct termios new_termios;
tcgetattr(STDIN_FILENO, &con_st.prev_state);
new_termios = con_st.prev_state;
new_termios.c_lflag &= ~(ICANON | ECHO);
new_termios.c_cc[VMIN] = 1;
new_termios.c_cc[VTIME] = 0;
tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);
con_st.tty = fopen("/dev/tty", "w+");
if (con_st.tty != nullptr) {
con_st.out = con_st.tty;
}
setlocale(LC_ALL, "");
#endif
}
void console_cleanup(console_state & con_st) {
// Reset console color
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
#if !defined(_WIN32)
if (con_st.tty != nullptr) {
con_st.out = stdout;
fclose(con_st.tty);
con_st.tty = nullptr;
}
// Restore the terminal settings on POSIX systems
tcsetattr(STDIN_FILENO, TCSANOW, &con_st.prev_state);
#endif
}
/* Keep track of current color of output, and emit ANSI code if it changes. */
void console_set_color(console_state & con_st, console_color_t color) {
if (con_st.use_color && con_st.color != color) {
fflush(stdout);
switch(color) {
case CONSOLE_COLOR_DEFAULT:
fprintf(con_st.out, ANSI_COLOR_RESET);
break;
case CONSOLE_COLOR_PROMPT:
fprintf(con_st.out, ANSI_COLOR_YELLOW);
break;
case CONSOLE_COLOR_USER_INPUT:
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN);
break;
case CONSOLE_COLOR_ERROR:
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED);
break;
}
con_st.color = color;
fflush(con_st.out);
}
}
char32_t getchar32() {
#if defined(_WIN32)
HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
wchar_t high_surrogate = 0;
while (true) {
INPUT_RECORD record;
DWORD count;
if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) {
return WEOF;
}
if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) {
wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar;
if (wc == 0) {
continue;
}
if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
high_surrogate = wc;
continue;
} else if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate
if (high_surrogate != 0) { // Check if we have a high surrogate
return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000;
}
}
high_surrogate = 0; // Reset the high surrogate
return static_cast<char32_t>(wc);
}
}
#else
wchar_t wc = getwchar();
if (static_cast<wint_t>(wc) == WEOF) {
return WEOF;
}
#if WCHAR_MAX == 0xFFFF
if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
wchar_t low_surrogate = getwchar();
if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate
return (static_cast<char32_t>(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000;
}
}
if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair
return 0xFFFD; // Return the replacement character U+FFFD
}
#endif
return static_cast<char32_t>(wc);
#endif
}
void pop_cursor(console_state & con_st) {
#if defined(_WIN32)
if (con_st.hConsole != NULL) {
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo);
COORD newCursorPosition = bufferInfo.dwCursorPosition;
if (newCursorPosition.X == 0) {
newCursorPosition.X = bufferInfo.dwSize.X - 1;
newCursorPosition.Y -= 1;
} else {
newCursorPosition.X -= 1;
}
SetConsoleCursorPosition(con_st.hConsole, newCursorPosition);
return;
}
#endif
putc('\b', con_st.out);
}
int estimateWidth(char32_t codepoint) {
#if defined(_WIN32)
return 1;
#else
return wcwidth(codepoint);
#endif
}
int put_codepoint(console_state & con_st, const char* utf8_codepoint, size_t length, int expectedWidth) {
#if defined(_WIN32)
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
if (!GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo)) {
// go with the default
return expectedWidth;
}
COORD initialPosition = bufferInfo.dwCursorPosition;
DWORD nNumberOfChars = length;
WriteConsole(con_st.hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);
CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo);
// Figure out our real position if we're in the last column
if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) {
DWORD nNumberOfChars;
WriteConsole(con_st.hConsole, &" \b", 2, &nNumberOfChars, NULL);
GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo);
}
int width = newBufferInfo.dwCursorPosition.X - initialPosition.X;
if (width < 0) {
width += newBufferInfo.dwSize.X;
}
return width;
#else
// we can trust expectedWidth if we've got one
if (expectedWidth >= 0 || con_st.tty == nullptr) {
fwrite(utf8_codepoint, length, 1, con_st.out);
return expectedWidth;
}
fputs("\033[6n", con_st.tty); // Query cursor position
int x1, x2, y1, y2;
int results = 0;
results = fscanf(con_st.tty, "\033[%d;%dR", &y1, &x1);
fwrite(utf8_codepoint, length, 1, con_st.tty);
fputs("\033[6n", con_st.tty); // Query cursor position
results += fscanf(con_st.tty, "\033[%d;%dR", &y2, &x2);
if (results != 4) {
return expectedWidth;
}
int width = x2 - x1;
if (width < 0) {
// Calculate the width considering text wrapping
struct winsize w;
ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
width += w.ws_col;
}
return width;
#endif
}
void replace_last(console_state & con_st, char ch) {
#if defined(_WIN32)
pop_cursor(con_st);
put_codepoint(con_st, &ch, 1, 1);
#else
fprintf(con_st.out, "\b%c", ch);
#endif
}
void append_utf8(char32_t ch, std::string & out) {
if (ch <= 0x7F) {
out.push_back(static_cast<unsigned char>(ch));
} else if (ch <= 0x7FF) {
out.push_back(static_cast<unsigned char>(0xC0 | ((ch >> 6) & 0x1F)));
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0xFFFF) {
out.push_back(static_cast<unsigned char>(0xE0 | ((ch >> 12) & 0x0F)));
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0x10FFFF) {
out.push_back(static_cast<unsigned char>(0xF0 | ((ch >> 18) & 0x07)));
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 12) & 0x3F)));
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
} else {
// Invalid Unicode code point
}
}
// Helper function to remove the last UTF-8 character from a string
void pop_back_utf8_char(std::string & line) {
if (line.empty()) {
return;
}
size_t pos = line.length() - 1;
// Find the start of the last UTF-8 character (checking up to 4 bytes back)
for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) {
if ((line[pos] & 0xC0) != 0x80) break; // Found the start of the character
}
line.erase(pos);
}
bool console_readline(console_state & con_st, std::string & line) {
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
if (con_st.out != stdout) {
fflush(stdout);
}
line.clear();
std::vector<int> widths;
bool is_special_char = false;
bool end_of_stream = false;
char32_t input_char;
while (true) {
fflush(con_st.out); // Ensure all output is displayed before waiting for input
input_char = getchar32();
if (input_char == '\r' || input_char == '\n') {
break;
}
if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) {
end_of_stream = true;
break;
}
if (is_special_char) {
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
replace_last(con_st, line.back());
is_special_char = false;
}
if (input_char == '\033') { // Escape sequence
char32_t code = getchar32();
if (code == '[' || code == 0x1B) {
// Discard the rest of the escape sequence
while ((code = getchar32()) != (char32_t) WEOF) {
if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
break;
}
}
}
} else if (input_char == 0x08 || input_char == 0x7F) { // Backspace
if (!widths.empty()) {
int count;
do {
count = widths.back();
widths.pop_back();
// Move cursor back, print space, and move cursor back again
for (int i = 0; i < count; i++) {
replace_last(con_st, ' ');
pop_cursor(con_st);
}
pop_back_utf8_char(line);
} while (count == 0 && !widths.empty());
}
} else {
int offset = line.length();
append_utf8(input_char, line);
int width = put_codepoint(con_st, line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
if (width < 0) {
width = 0;
}
widths.push_back(width);
}
if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
console_set_color(con_st, CONSOLE_COLOR_PROMPT);
replace_last(con_st, line.back());
is_special_char = true;
}
}
bool has_more = con_st.multiline_input;
if (is_special_char) {
replace_last(con_st, ' ');
pop_cursor(con_st);
char last = line.back();
line.pop_back();
if (last == '\\') {
line += '\n';
fputc('\n', con_st.out);
has_more = !has_more;
} else {
// llama will just eat the single space, it won't act as a space
if (line.length() == 1 && line.back() == ' ') {
line.clear();
pop_cursor(con_st);
}
has_more = false;
}
} else {
if (end_of_stream) {
has_more = false;
} else {
line += '\n';
fputc('\n', con_st.out);
}
}
fflush(con_st.out);
return has_more;
}

View file

@ -11,11 +11,6 @@
#include <unordered_map> #include <unordered_map>
#include <tuple> #include <tuple>
#if !defined (_WIN32)
#include <stdio.h>
#include <termios.h>
#endif
// //
// CLI argument parsing // CLI argument parsing
// //
@ -85,6 +80,7 @@ struct gpt_params {
bool embedding = false; // get only sentence embedding bool embedding = false; // get only sentence embedding
bool interactive_first = false; // wait for user input immediately bool interactive_first = false; // wait for user input immediately
bool multiline_input = false; // reverse the usage of `\` bool multiline_input = false; // reverse the usage of `\`
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
bool instruct = false; // instruction mode (used for Alpaca models) bool instruct = false; // instruction mode (used for Alpaca models)
@ -116,42 +112,3 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params); std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params); struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
//
// Console utils
//
#define ANSI_COLOR_RED "\x1b[31m"
#define ANSI_COLOR_GREEN "\x1b[32m"
#define ANSI_COLOR_YELLOW "\x1b[33m"
#define ANSI_COLOR_BLUE "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN "\x1b[36m"
#define ANSI_COLOR_RESET "\x1b[0m"
#define ANSI_BOLD "\x1b[1m"
enum console_color_t {
CONSOLE_COLOR_DEFAULT=0,
CONSOLE_COLOR_PROMPT,
CONSOLE_COLOR_USER_INPUT,
CONSOLE_COLOR_ERROR
};
struct console_state {
bool multiline_input = false;
bool use_color = false;
console_color_t color = CONSOLE_COLOR_DEFAULT;
FILE* out = stdout;
#if defined (_WIN32)
void* hConsole;
#else
FILE* tty = nullptr;
termios prev_state;
#endif
};
void console_init(console_state & con_st);
void console_cleanup(console_state & con_st);
void console_set_color(console_state & con_st, console_color_t color);
bool console_readline(console_state & con_st, std::string & line);

496
examples/console.cpp Normal file
View file

@ -0,0 +1,496 @@
#include "console.h"
#include <vector>
#include <iostream>
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#include <fcntl.h>
#include <io.h>
#else
#include <climits>
#include <sys/ioctl.h>
#include <unistd.h>
#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <termios.h>
#endif
#define ANSI_COLOR_RED "\x1b[31m"
#define ANSI_COLOR_GREEN "\x1b[32m"
#define ANSI_COLOR_YELLOW "\x1b[33m"
#define ANSI_COLOR_BLUE "\x1b[34m"
#define ANSI_COLOR_MAGENTA "\x1b[35m"
#define ANSI_COLOR_CYAN "\x1b[36m"
#define ANSI_COLOR_RESET "\x1b[0m"
#define ANSI_BOLD "\x1b[1m"
namespace console {
//
// Console state
//
static bool advanced_display = false;
static bool simple_io = true;
static display_t current_display = reset;
static FILE* out = stdout;
#if defined (_WIN32)
static void* hConsole;
#else
static FILE* tty = nullptr;
static termios initial_state;
#endif
//
// Init and cleanup
//
void init(bool use_simple_io, bool use_advanced_display) {
advanced_display = use_advanced_display;
simple_io = use_simple_io;
#if defined(_WIN32)
// Windows-specific console initialization
DWORD dwMode = 0;
hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) {
hConsole = GetStdHandle(STD_ERROR_HANDLE);
if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) {
hConsole = nullptr;
simple_io = true;
}
}
if (hConsole) {
// Enable ANSI colors on Windows 10+
if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
}
// Set console output codepage to UTF8
SetConsoleOutputCP(CP_UTF8);
}
HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
// Set console input codepage to UTF16
_setmode(_fileno(stdin), _O_WTEXT);
// Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
if (simple_io) {
dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
} else {
dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
}
if (!SetConsoleMode(hConIn, dwMode)) {
simple_io = true;
}
}
#else
// POSIX-specific console initialization
if (!simple_io) {
struct termios new_termios;
tcgetattr(STDIN_FILENO, &initial_state);
new_termios = initial_state;
new_termios.c_lflag &= ~(ICANON | ECHO);
new_termios.c_cc[VMIN] = 1;
new_termios.c_cc[VTIME] = 0;
tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);
tty = fopen("/dev/tty", "w+");
if (tty != nullptr) {
out = tty;
}
}
setlocale(LC_ALL, "");
#endif
}
void cleanup() {
// Reset console display
set_display(reset);
#if !defined(_WIN32)
// Restore settings on POSIX systems
if (!simple_io) {
if (tty != nullptr) {
out = stdout;
fclose(tty);
tty = nullptr;
}
tcsetattr(STDIN_FILENO, TCSANOW, &initial_state);
}
#endif
}
//
// Display and IO
//
// Keep track of current display and only emit ANSI code if it changes
void set_display(display_t display) {
if (advanced_display && current_display != display) {
fflush(stdout);
switch(display) {
case reset:
fprintf(out, ANSI_COLOR_RESET);
break;
case prompt:
fprintf(out, ANSI_COLOR_YELLOW);
break;
case user_input:
fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN);
break;
case error:
fprintf(out, ANSI_BOLD ANSI_COLOR_RED);
}
current_display = display;
fflush(out);
}
}
char32_t getchar32() {
#if defined(_WIN32)
HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
wchar_t high_surrogate = 0;
while (true) {
INPUT_RECORD record;
DWORD count;
if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) {
return WEOF;
}
if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) {
wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar;
if (wc == 0) {
continue;
}
if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
high_surrogate = wc;
continue;
}
if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate
if (high_surrogate != 0) { // Check if we have a high surrogate
return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000;
}
}
high_surrogate = 0; // Reset the high surrogate
return static_cast<char32_t>(wc);
}
}
#else
wchar_t wc = getwchar();
if (static_cast<wint_t>(wc) == WEOF) {
return WEOF;
}
#if WCHAR_MAX == 0xFFFF
if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
wchar_t low_surrogate = getwchar();
if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate
return (static_cast<char32_t>(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000;
}
}
if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair
return 0xFFFD; // Return the replacement character U+FFFD
}
#endif
return static_cast<char32_t>(wc);
#endif
}
void pop_cursor() {
#if defined(_WIN32)
if (hConsole != NULL) {
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
GetConsoleScreenBufferInfo(hConsole, &bufferInfo);
COORD newCursorPosition = bufferInfo.dwCursorPosition;
if (newCursorPosition.X == 0) {
newCursorPosition.X = bufferInfo.dwSize.X - 1;
newCursorPosition.Y -= 1;
} else {
newCursorPosition.X -= 1;
}
SetConsoleCursorPosition(hConsole, newCursorPosition);
return;
}
#endif
putc('\b', out);
}
int estimateWidth(char32_t codepoint) {
#if defined(_WIN32)
return 1;
#else
return wcwidth(codepoint);
#endif
}
int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
#if defined(_WIN32)
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
// go with the default
return expectedWidth;
}
COORD initialPosition = bufferInfo.dwCursorPosition;
DWORD nNumberOfChars = length;
WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);
CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);
// Figure out our real position if we're in the last column
if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) {
DWORD nNumberOfChars;
WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL);
GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);
}
int width = newBufferInfo.dwCursorPosition.X - initialPosition.X;
if (width < 0) {
width += newBufferInfo.dwSize.X;
}
return width;
#else
// We can trust expectedWidth if we've got one
if (expectedWidth >= 0 || tty == nullptr) {
fwrite(utf8_codepoint, length, 1, out);
return expectedWidth;
}
fputs("\033[6n", tty); // Query cursor position
int x1;
int y1;
int x2;
int y2;
int results = 0;
results = fscanf(tty, "\033[%d;%dR", &y1, &x1);
fwrite(utf8_codepoint, length, 1, tty);
fputs("\033[6n", tty); // Query cursor position
results += fscanf(tty, "\033[%d;%dR", &y2, &x2);
if (results != 4) {
return expectedWidth;
}
int width = x2 - x1;
if (width < 0) {
// Calculate the width considering text wrapping
struct winsize w;
ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
width += w.ws_col;
}
return width;
#endif
}
void replace_last(char ch) {
#if defined(_WIN32)
pop_cursor();
put_codepoint(&ch, 1, 1);
#else
fprintf(out, "\b%c", ch);
#endif
}
void append_utf8(char32_t ch, std::string & out) {
if (ch <= 0x7F) {
out.push_back(static_cast<unsigned char>(ch));
} else if (ch <= 0x7FF) {
out.push_back(static_cast<unsigned char>(0xC0 | ((ch >> 6) & 0x1F)));
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0xFFFF) {
out.push_back(static_cast<unsigned char>(0xE0 | ((ch >> 12) & 0x0F)));
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
} else if (ch <= 0x10FFFF) {
out.push_back(static_cast<unsigned char>(0xF0 | ((ch >> 18) & 0x07)));
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 12) & 0x3F)));
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
} else {
// Invalid Unicode code point
}
}
// Helper function to remove the last UTF-8 character from a string
void pop_back_utf8_char(std::string & line) {
if (line.empty()) {
return;
}
size_t pos = line.length() - 1;
// Find the start of the last UTF-8 character (checking up to 4 bytes back)
for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) {
if ((line[pos] & 0xC0) != 0x80) {
break; // Found the start of the character
}
}
line.erase(pos);
}
bool readline_advanced(std::string & line, bool multiline_input) {
if (out != stdout) {
fflush(stdout);
}
line.clear();
std::vector<int> widths;
bool is_special_char = false;
bool end_of_stream = false;
char32_t input_char;
while (true) {
fflush(out); // Ensure all output is displayed before waiting for input
input_char = getchar32();
if (input_char == '\r' || input_char == '\n') {
break;
}
if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) {
end_of_stream = true;
break;
}
if (is_special_char) {
set_display(user_input);
replace_last(line.back());
is_special_char = false;
}
if (input_char == '\033') { // Escape sequence
char32_t code = getchar32();
if (code == '[' || code == 0x1B) {
// Discard the rest of the escape sequence
while ((code = getchar32()) != (char32_t) WEOF) {
if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
break;
}
}
}
} else if (input_char == 0x08 || input_char == 0x7F) { // Backspace
if (!widths.empty()) {
int count;
do {
count = widths.back();
widths.pop_back();
// Move cursor back, print space, and move cursor back again
for (int i = 0; i < count; i++) {
replace_last(' ');
pop_cursor();
}
pop_back_utf8_char(line);
} while (count == 0 && !widths.empty());
}
} else {
int offset = line.length();
append_utf8(input_char, line);
int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
if (width < 0) {
width = 0;
}
widths.push_back(width);
}
if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
set_display(prompt);
replace_last(line.back());
is_special_char = true;
}
}
bool has_more = multiline_input;
if (is_special_char) {
replace_last(' ');
pop_cursor();
char last = line.back();
line.pop_back();
if (last == '\\') {
line += '\n';
fputc('\n', out);
has_more = !has_more;
} else {
// llama will just eat the single space, it won't act as a space
if (line.length() == 1 && line.back() == ' ') {
line.clear();
pop_cursor();
}
has_more = false;
}
} else {
if (end_of_stream) {
has_more = false;
} else {
line += '\n';
fputc('\n', out);
}
}
fflush(out);
return has_more;
}
bool readline_simple(std::string & line, bool multiline_input) {
#if defined(_WIN32)
std::wstring wline;
if (!std::getline(std::wcin, wline)) {
// Input stream is bad or EOF received
line.clear();
GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
return false;
}
int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), NULL, 0, NULL, NULL);
line.resize(size_needed);
WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), &line[0], size_needed, NULL, NULL);
#else
if (!std::getline(std::cin, line)) {
// Input stream is bad or EOF received
line.clear();
return false;
}
#endif
if (!line.empty()) {
char last = line.back();
if (last == '/') { // Always return control on '/' symbol
line.pop_back();
return false;
}
if (last == '\\') { // '\\' changes the default action
line.pop_back();
multiline_input = !multiline_input;
}
}
line += '\n';
// By default, continue input if multiline_input is set
return multiline_input;
}
bool readline(std::string & line, bool multiline_input) {
set_display(user_input);
if (simple_io) {
return readline_simple(line, multiline_input);
}
return readline_advanced(line, multiline_input);
}
}

19
examples/console.h Normal file
View file

@ -0,0 +1,19 @@
// Console functions
#pragma once
#include <string>
namespace console {
enum display_t {
reset = 0,
prompt,
user_input,
error
};
void init(bool use_simple_io, bool use_advanced_display);
void cleanup();
void set_display(display_t display);
bool readline(std::string & line, bool multiline_input);
}

View file

@ -30,7 +30,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT); fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
if (params.seed == LLAMA_DEFAULT_SEED) { if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL); params.seed = uint32_t(time(NULL));
} }
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed); fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);

View file

@ -405,7 +405,7 @@ namespace grammar_parser {
for (size_t i = 0, end = state.rules.size(); i < end; i++) { for (size_t i = 0, end = state.rules.size(); i < end; i++) {
// fprintf(file, "%zu: ", i); // fprintf(file, "%zu: ", i);
// print_rule_binary(file, state.rules[i]); // print_rule_binary(file, state.rules[i]);
print_rule(file, i, state.rules[i], symbol_id_names); print_rule(file, uint32_t(i), state.rules[i], symbol_id_names);
// fprintf(file, "\n"); // fprintf(file, "\n");
} }
} catch (const std::exception & err) { } catch (const std::exception & err) {

View file

@ -0,0 +1,132 @@
import argparse
import json
import re
import sys
# whitespace is constrained to a single space char to prevent model "running away" in
# whitespace. Also maybe improves generation quality?
SPACE_RULE = '" "?'
PRIMITIVE_RULES = {
'boolean': '("true" | "false") space',
'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
'string': r''' "\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space ''',
'null': '"null" space',
}
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"'}
class SchemaConverter:
def __init__(self, prop_order):
self._prop_order = prop_order
self._rules = {'space': SPACE_RULE}
def _format_literal(self, literal):
escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), json.dumps(literal)
)
return f'"{escaped}"'
def _add_rule(self, name, rule):
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
if esc_name not in self._rules or self._rules[esc_name] == rule:
key = esc_name
else:
i = 0
while f'{esc_name}{i}' in self._rules:
i += 1
key = f'{esc_name}{i}'
self._rules[key] = rule
return key
def visit(self, schema, name):
schema_type = schema.get('type')
rule_name = name or 'root'
if 'oneOf' in schema or 'anyOf' in schema:
rule = ' | '.join((
self.visit(alt_schema, f'{name}{"-" if name else ""}{i}')
for i, alt_schema in enumerate(schema.get('oneOf') or schema['anyOf'])
))
return self._add_rule(rule_name, rule)
elif 'const' in schema:
return self._add_rule(rule_name, self._format_literal(schema['const']))
elif 'enum' in schema:
rule = ' | '.join((self._format_literal(v) for v in schema['enum']))
return self._add_rule(rule_name, rule)
elif schema_type == 'object' and 'properties' in schema:
# TODO: `required` keyword
prop_order = self._prop_order
prop_pairs = sorted(
schema['properties'].items(),
# sort by position in prop_order (if specified) then by key
key=lambda kv: (prop_order.get(kv[0], len(prop_order)), kv[0]),
)
rule = '"{" space'
for i, (prop_name, prop_schema) in enumerate(prop_pairs):
prop_rule_name = self.visit(prop_schema, f'{name}{"-" if name else ""}{prop_name}')
if i > 0:
rule += ' "," space'
rule += fr' {self._format_literal(prop_name)} space ":" space {prop_rule_name}'
rule += ' "}" space'
return self._add_rule(rule_name, rule)
elif schema_type == 'array' and 'items' in schema:
# TODO `prefixItems` keyword
item_rule_name = self.visit(schema['items'], f'{name}{"-" if name else ""}item')
rule = f'"[" space ({item_rule_name} ("," space {item_rule_name})*)? "]" space'
return self._add_rule(rule_name, rule)
else:
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
return self._add_rule(
'root' if rule_name == 'root' else schema_type,
PRIMITIVE_RULES[schema_type]
)
def format_grammar(self):
return '\n'.join((f'{name} ::= {rule}' for name, rule in self._rules.items()))
def main(args_in = None):
parser = argparse.ArgumentParser(
description='''
Generates a grammar (suitable for use in ./main) that produces JSON conforming to a
given JSON schema. Only a subset of JSON schema features are supported; more may be
added in the future.
''',
)
parser.add_argument(
'--prop-order',
default=[],
type=lambda s: s.split(','),
help='''
comma-separated property names defining the order of precedence for object properties;
properties not specified here are given lower precedence than those that are, and are
sorted alphabetically
'''
)
parser.add_argument('schema', help='file containing JSON schema ("-" for stdin)')
args = parser.parse_args(args_in)
schema = json.load(sys.stdin if args.schema == '-' else open(args.schema))
prop_order = {name: idx for idx, name in enumerate(args.prop_order)}
converter = SchemaConverter(prop_order)
converter.visit(schema, '')
print(converter.format_grammar())
if __name__ == '__main__':
main()

132
examples/llama.vim Normal file
View file

@ -0,0 +1,132 @@
" Requires an already running llama.cpp server
" To install either copy or symlink to ~/.vim/autoload/llama.vim
" Then start with either :call llama#doLlamaGen(),
" or add a keybind to your vimrc such as
" nnoremap Z :call llama#doLlamaGen()<CR>
" Similarly, you could add an insert mode keybind with
" inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
"
" g:llama_api_url and g:llama_overrides can be configured in your .vimrc
" let g:llama_api_url = "192.168.1.10:8080"
" llama_overrides can also be set through buffer/window scopes. For instance
" autocmd filetype python let b:llama_overrides = {"temp": 0.2}
" Could be added to your .vimrc to automatically set a lower temperature when
" editing a python script
" Additionally, an override dict can be stored at the top of a file
" !*{"stop": ["User:"]}
" Could be added to the start of your chatlog.txt to set the stopping token
" These parameter dicts are merged together from lowest to highest priority:
" server default -> g:llama_overrides -> w:llama_overrides ->
" b:llama_overrides -> in file (!*) overrides
"
" Sublists (like logit_bias and stop) are overridden, not merged
" Example override:
" !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}
if !exists("g:llama_api_url")
let g:llama_api_url= "127.0.0.1:8080"
endif
if !exists("g:llama_overrides")
let g:llama_overrides = {}
endif
const s:querydata = {"n_predict": 256, "stop": [ "\n" ], "stream": v:true }
const s:curlcommand = ['curl','--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
let s:linedict = {}
func s:callbackHandler(bufn, channel, msg)
if len(a:msg) < 3
return
elseif a:msg[0] == "d"
let l:msg = a:msg[6:-1]
else
let l:msg = a:msg
endif
let l:decoded_msg = json_decode(l:msg)
let l:newtext = split(l:decoded_msg['content'], "\n", 1)
if len(l:newtext) > 0
call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. newtext[0])
else
echo "nothing genned"
endif
if len(newtext) > 1
let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], newtext[1:-1])
let s:linedict[a:bufn] = s:linedict[a:bufn] + len(newtext)-1
endif
if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
echo "Finished generation"
endif
endfunction
func llama#doLlamaGen()
if exists("b:job")
if job_status(b:job) == "run"
call job_stop(b:job)
return
endif
endif
let l:cbuffer = bufnr("%")
let s:linedict[l:cbuffer] = line('$')
let l:buflines = getbufline(l:cbuffer, 1, 1000)
let l:querydata = copy(s:querydata)
call extend(l:querydata, g:llama_overrides)
if exists("w:llama_overrides")
call extend(l:querydata, w:llama_overrides)
endif
if exists("b:llama_overrides")
call extend(l:querydata, b:llama_overrides)
endif
if l:buflines[0][0:1] == '!*'
let l:userdata = json_decode(l:buflines[0][2:-1])
call extend(l:querydata, l:userdata)
let l:buflines = l:buflines[1:-1]
endif
let l:querydata.prompt = join(l:buflines, "\n")
let l:curlcommand = copy(s:curlcommand)
let l:curlcommand[2] = json_encode(l:querydata)
let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
endfunction
" Echos the tokkenization of the provided string , or cursor to end of word
" Onus is placed on the user to include the preceding space
func llama#tokenizeWord(...)
if (a:0 > 0)
let l:input = a:1
else
exe "normal \"*ye"
let l:input = @*
endif
let l:querydata = {"content": l:input}
let l:curlcommand = copy(s:curlcommand)
let l:curlcommand[2] = json_encode(l:querydata)
let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
endfunction
func s:tokenizeWordCallback(plaintext, channel, msg)
echo '"' .. a:plaintext ..'" - ' .. string(json_decode(a:msg).tokens)
endfunction
" Echos the token count of the entire buffer (or provided string)
" Example usage :echo llama#tokenCount()
func llama#tokenCount(...)
if (a:0 > 0)
let l:buflines = a:1
else
let l:buflines = getline(1,1000)
if l:buflines[0][0:1] == '!*'
let l:buflines = l:buflines[1:-1]
endif
let l:buflines = join(l:buflines, "\n")
endif
let l:querydata = {"content": l:buflines}
let l:curlcommand = copy(s:curlcommand)
let l:curlcommand[2] = json_encode(l:querydata)
let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
let s:token_job = job_start(l:curlcommand, {"callback": "s:tokenCountCallback"})
endfunction
func s:tokenCountCallback(channel, msg)
let resp = json_decode(a:msg)
echo len(resp.tokens)
endfunction

View file

@ -1,3 +1,5 @@
" Basic plugin example
function! Llm() function! Llm()
let url = "http://127.0.0.1:8080/completion" let url = "http://127.0.0.1:8080/completion"
@ -16,8 +18,10 @@ function! Llm()
" Extract the content field from the response " Extract the content field from the response
let content = json_decode(response).content let content = json_decode(response).content
let split_newlines = split(content, '\n', 1)
" Insert the content at the cursor position " Insert the content at the cursor position
call setline(line('.'), getline('.') . content) call setline(line('.'), [ getline('.') . split_newlines[0] ] + split_newlines[1:])
endfunction endfunction
command! Llm call Llm() command! Llm call Llm()

View file

@ -140,6 +140,12 @@ The `--ctx-size` option allows you to set the size of the prompt context used by
- `-c N, --ctx-size N`: Set the size of the prompt context (default: 512). The LLaMA models were built with a context of 2048, which will yield the best results on longer input/inference. However, increasing the context size beyond 2048 may lead to unpredictable results. - `-c N, --ctx-size N`: Set the size of the prompt context (default: 512). The LLaMA models were built with a context of 2048, which will yield the best results on longer input/inference. However, increasing the context size beyond 2048 may lead to unpredictable results.
### Extended Context Size
Some fine-tuned models have extened the context length by scaling RoPE. For example, if the original pretrained model have a context length (max sequence length) of 4096 (4k) and the fine-tuned model have 32k. That is a scaling factor of 8, and should work by setting the above `--ctx-size` to 32768 (32k) and `--rope-scale` to 8.
- `--rope-scale N`: Where N is the linear scaling factor used by the fine-tuned model.
### Keep Prompt ### Keep Prompt
The `--keep` option allows users to retain the original prompt when the model runs out of context, ensuring a connection to the initial instruction or conversation topic is maintained. The `--keep` option allows users to retain the original prompt when the model runs out of context, ensuring a connection to the initial instruction or conversation topic is maintained.

View file

@ -4,6 +4,7 @@
#endif #endif
#include "common.h" #include "common.h"
#include "console.h"
#include "llama.h" #include "llama.h"
#include "build-info.h" #include "build-info.h"
#include "grammar-parser.h" #include "grammar-parser.h"
@ -35,9 +36,7 @@
#pragma warning(disable: 4244 4267) // possible loss of data #pragma warning(disable: 4244 4267) // possible loss of data
#endif #endif
static console_state con_st;
static llama_context ** g_ctx; static llama_context ** g_ctx;
static bool is_interacting = false; static bool is_interacting = false;
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
@ -46,7 +45,7 @@ void sigint_handler(int signo) {
if (!is_interacting) { if (!is_interacting) {
is_interacting=true; is_interacting=true;
} else { } else {
console_cleanup(con_st); console::cleanup();
printf("\n"); printf("\n");
llama_print_timings(*g_ctx); llama_print_timings(*g_ctx);
_exit(130); _exit(130);
@ -64,10 +63,8 @@ int main(int argc, char ** argv) {
// save choice to use color for later // save choice to use color for later
// (note for later: this is a slightly awkward choice) // (note for later: this is a slightly awkward choice)
con_st.use_color = params.use_color; console::init(params.simple_io, params.use_color);
con_st.multiline_input = params.multiline_input; atexit([]() { console::cleanup(); });
console_init(con_st);
atexit([]() { console_cleanup(con_st); });
if (params.perplexity) { if (params.perplexity) {
printf("\n************\n"); printf("\n************\n");
@ -373,7 +370,7 @@ int main(int argc, char ** argv) {
if (params.interactive) { if (params.interactive) {
const char *control_message; const char *control_message;
if (con_st.multiline_input) { if (params.multiline_input) {
control_message = " - To return control to LLaMa, end your input with '\\'.\n" control_message = " - To return control to LLaMa, end your input with '\\'.\n"
" - To return control without starting a new line, end your input with '/'.\n"; " - To return control without starting a new line, end your input with '/'.\n";
} else { } else {
@ -401,7 +398,7 @@ int main(int argc, char ** argv) {
int n_past_guidance = 0; int n_past_guidance = 0;
// the first thing we will do is to output the prompt, so set color accordingly // the first thing we will do is to output the prompt, so set color accordingly
console_set_color(con_st, CONSOLE_COLOR_PROMPT); console::set_display(console::prompt);
std::vector<llama_token> embd; std::vector<llama_token> embd;
std::vector<llama_token> embd_guidance; std::vector<llama_token> embd_guidance;
@ -422,9 +419,9 @@ int main(int argc, char ** argv) {
// Ensure the input doesn't exceed the context size by truncating embd if necessary. // Ensure the input doesn't exceed the context size by truncating embd if necessary.
if ((int)embd.size() > max_embd_size) { if ((int)embd.size() > max_embd_size) {
auto skipped_tokens = embd.size() - max_embd_size; auto skipped_tokens = embd.size() - max_embd_size;
console_set_color(con_st, CONSOLE_COLOR_ERROR); console::set_display(console::error);
printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : ""); printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
console_set_color(con_st, CONSOLE_COLOR_DEFAULT); console::set_display(console::reset);
fflush(stdout); fflush(stdout);
embd.resize(max_embd_size); embd.resize(max_embd_size);
} }
@ -667,7 +664,7 @@ int main(int argc, char ** argv) {
} }
// reset color to default if we there is no pending user input // reset color to default if we there is no pending user input
if (input_echo && (int)embd_inp.size() == n_consumed) { if (input_echo && (int)embd_inp.size() == n_consumed) {
console_set_color(con_st, CONSOLE_COLOR_DEFAULT); console::set_display(console::reset);
} }
// if not currently processing queued inputs; // if not currently processing queued inputs;
@ -693,7 +690,7 @@ int main(int argc, char ** argv) {
if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) { if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) {
if (params.interactive) { if (params.interactive) {
is_interacting = true; is_interacting = true;
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); console::set_display(console::user_input);
} }
is_antiprompt = true; is_antiprompt = true;
fflush(stdout); fflush(stdout);
@ -714,7 +711,7 @@ int main(int argc, char ** argv) {
is_interacting = true; is_interacting = true;
printf("\n"); printf("\n");
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT); console::set_display(console::user_input);
fflush(stdout); fflush(stdout);
} else if (params.instruct) { } else if (params.instruct) {
is_interacting = true; is_interacting = true;
@ -739,12 +736,12 @@ int main(int argc, char ** argv) {
std::string line; std::string line;
bool another_line = true; bool another_line = true;
do { do {
another_line = console_readline(con_st, line); another_line = console::readline(line, params.multiline_input);
buffer += line; buffer += line;
} while (another_line); } while (another_line);
// done taking input, reset color // done taking input, reset color
console_set_color(con_st, CONSOLE_COLOR_DEFAULT); console::set_display(console::reset);
// Add tokens to embd only if the input buffer is non-empty // Add tokens to embd only if the input buffer is non-empty
// Entering a empty line lets the user pass control back // Entering a empty line lets the user pass control back

View file

@ -153,7 +153,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
} }
size_t hs_task_count = prompt_lines.size()/6; size_t hs_task_count = prompt_lines.size()/6;
fprintf(stderr, "%s : loaded %lu tasks from prompt.\n", __func__, hs_task_count); fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count);
// This is needed as usual for LLaMA models // This is needed as usual for LLaMA models
bool prepend_bos = true; bool prepend_bos = true;
@ -178,7 +178,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
double ending_logprob[4]; double ending_logprob[4];
}; };
fprintf(stderr, "%s : selecting %lu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") ); fprintf(stderr, "%s : selecting %zu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") );
// Select and read data from prompt lines // Select and read data from prompt lines
hs_data_t *hs_data = new hs_data_t[hs_task_count]; hs_data_t *hs_data = new hs_data_t[hs_task_count];
@ -223,7 +223,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
// Stop if query wont fit the ctx window // Stop if query wont fit the ctx window
if (query_size > (size_t)params.n_ctx) { if (query_size > (size_t)params.n_ctx) {
fprintf(stderr, "%s : number of tokens in query %lu > n_ctxl\n", __func__, query_size); fprintf(stderr, "%s : number of tokens in query %zu > n_ctxl\n", __func__, query_size);
return; return;
} }
@ -284,7 +284,7 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
} }
// Print the accumulated accuracy mean x 100 // Print the accumulated accuracy mean x 100
printf("%li\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0); printf("%zu\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0);
fflush(stdout); fflush(stdout);
} }

View file

@ -151,6 +151,8 @@ node .
`mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1). `mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1).
`grammar`: Set grammar for grammar-based sampling (default: no grammar)
`seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed). `seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).
`ignore_eos`: Ignore end of stream token and continue generating (default: false). `ignore_eos`: Ignore end of stream token and continue generating (default: false).
@ -163,7 +165,7 @@ node .
`content`: Set the text to tokenize. `content`: Set the text to tokenize.
Note that the special `BOS` token is not added in fron of the text and also a space character is not inserted automatically as it is for `/completion`. Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
- **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does. - **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.

View file

@ -87,289 +87,342 @@ unsigned char completion_js[] = {
0x20, 0x54, 0x65, 0x78, 0x74, 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x20, 0x54, 0x65, 0x78, 0x74, 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72,
0x28, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x28, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63,
0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b,
0x0a, 0x0a, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f,
0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x20, 0x3d, 0x76, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f,
0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0x20, 0x66, 0x6f, 0x72, 0x20,
0x77, 0x68, 0x69, 0x6c, 0x65, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x29, 0x70, 0x61, 0x72, 0x74, 0x69, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x72, 0x65,
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x61, 0x64, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x0a, 0x0a, 0x20, 0x20,
0x73, 0x74, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65,
0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x72, 0x65, 0x61, 0x64, 0x65, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75,
0x2e, 0x72, 0x65, 0x61, 0x64, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x65, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c,
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x65, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x29, 0x20, 0x7b, 0x0a, 0x20,
0x74, 0x2e, 0x64, 0x6f, 0x6e, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x74, 0x20, 0x72, 0x65, 0x61, 0x64, 0x65, 0x72, 0x2e, 0x72, 0x65, 0x61,
0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x73, 0x65, 0x20, 0x61, 0x64, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
0x6e, 0x73, 0x77, 0x65, 0x72, 0x73, 0x20, 0x69, 0x6e, 0x20, 0x74, 0x68, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x6f,
0x65, 0x20, 0x66, 0x6f, 0x72, 0x6d, 0x20, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x6e, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x70, 0x6c, 0x65, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x20, 0x6f, 0x66, 0x20, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20,
0x3a, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x5c, 0x6e, 0x20, 0x77, 0x69, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x74, 0x68, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, 0x61, 0x6c, 0x77, 0x61, 0x2f, 0x2f, 0x20, 0x41, 0x64, 0x64, 0x20, 0x61, 0x6e, 0x79, 0x20, 0x6c,
0x79, 0x73, 0x20, 0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x74, 0x20, 0x61, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x64, 0x61, 0x74, 0x61,
0x73, 0x20, 0x61, 0x20, 0x6b, 0x65, 0x79, 0x2e, 0x20, 0x69, 0x6e, 0x20, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x75, 0x72, 0x72,
0x6f, 0x75, 0x72, 0x20, 0x63, 0x61, 0x73, 0x65, 0x20, 0x77, 0x65, 0x0a, 0x65, 0x6e, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x6d, 0x61, 0x69, 0x20, 0x64, 0x61, 0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x6e, 0x6c, 0x79, 0x20, 0x63, 0x61, 0x72, 0x65, 0x20, 0x61, 0x62, 0x6f, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x74, 0x65, 0x78, 0x74, 0x20, 0x3d,
0x75, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x64, 0x61, 0x74, 0x61, 0x3a, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x2b, 0x20,
0x20, 0x6b, 0x65, 0x79, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2c, 0x20, 0x77, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x2e, 0x64, 0x65, 0x63, 0x6f,
0x68, 0x69, 0x63, 0x68, 0x20, 0x77, 0x65, 0x20, 0x65, 0x78, 0x70, 0x65, 0x64, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x76, 0x61,
0x63, 0x74, 0x20, 0x61, 0x73, 0x20, 0x6a, 0x73, 0x6f, 0x6e, 0x0a, 0x20, 0x6c, 0x75, 0x65, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x74, 0x20, 0x2f, 0x2f, 0x20, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x20, 0x69, 0x66,
0x65, 0x78, 0x74, 0x20, 0x3d, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20, 0x63, 0x68,
0x72, 0x2e, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x28, 0x72, 0x65, 0x73, 0x61, 0x72, 0x61, 0x63, 0x74, 0x65, 0x72, 0x20, 0x69, 0x73, 0x20, 0x61,
0x75, 0x6c, 0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x0a,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x70, 0x61, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
0x72, 0x73, 0x65, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x73, 0x73, 0x65, 0x20, 0x65, 0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x4c, 0x69, 0x6e, 0x65,
0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x20, 0x3d, 0x20, 0x74, 0x65, 0x78, 0x74,
0x64, 0x64, 0x20, 0x74, 0x68, 0x65, 0x6d, 0x20, 0x74, 0x6f, 0x20, 0x72, 0x2e, 0x65, 0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x28, 0x27, 0x5c,
0x65, 0x73, 0x75, 0x6c, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6e, 0x27, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x20, 0x2f, 0x2f, 0x20, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x20, 0x74, 0x68, 0x65,
0x3d, 0x20, 0x2f, 0x5e, 0x28, 0x5c, 0x53, 0x2b, 0x29, 0x3a, 0x5c, 0x73, 0x20, 0x74, 0x65, 0x78, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x6c,
0x28, 0x2e, 0x2a, 0x29, 0x24, 0x2f, 0x67, 0x6d, 0x3b, 0x0a, 0x20, 0x20, 0x69, 0x6e, 0x65, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x65, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x74,
0x73, 0x74, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x6f, 0x66, 0x20, 0x65, 0x78, 0x74, 0x2e, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x28, 0x27, 0x5c,
0x74, 0x65, 0x78, 0x74, 0x2e, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x41, 0x6c, 0x6e, 0x27, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x6c, 0x28, 0x72, 0x65, 0x67, 0x65, 0x78, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x49, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x65,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x78, 0x74, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x6e, 0x27, 0x74, 0x20, 0x65,
0x6c, 0x74, 0x5b, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x31, 0x5d, 0x5d, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x61, 0x20, 0x6c, 0x69,
0x20, 0x3d, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x32, 0x5d, 0x0a, 0x6e, 0x65, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x2c, 0x20, 0x74, 0x68,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x65, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20,
0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x69, 0x73, 0x20, 0x69, 0x6e, 0x63, 0x6f,
0x77, 0x65, 0x20, 0x6b, 0x6e, 0x6f, 0x77, 0x20, 0x74, 0x68, 0x69, 0x73, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x69, 0x73, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x20, 0x2f, 0x2f, 0x20, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x20, 0x69, 0x74,
0x70, 0x2c, 0x20, 0x6c, 0x65, 0x74, 0x27, 0x73, 0x20, 0x6a, 0x75, 0x73, 0x20, 0x69, 0x6e, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72,
0x74, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x6f, 0x20, 0x62, 0x65, 0x20, 0x61, 0x64, 0x64, 0x65, 0x64,
0x20, 0x6a, 0x73, 0x6f, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x64, 0x61, 0x74, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6e, 0x65, 0x78, 0x74,
0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x64, 0x61,
0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x28, 0x21, 0x65, 0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x4c, 0x69,
0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x3b, 0x0a, 0x20, 0x6e, 0x65, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x29, 0x20, 0x7b, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f,
0x20, 0x2b, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x76, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x2e,
0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x70, 0x6f, 0x70, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x79, 0x20, 0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x69, 0x65, 0x6c, 0x64, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x79, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76,
0x69, 0x65, 0x6c, 0x64, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x3b, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f, 0x20,
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x69, 0x52, 0x65, 0x73, 0x65, 0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76,
0x66, 0x20, 0x77, 0x65, 0x20, 0x67, 0x6f, 0x74, 0x20, 0x61, 0x20, 0x73, 0x65, 0x72, 0x20, 0x69, 0x66, 0x20, 0x77, 0x65, 0x20, 0x68, 0x61, 0x76,
0x74, 0x6f, 0x70, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x20, 0x66, 0x72, 0x65, 0x20, 0x61, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x62, 0x72, 0x65,
0x6f, 0x6d, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2c, 0x20, 0x77, 0x61, 0x6b, 0x20, 0x61, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x65, 0x6e,
0x65, 0x20, 0x77, 0x69, 0x6c, 0x6c, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x64, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20,
0x20, 0x68, 0x65, 0x72, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x50, 0x61, 0x72, 0x73,
0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x65, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x73, 0x73, 0x65, 0x20, 0x65, 0x76,
0x61, 0x74, 0x61, 0x2e, 0x73, 0x74, 0x6f, 0x70, 0x29, 0x20, 0x7b, 0x0a, 0x65, 0x6e, 0x74, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x64, 0x64,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x20, 0x74, 0x68, 0x65, 0x6d, 0x20, 0x74, 0x6f, 0x20, 0x72, 0x65, 0x73,
0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x75, 0x6c, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x20, 0x3d, 0x20,
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x2f, 0x5e, 0x28, 0x5c, 0x53, 0x2b, 0x29, 0x3a, 0x5c, 0x73, 0x28, 0x2e,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x2a, 0x29, 0x24, 0x2f, 0x67, 0x6d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74,
0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x69, 0x6e,
0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x65, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d, 0x61, 0x74, 0x63,
0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x68, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x2e, 0x65, 0x78,
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x72, 0x65, 0x63, 0x28, 0x6c, 0x69, 0x6e, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20,
0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6d, 0x61,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x74, 0x63, 0x68, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x5b,
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x31, 0x5d, 0x5d, 0x20, 0x3d, 0x20,
0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x32, 0x5d, 0x0a, 0x20, 0x20, 0x20,
0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x69,
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x6e, 0x63, 0x65, 0x20, 0x77, 0x65, 0x20, 0x6b, 0x6e, 0x6f, 0x77, 0x20,
0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x65, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x2c, 0x20, 0x6c, 0x65, 0x74, 0x27, 0x73,
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6a, 0x75, 0x73, 0x74, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65,
0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6a, 0x73, 0x6f, 0x6e, 0x20, 0x69, 0x6e,
0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x20, 0x64, 0x61, 0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75,
0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20,
0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72,
0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x3d,
0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28,
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29,
0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x79, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d,
0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x63, 0x72, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61,
0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x0a, 0x20,
0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x2f, 0x20, 0x79, 0x69, 0x65, 0x6c, 0x64, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x79, 0x69, 0x65, 0x6c,
0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20, 0x66, 0x64, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x3b, 0x0a, 0x0a, 0x20,
0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x20, 0x69, 0x66, 0x20, 0x77, 0x65, 0x20, 0x67, 0x6f, 0x74, 0x20,
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x61, 0x20, 0x73, 0x74, 0x6f, 0x70, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e,
0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72,
0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x2c, 0x20, 0x77, 0x65, 0x20, 0x77, 0x69, 0x6c, 0x6c, 0x20, 0x62, 0x72,
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x65, 0x61, 0x6b, 0x20, 0x68, 0x65, 0x72, 0x65, 0x0a, 0x20, 0x20, 0x20,
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45, 0x76, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x28, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61,
0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28, 0x2e, 0x73, 0x74, 0x6f, 0x70, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61,
0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x2e, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20,
0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64,
0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74,
0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b,
0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20,
0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x63,
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45,
0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x29,
0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28,
0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c,
0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d,
0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72,
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73,
0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c,
0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c,
0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e,
0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e,
0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74,
0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74,
0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61,
0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77,
0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74,
0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20,
0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68,
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29, 0x29,
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e,
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72,
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e,
0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76,
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74,
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65, 0x6e,
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74,
0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64,
0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69,
0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b,
0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x20,
0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62,
0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20,
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a,
0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61,
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x6d, 0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72,
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20,
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65,
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d,
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e, 0x61, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20,
0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20,
0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79,
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28,
0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65,
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d, 0x72, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20,
0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20,
0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e, 0x79, 0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x63,
0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73, 0x72, 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f,
0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f,
0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f,
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c, 0x72, 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76,
0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20,
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c,
0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f,
0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20,
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45,
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72,
0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20,
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63,
0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28,
0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c,
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f,
0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78,
0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c,
0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72,
0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20,
0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d,
0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54,
0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20,
0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28,
0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20,
0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f,
0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e,
0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20,
0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74,
0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f,
0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b,
0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a,
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e,
0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70,
0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65,
0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e,
0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x74, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c,
0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63,
0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29,
0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75,
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65,
0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69,
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45,
0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73,
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65,
0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74,
0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65,
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e,
0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74,
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73,
0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74,
0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54,
0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74,
0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20,
0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28,
0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73, 0x22, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b,
0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75,
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69,
0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20, 0x6e, 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45,
0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73,
0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f,
0x6e, 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69,
0x6c, 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
0x20, 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29,
0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
0x72, 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65,
0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76,
0x65, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f,
0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74,
0x2e, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20,
0x6e, 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20,
0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f,
0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a,
0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28,
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e,
0x28, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d,
0x3e, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72,
0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f,
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a,
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f,
0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20,
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69,
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a,
0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f,
0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f,
0x6d, 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d,
0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d,
0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20,
0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20,
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e,
0x63, 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20,
0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e,
0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74,
0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e,
0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70,
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d,
0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20,
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75,
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74,
0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c,
0x76, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68,
0x20, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28,
0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a,
0x2f, 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72,
0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a,
0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
0x74, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28,
0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74,
0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c,
0x62, 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28,
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20,
0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72,
0x61, 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20,
0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f,
0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29,
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62,
0x61, 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a,
0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65,
0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20,
0x69, 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68,
0x65, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68,
0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c,
0x20, 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74,
0x20, 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20,
0x73, 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72,
0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
0x61, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d,
0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e,
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65,
0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74,
0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x61,
0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22,
0x2f, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x22,
0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20,
0x72, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20,
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73,
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22, 0x2f,
0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x22, 0x29,
0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72,
0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20,
0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67,
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65,
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
}; };
unsigned int completion_js_len = 4462; unsigned int completion_js_len = 5099;

File diff suppressed because it is too large Load diff

View file

@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
const decoder = new TextDecoder(); const decoder = new TextDecoder();
let content = ""; let content = "";
let leftover = ""; // Buffer for partially read lines
try { try {
let cont = true; let cont = true;
@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
break; break;
} }
// sse answers in the form multiple lines of: value\n with data always present as a key. in our case we // Add any leftover data to the current chunk of data
// mainly care about the data: key here, which we expect as json const text = leftover + decoder.decode(result.value);
const text = decoder.decode(result.value);
// parse all sse events and add them to result // Check if the last character is a line break
const regex = /^(\S+):\s(.*)$/gm; const endsWithLineBreak = text.endsWith('\n');
for (const match of text.matchAll(regex)) {
result[match[1]] = match[2] // Split the text into lines
let lines = text.split('\n');
// If the text doesn't end with a line break, then the last line is incomplete
// Store it in leftover to be added to the next chunk of data
if (!endsWithLineBreak) {
leftover = lines.pop();
} else {
leftover = ""; // Reset leftover if we have a line break at the end
} }
// since we know this is llama.cpp, let's just decode the json in data // Parse all sse events and add them to result
result.data = JSON.parse(result.data); const regex = /^(\S+):\s(.*)$/gm;
content += result.data.content; for (const line of lines) {
const match = regex.exec(line);
if (match) {
result[match[1]] = match[2]
// since we know this is llama.cpp, let's just decode the json in data
if (result.data) {
result.data = JSON.parse(result.data);
content += result.data.content;
// yield // yield
yield result; yield result;
// if we got a stop token from server, we will break here // if we got a stop token from server, we will break here
if (result.data.stop) { if (result.data.stop) {
if (result.data.generation_settings) { if (result.data.generation_settings) {
generation_settings = result.data.generation_settings; generation_settings = result.data.generation_settings;
}
cont = false;
break;
}
}
} }
break;
} }
} }
} catch (e) { } catch (e) {

View file

@ -3,12 +3,11 @@
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" /> <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
<meta name="color-scheme" content="light dark">
<title>llama.cpp - chat</title> <title>llama.cpp - chat</title>
<style> <style>
body { body {
background-color: #fff;
color: #000;
font-family: system-ui; font-family: system-ui;
font-size: 90%; font-size: 90%;
} }
@ -283,8 +282,9 @@
useEffect(() => { useEffect(() => {
// scroll to bottom (if needed) // scroll to bottom (if needed)
if (container.current && container.current.scrollHeight <= container.current.scrollTop + container.current.offsetHeight + 300) { const parent = container.current.parentElement;
container.current.scrollTo(0, container.current.scrollHeight) if (parent && parent.scrollHeight <= parent.scrollTop + parent.offsetHeight + 300) {
parent.scrollTo(0, parent.scrollHeight)
} }
}, [messages]) }, [messages])

View file

@ -1,6 +1,7 @@
#include "common.h" #include "common.h"
#include "llama.h" #include "llama.h"
#include "build-info.h" #include "build-info.h"
#include "grammar-parser.h"
#ifndef NDEBUG #ifndef NDEBUG
// crash the server in debug mode, otherwise send an http 500 error // crash the server in debug mode, otherwise send an http 500 error
@ -195,6 +196,8 @@ struct llama_server_context
llama_context *ctx = nullptr; llama_context *ctx = nullptr;
gpt_params params; gpt_params params;
llama_grammar *grammar = nullptr;
bool truncated = false; bool truncated = false;
bool stopped_eos = false; bool stopped_eos = false;
bool stopped_word = false; bool stopped_word = false;
@ -226,6 +229,7 @@ struct llama_server_context
void rewind() void rewind()
{ {
params.antiprompt.clear(); params.antiprompt.clear();
params.grammar.clear();
num_prompt_tokens = 0; num_prompt_tokens = 0;
num_tokens_predicted = 0; num_tokens_predicted = 0;
generated_text = ""; generated_text = "";
@ -237,6 +241,7 @@ struct llama_server_context
stopped_limit = false; stopped_limit = false;
stopping_word = ""; stopping_word = "";
multibyte_pending = 0; multibyte_pending = 0;
grammar = nullptr;
n_remain = 0; n_remain = 0;
n_past = 0; n_past = 0;
@ -257,6 +262,33 @@ struct llama_server_context
return true; return true;
} }
bool loadGrammar()
{
if (!params.grammar.empty()) {
grammar_parser::parse_state parsed_grammar;
parsed_grammar = grammar_parser::parse(params.grammar.c_str());
// will be empty (default) if there are parse errors
if (parsed_grammar.rules.empty()) {
LOG_ERROR("grammar parse error", {{"grammar", params.grammar}});
return false;
}
grammar_parser::print_grammar(stderr, parsed_grammar);
{
auto it = params.logit_bias.find(llama_token_eos());
if (it != params.logit_bias.end() && it->second == -INFINITY) {
LOG_WARNING("EOS token is disabled, which will cause most grammars to fail", {});
}
}
std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
grammar = llama_grammar_init(
grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
}
return true;
}
void loadPrompt() void loadPrompt()
{ {
params.prompt.insert(0, 1, ' '); // always add a first space params.prompt.insert(0, 1, ' '); // always add a first space
@ -420,6 +452,10 @@ struct llama_server_context
logits[llama_token_nl()] = nl_logit; logits[llama_token_nl()] = nl_logit;
} }
if (grammar != nullptr) {
llama_sample_grammar(ctx, &candidates_p, grammar);
}
if (temp <= 0) if (temp <= 0)
{ {
// Greedy sampling // Greedy sampling
@ -457,10 +493,15 @@ struct llama_server_context
} }
} }
if (grammar != nullptr) {
llama_grammar_accept_token(ctx, grammar, result.tok);
}
for (size_t i = 0; i < std::min(candidates_p.size, (size_t)n_probs); ++i) for (size_t i = 0; i < std::min(candidates_p.size, (size_t)n_probs); ++i)
{ {
result.probs.push_back({candidates_p.data[i].id, candidates_p.data[i].p}); result.probs.push_back({candidates_p.data[i].id, candidates_p.data[i].p});
} }
last_n_tokens.erase(last_n_tokens.begin()); last_n_tokens.erase(last_n_tokens.begin());
last_n_tokens.push_back(result.tok); last_n_tokens.push_back(result.tok);
num_tokens_predicted++; num_tokens_predicted++;
@ -947,6 +988,7 @@ static json format_generation_settings(llama_server_context &llama)
{"stream", llama.stream}, {"stream", llama.stream},
{"logit_bias", llama.params.logit_bias}, {"logit_bias", llama.params.logit_bias},
{"n_probs", llama.params.n_probs}, {"n_probs", llama.params.n_probs},
{"grammar", llama.params.grammar},
}; };
} }
@ -1048,6 +1090,7 @@ static void parse_options_completion(const json &body, llama_server_context &lla
llama.params.n_keep = body.value("n_keep", default_params.n_keep); llama.params.n_keep = body.value("n_keep", default_params.n_keep);
llama.params.seed = body.value("seed", default_params.seed); llama.params.seed = body.value("seed", default_params.seed);
llama.params.prompt = body.value("prompt", default_params.prompt); llama.params.prompt = body.value("prompt", default_params.prompt);
llama.params.grammar = body.value("grammar", default_params.grammar);
llama.params.n_probs = body.value("n_probs", default_params.n_probs); llama.params.n_probs = body.value("n_probs", default_params.n_probs);
llama.params.logit_bias.clear(); llama.params.logit_bias.clear();
@ -1179,6 +1222,12 @@ int main(int argc, char **argv)
parse_options_completion(json::parse(req.body), llama); parse_options_completion(json::parse(req.body), llama);
if (!llama.loadGrammar())
{
res.status = 400;
return;
}
llama.loadPrompt(); llama.loadPrompt();
llama.beginCompletion(); llama.beginCompletion();
@ -1274,7 +1323,11 @@ int main(int argc, char **argv)
sink.done(); sink.done();
return true; return true;
}; };
res.set_chunked_content_provider("text/event-stream", chunked_content_provider); const auto on_complete = [&](bool) {
llama.mutex.unlock();
};
lock.release();
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
} }); } });
svr.Get("/model.json", [&llama](const Request &, Response &res) svr.Get("/model.json", [&llama](const Request &, Response &res)
@ -1330,8 +1383,12 @@ int main(int argc, char **argv)
svr.set_error_handler([](const Request &, Response &res) svr.set_error_handler([](const Request &, Response &res)
{ {
res.set_content("File Not Found", "text/plain"); if (res.status == 400) {
res.status = 404; }); res.set_content("Invalid request", "text/plain");
} else {
res.set_content("File Not Found", "text/plain");
res.status = 404;
} });
// set timeouts and change hostname and port // set timeouts and change hostname and port
svr.set_read_timeout(sparams.read_timeout); svr.set_read_timeout(sparams.read_timeout);
@ -1359,6 +1416,9 @@ int main(int argc, char **argv)
return 1; return 1;
} }
if (llama.grammar != nullptr) {
llama_grammar_free(llama.grammar);
}
llama_backend_free(); llama_backend_free();
return 0; return 0;

View file

@ -123,7 +123,7 @@ int main(int argc, char ** argv)
// Evaluate the tokens : // Evaluate the tokens :
//--------------------------------- //---------------------------------
if ( llama_eval( ctx , tokens_list.data() , tokens_list.size() , llama_get_kv_cache_token_count( ctx ) , params.n_threads ) ) if ( llama_eval( ctx , tokens_list.data() , int(tokens_list.size()) , llama_get_kv_cache_token_count( ctx ) , params.n_threads ) )
{ {
fprintf( stderr, "%s : failed to eval\n" , __func__ ); fprintf( stderr, "%s : failed to eval\n" , __func__ );
return 1; return 1;

File diff suppressed because it is too large Load diff

View file

@ -7,6 +7,11 @@
#import <Metal/Metal.h> #import <Metal/Metal.h>
#import <MetalPerformanceShaders/MetalPerformanceShaders.h> #import <MetalPerformanceShaders/MetalPerformanceShaders.h>
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#ifdef GGML_METAL_NDEBUG #ifdef GGML_METAL_NDEBUG
#define metal_printf(...) #define metal_printf(...)
#else #else
@ -15,6 +20,8 @@
#define UNUSED(x) (void)(x) #define UNUSED(x) (void)(x)
#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
struct ggml_metal_buffer { struct ggml_metal_buffer {
const char * name; const char * name;
@ -36,7 +43,7 @@ struct ggml_metal_context {
int n_buffers; int n_buffers;
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS]; struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
int concur_list[GGML_MAX_NODES]; int concur_list[GGML_MAX_CONCUR];
int concur_list_len; int concur_list_len;
// custom kernels // custom kernels
@ -370,15 +377,15 @@ void ggml_metal_graph_find_concurrency(
struct ggml_metal_context * ctx, struct ggml_metal_context * ctx,
struct ggml_cgraph * gf) { struct ggml_cgraph * gf) {
int search_depth = gf->n_nodes; //we only find concurrency in this range to avoid wasting too much time int search_depth = gf->n_nodes; //we only find concurrency in this range to avoid wasting too much time
int nodes_unused[GGML_MAX_NODES]; int nodes_unused[GGML_MAX_CONCUR];
for (int i = 0; i < GGML_MAX_NODES; i++) {ctx->concur_list[i] = 0;} for (int i = 0; i < GGML_MAX_CONCUR; i++) { ctx->concur_list[i] = 0; }
for (int i = 0; i < gf->n_nodes; i++) {nodes_unused[i] = 1;} for (int i = 0; i < gf->n_nodes; i++) { nodes_unused[i] = 1; }
ctx->concur_list_len = 0; ctx->concur_list_len = 0;
int n_left = gf->n_nodes; int n_left = gf->n_nodes;
int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos
while (n_left > 0) { while (n_left > 0) {
// number of nodes at a layer (that can be issued concurrently) // number of nodes at a layer (that can be issued concurrently)
@ -386,28 +393,40 @@ void ggml_metal_graph_find_concurrency(
for (int i = n_start; i < ((n_start + search_depth > gf->n_nodes) ? gf->n_nodes : n_start + search_depth); i++) { for (int i = n_start; i < ((n_start + search_depth > gf->n_nodes) ? gf->n_nodes : n_start + search_depth); i++) {
if (nodes_unused[i]) { if (nodes_unused[i]) {
// if the requirements for gf->nodes[i] are satisfied // if the requirements for gf->nodes[i] are satisfied
int exe_flag=1; int exe_flag = 1;
// scan all srcs // scan all srcs
for (int src_ind = 0; src_ind < GGML_MAX_SRC; src_ind++) { for (int src_ind = 0; src_ind < GGML_MAX_SRC; src_ind++) {
struct ggml_tensor * src_cur = gf->nodes[i]->src[src_ind]; struct ggml_tensor * src_cur = gf->nodes[i]->src[src_ind];
if (src_cur) { if (src_cur) {
// if is leaf nodes it's satisfied. // if is leaf nodes it's satisfied.
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {continue;} // TODO: ggml_is_leaf()
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {
continue;
}
// otherwise this src should be the output from previous nodes. // otherwise this src should be the output from previous nodes.
int is_found = 0; int is_found = 0;
// scan 2*search_depth back because we inserted barrier. // scan 2*search_depth back because we inserted barrier.
for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) { //for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
if (gf->nodes[ctx->concur_list[j]] == src_cur) {is_found = 1; break;} for (int j = MAX(0, level_pos - 2*search_depth); j < level_pos; j++) {
if (ctx->concur_list[j] >= 0 && gf->nodes[ctx->concur_list[j]] == src_cur) {
is_found = 1;
break;
}
}
if (is_found == 0) {
exe_flag = 0;
break;
} }
if (is_found == 0) {exe_flag = 0; break;}
} }
} }
if (exe_flag) { if (exe_flag) {
// check if nodes[i]'s data will be overwritten by a node before nodes[i]. // check if nodes[i]'s data will be overwritten by a node before nodes[i].
// if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3] // if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3]
int64_t data_start = (int64_t) gf->nodes[i]->data; int64_t data_start = (int64_t) gf->nodes[i]->data;
int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]); int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]);
for (int j = n_start; j < i; j++) { for (int j = n_start; j < i; j++) {
if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \ if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \
&& gf->nodes[j]->op != GGML_OP_VIEW \ && gf->nodes[j]->op != GGML_OP_VIEW \
@ -416,9 +435,9 @@ void ggml_metal_graph_find_concurrency(
if (((int64_t)gf->nodes[j]->data) >= data_start + length || \ if (((int64_t)gf->nodes[j]->data) >= data_start + length || \
((int64_t)gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) { ((int64_t)gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) {
continue; continue;
} else {
exe_flag = 0;
} }
exe_flag = 0;
} }
} }
} }
@ -435,11 +454,13 @@ void ggml_metal_graph_find_concurrency(
ctx->concur_list[level_pos + concurrency] = -1; ctx->concur_list[level_pos + concurrency] = -1;
ctx->concur_list_len++; ctx->concur_list_len++;
// jump all sorted nodes at nodes_bak // jump all sorted nodes at nodes_bak
while (!nodes_unused[n_start]) {n_start++;} while (!nodes_unused[n_start]) {
n_start++;
}
level_pos += concurrency + 1; level_pos += concurrency + 1;
} }
if (ctx->concur_list_len > GGML_MAX_NODES) { if (ctx->concur_list_len > GGML_MAX_CONCUR) {
fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__); fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
} }
} }
@ -453,7 +474,7 @@ void ggml_metal_graph_compute(
// else fallback to serial dispatch // else fallback to serial dispatch
MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor; MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_NODES; const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_CONCUR;
const int n_nodes = has_concur ? ctx->concur_list_len : gf->n_nodes; const int n_nodes = has_concur ? ctx->concur_list_len : gf->n_nodes;
edesc.dispatchType = has_concur ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial; edesc.dispatchType = has_concur ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial;
@ -718,7 +739,8 @@ void ggml_metal_graph_compute(
// TODO: needs to be updated after PR: https://github.com/ggerganov/ggml/pull/224 // TODO: needs to be updated after PR: https://github.com/ggerganov/ggml/pull/224
GGML_ASSERT(ne00 == ne10); GGML_ASSERT(ne00 == ne10);
GGML_ASSERT(ne02 == ne12); // GGML_ASSERT(ne02 == ne12); // Should be checked on individual data types until broadcast is implemented everywhere
GGML_ASSERT(ne03 == ne13);
if (ggml_is_contiguous(src0) && if (ggml_is_contiguous(src0) &&
ggml_is_contiguous(src1) && ggml_is_contiguous(src1) &&
@ -746,11 +768,11 @@ void ggml_metal_graph_compute(
initWithDevice:ctx->device transposeLeft:false transposeRight:true initWithDevice:ctx->device transposeLeft:false transposeRight:true
resultRows:ne11 resultColumns:ne01 interiorColumns:ne00 alpha:1.0 beta:0.0]; resultRows:ne11 resultColumns:ne01 interiorColumns:ne00 alpha:1.0 beta:0.0];
// we need to do ne02 multiplications // we need to do ne12 multiplications
// TODO: is there a way to do this in parallel - currently very slow .. // TODO: is there a way to do this in parallel - currently very slow ..
// TODO: might be possible to offload part of the computation to ANE using Accelerate's CBLAS // TODO: might be possible to offload part of the computation to ANE using Accelerate's CBLAS
for (int64_t i02 = 0; i02 < ne02; ++i02) { for (int64_t i02 = 0; i02 < ne12; ++i02) {
size_t offs_src0_cur = offs_src0 + i02*nb02; size_t offs_src0_cur = offs_src0 + i02/(ne12/ne02)*nb02; // gqa not used for now
size_t offs_src1_cur = offs_src1 + i02*nb12; size_t offs_src1_cur = offs_src1 + i02*nb12;
size_t offs_dst_cur = offs_dst + i02*nb2; size_t offs_dst_cur = offs_dst + i02*nb2;
@ -772,8 +794,6 @@ void ggml_metal_graph_compute(
switch (src0t) { switch (src0t) {
case GGML_TYPE_F16: case GGML_TYPE_F16:
{ {
GGML_ASSERT(ne02 == ne12);
nth0 = 64; nth0 = 64;
nth1 = 1; nth1 = 1;
[encoder setComputePipelineState:ctx->pipeline_mul_mat_f16_f32]; [encoder setComputePipelineState:ctx->pipeline_mul_mat_f16_f32];
@ -853,16 +873,18 @@ void ggml_metal_graph_compute(
[encoder setBuffer:id_dst offset:offs_dst atIndex:2]; [encoder setBuffer:id_dst offset:offs_dst atIndex:2];
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3]; [encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4]; [encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4];
[encoder setBytes:&nb00 length:sizeof(nb00) atIndex:5]; [encoder setBytes:&ne02 length:sizeof(ne02) atIndex:5];
[encoder setBytes:&nb01 length:sizeof(nb01) atIndex:6]; [encoder setBytes:&nb00 length:sizeof(nb00) atIndex:6];
[encoder setBytes:&nb02 length:sizeof(nb02) atIndex:7]; [encoder setBytes:&nb01 length:sizeof(nb01) atIndex:7];
[encoder setBytes:&ne10 length:sizeof(ne10) atIndex:8]; [encoder setBytes:&nb02 length:sizeof(nb02) atIndex:8];
[encoder setBytes:&ne11 length:sizeof(ne11) atIndex:9]; [encoder setBytes:&ne10 length:sizeof(ne10) atIndex:9];
[encoder setBytes:&nb10 length:sizeof(nb10) atIndex:10]; [encoder setBytes:&ne11 length:sizeof(ne11) atIndex:10];
[encoder setBytes:&nb11 length:sizeof(nb11) atIndex:11]; [encoder setBytes:&ne12 length:sizeof(ne12) atIndex:11];
[encoder setBytes:&nb12 length:sizeof(nb12) atIndex:12]; [encoder setBytes:&nb10 length:sizeof(nb10) atIndex:12];
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:13]; [encoder setBytes:&nb11 length:sizeof(nb11) atIndex:13];
[encoder setBytes:&ne1 length:sizeof(ne1) atIndex:14]; [encoder setBytes:&nb12 length:sizeof(nb12) atIndex:14];
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:15];
[encoder setBytes:&ne1 length:sizeof(ne1) atIndex:16];
if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 || if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_Q4_K) { src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_Q4_K) {

View file

@ -509,11 +509,13 @@ kernel void kernel_mul_mat_f16_f32(
device float * dst, device float * dst,
constant int64_t & ne00, constant int64_t & ne00,
constant int64_t & ne01, constant int64_t & ne01,
constant int64_t & ne02,
constant uint64_t & nb00, constant uint64_t & nb00,
constant uint64_t & nb01, constant uint64_t & nb01,
constant uint64_t & nb02, constant uint64_t & nb02,
constant int64_t & ne10, constant int64_t & ne10,
constant int64_t & ne11, constant int64_t & ne11,
constant int64_t & ne12,
constant uint64_t & nb10, constant uint64_t & nb10,
constant uint64_t & nb11, constant uint64_t & nb11,
constant uint64_t & nb12, constant uint64_t & nb12,
@ -529,7 +531,7 @@ kernel void kernel_mul_mat_f16_f32(
const int64_t r1 = tgpig.y; const int64_t r1 = tgpig.y;
const int64_t im = tgpig.z; const int64_t im = tgpig.z;
device const half * x = (device const half *) (src0 + r0*nb01 + im*nb02); device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
device const float * y = (device const float *) (src1 + r1*nb11 + im*nb12); device const float * y = (device const float *) (src1 + r1*nb11 + im*nb12);
sum[tpitg.x] = 0.0f; sum[tpitg.x] = 0.0f;
@ -552,6 +554,7 @@ kernel void kernel_mul_mat_f16_f32(
} }
} }
kernel void kernel_alibi_f32( kernel void kernel_alibi_f32(
device const float * src0, device const float * src0,
device float * dst, device float * dst,

544
ggml.c
View file

@ -195,8 +195,8 @@ typedef void * thread_ret_t;
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN) #define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr) #define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else #else
inline static void* ggml_aligned_malloc(size_t size) { inline static void * ggml_aligned_malloc(size_t size) {
void* aligned_memory = NULL; void * aligned_memory = NULL;
#ifdef GGML_USE_METAL #ifdef GGML_USE_METAL
int result = posix_memalign(&aligned_memory, getpagesize(), size); int result = posix_memalign(&aligned_memory, getpagesize(), size);
#else #else
@ -3811,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK", "CROSS_ENTROPY_LOSS_BACK",
}; };
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none", "none",
@ -3883,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)", "cross_entropy_loss_back(x,y)",
}; };
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59"); static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
@ -4110,7 +4110,7 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
// //
// is enough, but just in case, adding the second part // is enough, but just in case, adding the second part
return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]); return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
} }
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) { size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
@ -4253,7 +4253,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
} }
static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) { bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return return
@ -4602,7 +4602,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
/*.ne =*/ { 1, 1, 1, 1 }, /*.ne =*/ { 1, 1, 1, 1 },
/*.nb =*/ { 0, 0, 0, 0 }, /*.nb =*/ { 0, 0, 0, 0 },
/*.op =*/ GGML_OP_NONE, /*.op =*/ GGML_OP_NONE,
/*.op_params =*/ {0}, /*.op_params =*/ { 0 },
/*.is_param =*/ false, /*.is_param =*/ false,
/*.grad =*/ NULL, /*.grad =*/ NULL,
/*.src =*/ { NULL }, /*.src =*/ { NULL },
@ -4634,6 +4634,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
} }
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) { static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
assert(params_size <= GGML_MAX_OP_PARAMS); assert(params_size <= GGML_MAX_OP_PARAMS);
memcpy(tensor->op_params, params, params_size); memcpy(tensor->op_params, params, params_size);
} }
@ -6439,7 +6440,7 @@ struct ggml_tensor * ggml_permute(
result->src[0] = a; result->src[0] = a;
int32_t params[] = { axis0, axis1, axis2, axis3 }; int32_t params[] = { axis0, axis1, axis2, axis3 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
return result; return result;
} }
@ -6565,7 +6566,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
int32_t params[] = { n_past, inplace ? 1 : 0 }; int32_t params[] = { n_past, inplace ? 1 : 0 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_DIAG_MASK_INF; result->op = GGML_OP_DIAG_MASK_INF;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6605,7 +6606,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
int32_t params[] = { n_past, inplace ? 1 : 0 }; int32_t params[] = { n_past, inplace ? 1 : 0 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_DIAG_MASK_ZERO; result->op = GGML_OP_DIAG_MASK_ZERO;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6721,9 +6722,9 @@ static struct ggml_tensor * ggml_rope_impl(
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
int32_t params[6] = { n_past, n_dims, mode, n_ctx }; int32_t params[6] = { n_past, n_dims, mode, n_ctx };
memcpy(params + 4, &freq_base, sizeof(float)); memcpy(params + 4, &freq_base, sizeof(float));
memcpy(params + 5, &freq_scale, sizeof(float)); memcpy(params + 5, &freq_scale, sizeof(float));
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_ROPE; result->op = GGML_OP_ROPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6797,7 +6798,7 @@ struct ggml_tensor * ggml_rope_back(
struct ggml_tensor * result = ggml_dup_tensor(ctx, a); struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
int32_t params[] = { n_past, n_dims, mode, n_ctx }; int32_t params[] = { n_past, n_dims, mode, n_ctx };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_ROPE_BACK; result->op = GGML_OP_ROPE_BACK;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6828,7 +6829,7 @@ struct ggml_tensor * ggml_alibi(
int32_t op_params[3] = { n_past, n_head }; int32_t op_params[3] = { n_past, n_head };
memcpy(op_params + 2, &bias_max, sizeof(float)); memcpy(op_params + 2, &bias_max, sizeof(float));
ggml_set_op_params(result, &op_params, sizeof(op_params)); ggml_set_op_params(result, op_params, sizeof(op_params));
result->op = GGML_OP_ALIBI; result->op = GGML_OP_ALIBI;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6855,7 +6856,7 @@ struct ggml_tensor * ggml_clamp(
struct ggml_tensor * result = ggml_view_tensor(ctx, a); struct ggml_tensor * result = ggml_view_tensor(ctx, a);
float params[] = { min, max }; float params[] = { min, max };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_CLAMP; result->op = GGML_OP_CLAMP;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6890,10 +6891,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0), ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
a->ne[2], 1, 1, a->ne[2], 1, 1,
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
int32_t params[] = { s0, p0, d0 }; int32_t params[] = { s0, p0, d0 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_CONV_1D; result->op = GGML_OP_CONV_1D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6905,10 +6906,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
// ggml_conv_2d // ggml_conv_2d
struct ggml_tensor* ggml_conv_2d( struct ggml_tensor * ggml_conv_2d(
struct ggml_context* ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
int s0, int s0,
int s1, int s1,
int p0, int p0,
@ -6929,10 +6930,10 @@ struct ggml_tensor* ggml_conv_2d(
ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1), ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
a->ne[3], b->ne[3], a->ne[3], b->ne[3],
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
int32_t params[] = { s0, s1, p0, p1, d0, d1 }; int32_t params[] = { s0, s1, p0, p1, d0, d1 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_CONV_2D; result->op = GGML_OP_CONV_2D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6945,7 +6946,7 @@ struct ggml_tensor* ggml_conv_2d(
// ggml_conv_1d_ph // ggml_conv_1d_ph
struct ggml_tensor* ggml_conv_1d_ph( struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
@ -6963,7 +6964,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
// ggml_pool_1d // ggml_pool_1d
struct ggml_tensor* ggml_pool_1d( struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -6982,10 +6983,10 @@ struct ggml_tensor* ggml_pool_1d(
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0), ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
a->ne[1], a->ne[1],
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
int32_t params[] = { op, k0, s0, p0 }; int32_t params[] = { op, k0, s0, p0 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_POOL_1D; result->op = GGML_OP_POOL_1D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -6996,7 +6997,7 @@ struct ggml_tensor* ggml_pool_1d(
// ggml_pool_2d // ggml_pool_2d
struct ggml_tensor* ggml_pool_2d( struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -7019,10 +7020,10 @@ struct ggml_tensor* ggml_pool_2d(
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1), ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
a->ne[2], a->ne[2],
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 }; int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_POOL_2D; result->op = GGML_OP_POOL_2D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -7190,7 +7191,7 @@ struct ggml_tensor * ggml_win_part(
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
int32_t params[] = { npx, npy, w }; int32_t params[] = { npx, npy, w };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_WIN_PART; result->op = GGML_OP_WIN_PART;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -7220,7 +7221,7 @@ struct ggml_tensor * ggml_win_unpart(
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne); struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
int32_t params[] = { w }; int32_t params[] = { w };
ggml_set_op_params(result, &params, sizeof(params)); ggml_set_op_params(result, params, sizeof(params));
result->op = GGML_OP_WIN_UNPART; result->op = GGML_OP_WIN_UNPART;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@ -7349,7 +7350,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
return ggml_map_binary_impl_f32(ctx, a, b, fun, true); return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
} }
// ggml_map_custom1 // ggml_map_custom1_f32
static struct ggml_tensor * ggml_map_custom1_impl_f32( static struct ggml_tensor * ggml_map_custom1_impl_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
@ -7366,7 +7367,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
result->op = GGML_OP_MAP_CUSTOM1; result->op = GGML_OP_MAP_CUSTOM1_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a; result->src[0] = a;
@ -7387,7 +7388,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
return ggml_map_custom1_impl_f32(ctx, a, fun, true); return ggml_map_custom1_impl_f32(ctx, a, fun, true);
} }
// ggml_map_custom2 // ggml_map_custom2_f32
static struct ggml_tensor * ggml_map_custom2_impl_f32( static struct ggml_tensor * ggml_map_custom2_impl_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
@ -7405,7 +7406,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
result->op = GGML_OP_MAP_CUSTOM2; result->op = GGML_OP_MAP_CUSTOM2_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a; result->src[0] = a;
result->src[1] = b; result->src[1] = b;
@ -7429,7 +7430,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true); return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
} }
// ggml_map_custom3 // ggml_map_custom3_f32
static struct ggml_tensor * ggml_map_custom3_impl_f32( static struct ggml_tensor * ggml_map_custom3_impl_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
@ -7448,7 +7449,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
ggml_set_op_params(result, (const void *) &fun, sizeof(fun)); ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
result->op = GGML_OP_MAP_CUSTOM3; result->op = GGML_OP_MAP_CUSTOM3_F32;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a; result->src[0] = a;
result->src[1] = b; result->src[1] = b;
@ -7475,6 +7476,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true); return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
} }
// ggml_map_custom1
struct ggml_map_custom1_op_params {
ggml_custom1_op_t fun;
int n_tasks;
void * userdata;
};
static struct ggml_tensor * ggml_map_custom1_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
bool is_node = false;
if (!inplace && a->grad) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
struct ggml_map_custom1_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM1;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
return result;
}
struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
}
struct ggml_tensor * ggml_map_custom1_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
const ggml_custom1_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
}
// ggml_map_custom2
struct ggml_map_custom2_op_params {
ggml_custom2_op_t fun;
int n_tasks;
void * userdata;
};
static struct ggml_tensor * ggml_map_custom2_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
bool is_node = false;
if (!inplace && (a->grad || b->grad)) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
struct ggml_map_custom2_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM2;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
return result;
}
struct ggml_tensor * ggml_map_custom2(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
}
struct ggml_tensor * ggml_map_custom2_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
const ggml_custom2_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
}
// ggml_map_custom3
struct ggml_map_custom3_op_params {
ggml_custom3_op_t fun;
int n_tasks;
void * userdata;
};
static struct ggml_tensor * ggml_map_custom3_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_t fun,
int n_tasks,
void * userdata,
bool inplace) {
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
bool is_node = false;
if (!inplace && (a->grad || b->grad || c->grad)) {
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
struct ggml_map_custom3_op_params params = {
/*.fun =*/ fun,
/*.n_tasks =*/ n_tasks,
/*.userdata =*/ userdata
};
ggml_set_op_params(result, (const void *) &params, sizeof(params));
result->op = GGML_OP_MAP_CUSTOM3;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
result->src[2] = c;
return result;
}
struct ggml_tensor * ggml_map_custom3(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
}
struct ggml_tensor * ggml_map_custom3_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
const ggml_custom3_op_t fun,
int n_tasks,
void * userdata) {
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
}
// ggml_cross_entropy_loss // ggml_cross_entropy_loss
struct ggml_tensor * ggml_cross_entropy_loss( struct ggml_tensor * ggml_cross_entropy_loss(
@ -9283,8 +9468,8 @@ static void ggml_compute_forward_sum_rows_f32(
for (int64_t i3 = 0; i3 < ne03; i3++) { for (int64_t i3 = 0; i3 < ne03; i3++) {
for (int64_t i2 = 0; i2 < ne02; i2++) { for (int64_t i2 = 0; i2 < ne02; i2++) {
for (int64_t i1 = 0; i1 < ne01; i1++) { for (int64_t i1 = 0; i1 < ne01; i1++) {
float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03); float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3); float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
float row_sum = 0; float row_sum = 0;
ggml_vec_sum_f32(ne00, &row_sum, src_row); ggml_vec_sum_f32(ne00, &row_sum, src_row);
dst_row[0] = row_sum; dst_row[0] = row_sum;
@ -10546,72 +10731,96 @@ static void ggml_compute_forward_mul_mat(
return; return;
} }
// parallelize by src0 rows
const int64_t dr = (ne01 + nth - 1)/nth;
const int64_t ir10 = dr*ith;
const int64_t ir11 = MIN(ir10 + dr, ne01);
// src1 rows
const int64_t nr1 = ne11*ne12*ne13;
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata; const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type]; const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
for (int64_t ir1 = 0; ir1 < nr1; ++ir1) { const int64_t nr0 = ne01; // src0 rows
const int64_t i13 = (ir1/(ne12*ne11)); const int64_t nr1 = ne11*ne12*ne13; // src1 rows
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
const int64_t ir0 = (ir1/ne11)%(ne02*ne03); //printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
const int64_t i03 = (ir0/(ne02));
// Hack for "Falcon multi-query-attention key stutter" / alternative to ggml_repeat2.
// See https://github.com/ggerganov/llama.cpp/issues/1602#issuecomment-1606087470:
// GG: this is likely the correct way to broadcast, though need some more thought
// therefore leaving the comments to remind us for now
const int64_t i02 = (i12 / (ne12 / ne02));
// Original from PR/224 (and also essential/correct for non-broadcast matmuls in Falcon)
// const int64_t i02 = (ir0 - i03*ne02);
const int64_t i1 = i11; // distribute the thread work across the inner or outer loop based on which one is larger
const int64_t i2 = i12;
const int64_t i3 = i13;
const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 ); const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides const int64_t ith0 = ith % nth0;
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using const int64_t ith1 = ith / nth0;
// the original src1 data pointer, so we should index using the indices directly
// TODO: this is a bit of a hack, we should probably have a better way to handle this
const char * src1_col = (const char *) wdata +
(src1_cont || src1->type != vec_dot_type
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
: (i11*nb11 + i12*nb12 + i13*nb13));
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3)); const int64_t dr0 = (nr0 + nth0 - 1)/nth0;
const int64_t dr1 = (nr1 + nth1 - 1)/nth1;
for (int64_t ir = ir10; ir < ir11; ++ir) { const int64_t ir010 = dr0*ith0;
vec_dot(ne00, &dst_col[ir], src0_row + ir*nb01, src1_col); const int64_t ir011 = MIN(ir010 + dr0, nr0);
}
const int64_t ir110 = dr1*ith1;
const int64_t ir111 = MIN(ir110 + dr1, nr1);
//printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
// threads with no work simply yield (not sure if it helps)
if (ir010 >= ir011 || ir110 >= ir111) {
sched_yield();
return;
} }
//int64_t t1 = ggml_time_us(); assert(ne12 % ne02 == 0);
//static int64_t acc = 0; assert(ne13 % ne03 == 0);
//acc += t1 - t0;
//if (t1 - t0 > 10) {
// printf("\n");
// printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03);
// printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03);
// printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13);
// printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc); // broadcast factors
//} const int64_t r2 = ne12/ne02;
const int64_t r3 = ne13/ne03;
// block-tiling attempt
const int64_t blck_0 = 16;
const int64_t blck_1 = 16;
// attempt to reduce false-sharing (does not seem to make a difference)
float tmp[16];
for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
const int64_t i13 = (ir1/(ne12*ne11));
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
// broadcast src0 into src1
const int64_t i03 = i13/r3;
const int64_t i02 = i12/r2;
const int64_t i1 = i11;
const int64_t i2 = i12;
const int64_t i3 = i13;
const char * src0_row = (const char *) src0->data + (0 + i02*nb02 + i03*nb03);
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
// the original src1 data pointer, so we should index using the indices directly
// TODO: this is a bit of a hack, we should probably have a better way to handle this
const char * src1_col = (const char *) wdata +
(src1_cont || src1->type != vec_dot_type
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
: (i11*nb11 + i12*nb12 + i13*nb13));
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
//}
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
}
memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
}
}
}
} }
// ggml_compute_forward_out_prod // ggml_compute_forward_out_prod
static void ggml_compute_forward_out_prod_f32( static void ggml_compute_forward_out_prod_f32(
const struct ggml_compute_params * params, const struct ggml_compute_params * params,
const struct ggml_tensor * src0, const struct ggml_tensor * src0,
@ -12894,7 +13103,7 @@ static void ggml_compute_forward_pool_1d(
const struct ggml_tensor * src0, const struct ggml_tensor * src0,
struct ggml_tensor * dst) { struct ggml_tensor * dst) {
const int32_t* opts = (const int32_t*)dst->op_params; const int32_t * opts = (const int32_t *)dst->op_params;
enum ggml_op_pool op = opts[0]; enum ggml_op_pool op = opts[0];
const int k0 = opts[1]; const int k0 = opts[1];
const int s0 = opts[2]; const int s0 = opts[2];
@ -14227,24 +14436,6 @@ static void ggml_compute_forward_map_custom1_f32(
fun(dst, a); fun(dst, a);
} }
static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst,
const ggml_custom1_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_map_custom2 // ggml_compute_forward_map_custom2
static void ggml_compute_forward_map_custom2_f32( static void ggml_compute_forward_map_custom2_f32(
@ -14263,24 +14454,6 @@ static void ggml_compute_forward_map_custom2_f32(
} }
static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst,
const ggml_custom2_op_f32_t fun) {
switch (a->type) {
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
}
}
// ggml_compute_forward_map_custom3 // ggml_compute_forward_map_custom3
static void ggml_compute_forward_map_custom3_f32( static void ggml_compute_forward_map_custom3_f32(
@ -14299,24 +14472,52 @@ static void ggml_compute_forward_map_custom3_f32(
fun(dst, a, b, c); fun(dst, a, b, c);
} }
// ggml_compute_forward_map_custom1
static void ggml_compute_forward_map_custom1(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
struct ggml_tensor * dst) {
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
p->fun(dst, a, params->ith, params->nth, p->userdata);
}
// ggml_compute_forward_map_custom2
static void ggml_compute_forward_map_custom2(
const struct ggml_compute_params * params,
const struct ggml_tensor * a,
const struct ggml_tensor * b,
struct ggml_tensor * dst) {
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
p->fun(dst, a, b, params->ith, params->nth, p->userdata);
}
// ggml_compute_forward_map_custom3
static void ggml_compute_forward_map_custom3( static void ggml_compute_forward_map_custom3(
const struct ggml_compute_params * params, const struct ggml_compute_params * params,
const struct ggml_tensor * a, const struct ggml_tensor * a,
const struct ggml_tensor * b, const struct ggml_tensor * b,
const struct ggml_tensor * c, const struct ggml_tensor * c,
struct ggml_tensor * dst, struct ggml_tensor * dst) {
const ggml_custom3_op_f32_t fun) { if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
switch (a->type) { return;
case GGML_TYPE_F32:
{
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
} break;
default:
{
GGML_ASSERT(false);
} break;
} }
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
} }
// ggml_compute_forward_cross_entropy_loss // ggml_compute_forward_cross_entropy_loss
@ -14838,25 +15039,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun); ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
} }
break; break;
case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM1_F32:
{ {
ggml_custom1_op_f32_t fun; ggml_custom1_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun)); memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun); ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM2_F32:
{
ggml_custom2_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM3_F32:
{
ggml_custom3_op_f32_t fun;
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
}
break;
case GGML_OP_MAP_CUSTOM1:
{
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
} }
break; break;
case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM2:
{ {
ggml_custom2_op_f32_t fun; ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
} }
break; break;
case GGML_OP_MAP_CUSTOM3: case GGML_OP_MAP_CUSTOM3:
{ {
ggml_custom3_op_f32_t fun; ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
memcpy(&fun, tensor->op_params, sizeof(fun));
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
} }
break; break;
case GGML_OP_CROSS_ENTROPY_LOSS: case GGML_OP_CROSS_ENTROPY_LOSS:
@ -15664,6 +15880,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break; } break;
case GGML_OP_MAP_UNARY: case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY: case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1_F32:
case GGML_OP_MAP_CUSTOM2_F32:
case GGML_OP_MAP_CUSTOM3_F32:
case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM1:
case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3: case GGML_OP_MAP_CUSTOM3:
@ -16449,12 +16668,39 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
case GGML_OP_WIN_UNPART: case GGML_OP_WIN_UNPART:
case GGML_OP_MAP_UNARY: case GGML_OP_MAP_UNARY:
case GGML_OP_MAP_BINARY: case GGML_OP_MAP_BINARY:
case GGML_OP_MAP_CUSTOM1: case GGML_OP_MAP_CUSTOM1_F32:
case GGML_OP_MAP_CUSTOM2: case GGML_OP_MAP_CUSTOM2_F32:
case GGML_OP_MAP_CUSTOM3: case GGML_OP_MAP_CUSTOM3_F32:
{ {
n_tasks = 1; n_tasks = 1;
} break; } break;
case GGML_OP_MAP_CUSTOM1:
{
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_MAP_CUSTOM2:
{
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_MAP_CUSTOM3:
{
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
if (p->n_tasks == GGML_N_TASKS_MAX) {
n_tasks = n_threads;
} else {
n_tasks = MIN(p->n_tasks, n_threads);
}
} break;
case GGML_OP_CROSS_ENTROPY_LOSS: case GGML_OP_CROSS_ENTROPY_LOSS:
{ {
n_tasks = n_threads; n_tasks = n_threads;

145
ggml.h
View file

@ -183,6 +183,15 @@
# define GGML_API # define GGML_API
#endif #endif
// TODO: support for clang
#ifdef __GNUC__
# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
#elif defined(_MSC_VER)
# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
#else
# define GGML_DEPRECATED(func, hint) func
#endif
#include <stdint.h> #include <stdint.h>
#include <stddef.h> #include <stddef.h>
#include <stdbool.h> #include <stdbool.h>
@ -374,6 +383,10 @@ extern "C" {
GGML_OP_MAP_UNARY, GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY, GGML_OP_MAP_BINARY,
GGML_OP_MAP_CUSTOM1_F32,
GGML_OP_MAP_CUSTOM2_F32,
GGML_OP_MAP_CUSTOM3_F32,
GGML_OP_MAP_CUSTOM1, GGML_OP_MAP_CUSTOM1,
GGML_OP_MAP_CUSTOM2, GGML_OP_MAP_CUSTOM2,
GGML_OP_MAP_CUSTOM3, GGML_OP_MAP_CUSTOM3,
@ -570,6 +583,8 @@ extern "C" {
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor); GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor); GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
// use this to compute the memory overhead of a tensor // use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void); GGML_API size_t ggml_tensor_overhead(void);
@ -1240,7 +1255,7 @@ extern "C" {
// conv_1d with padding = half // conv_1d with padding = half
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d) // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
GGML_API struct ggml_tensor* ggml_conv_1d_ph( GGML_API struct ggml_tensor * ggml_conv_1d_ph(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
@ -1253,7 +1268,7 @@ extern "C" {
GGML_OP_POOL_COUNT, GGML_OP_POOL_COUNT,
}; };
GGML_API struct ggml_tensor* ggml_pool_1d( GGML_API struct ggml_tensor * ggml_pool_1d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -1261,7 +1276,7 @@ extern "C" {
int s0, // stride int s0, // stride
int p0); // padding int p0); // padding
GGML_API struct ggml_tensor* ggml_pool_2d( GGML_API struct ggml_tensor * ggml_pool_2d(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_op_pool op, enum ggml_op_pool op,
@ -1315,15 +1330,6 @@ extern "C" {
int h0, int h0,
int w); int w);
// custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
GGML_API struct ggml_tensor * ggml_unary( GGML_API struct ggml_tensor * ggml_unary(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
@ -1334,63 +1340,138 @@ extern "C" {
struct ggml_tensor * a, struct ggml_tensor * a,
enum ggml_unary_op op); enum ggml_unary_op op);
GGML_API struct ggml_tensor * ggml_map_unary_f32( // custom operators
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_unary_op_f32_t fun); ggml_unary_op_f32_t fun),
"use ggml_map_custom1 instead");
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_unary_op_f32_t fun); ggml_unary_op_f32_t fun),
"use ggml_map_custom1_inplace instead");
GGML_API struct ggml_tensor * ggml_map_binary_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_binary_op_f32_t fun); ggml_binary_op_f32_t fun),
"use ggml_map_custom2 instead");
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_binary_op_f32_t fun); ggml_binary_op_f32_t fun),
"use ggml_map_custom2_inplace instead");
GGML_API struct ggml_tensor * ggml_map_custom1_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_custom1_op_f32_t fun); ggml_custom1_op_f32_t fun),
"use ggml_map_custom1 instead");
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
ggml_custom1_op_f32_t fun); ggml_custom1_op_f32_t fun),
"use ggml_map_custom1_inplace instead");
GGML_API struct ggml_tensor * ggml_map_custom2_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_custom2_op_f32_t fun); ggml_custom2_op_f32_t fun),
"use ggml_map_custom2 instead");
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
ggml_custom2_op_f32_t fun); ggml_custom2_op_f32_t fun),
"use ggml_map_custom2_inplace instead");
GGML_API struct ggml_tensor * ggml_map_custom3_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
struct ggml_tensor * c, struct ggml_tensor * c,
ggml_custom3_op_f32_t fun); ggml_custom3_op_f32_t fun),
"use ggml_map_custom3 instead");
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32( GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
struct ggml_context * ctx, struct ggml_context * ctx,
struct ggml_tensor * a, struct ggml_tensor * a,
struct ggml_tensor * b, struct ggml_tensor * b,
struct ggml_tensor * c, struct ggml_tensor * c,
ggml_custom3_op_f32_t fun); ggml_custom3_op_f32_t fun),
"use ggml_map_custom3_inplace instead");
// custom operators v2
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
#define GGML_N_TASKS_MAX -1
GGML_API struct ggml_tensor * ggml_map_custom1(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_t fun,
int n_tasks,
void * userdata);
GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
ggml_custom1_op_t fun,
int n_tasks,
void * userdata);
GGML_API struct ggml_tensor * ggml_map_custom2(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_t fun,
int n_tasks,
void * userdata);
GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
ggml_custom2_op_t fun,
int n_tasks,
void * userdata);
GGML_API struct ggml_tensor * ggml_map_custom3(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_t fun,
int n_tasks,
void * userdata);
GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
ggml_custom3_op_t fun,
int n_tasks,
void * userdata);
// loss function // loss function

View file

@ -1,29 +1,25 @@
# Grammar for subset of JSON - doesn't support full string or number syntax root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
root ::= object
value ::= object | array | string | number | boolean | "null"
object ::= object ::=
"{" ws ( "{" ws (
string ":" ws value string ":" ws value
("," ws string ":" ws value)* ("," ws string ":" ws value)*
)? "}" )? "}" ws
array ::= array ::=
"[" ws ( "[" ws (
value value
("," ws value)* ("," ws value)*
)? "]" )? "]" ws
string ::= string ::=
"\"" ( "\"" (
[^"\\] | [^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws )* "\"" ws
# Only plain integers currently number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
number ::= "-"? [0-9]+ ws
boolean ::= ("true" | "false") ws
# Optional space: by convention, applied in this grammar after literal chars when allowed # Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)? ws ::= ([ \t\n] ws)?

View file

@ -149,6 +149,46 @@ struct llama_file {
} }
}; };
// llama_context_data
struct llama_data_context {
virtual void write(const void * src, size_t size) = 0;
virtual size_t get_size_written() = 0;
virtual ~llama_data_context() = default;
};
struct llama_data_buffer_context : llama_data_context {
uint8_t* ptr;
size_t size_written = 0;
llama_data_buffer_context(uint8_t * p) : ptr(p) {}
void write(const void * src, size_t size) override {
memcpy(ptr, src, size);
ptr += size;
size_written += size;
}
size_t get_size_written() override {
return size_written;
}
};
struct llama_data_file_context : llama_data_context {
llama_file* file;
size_t size_written = 0;
llama_data_file_context(llama_file * f) : file(f) {}
void write(const void * src, size_t size) override {
file->write_raw(src, size);
size_written += size;
}
size_t get_size_written() override {
return size_written;
}
};
#if defined(_WIN32) #if defined(_WIN32)
static std::string llama_format_win_err(DWORD err) { static std::string llama_format_win_err(DWORD err) {
LPSTR buf; LPSTR buf;
@ -179,7 +219,7 @@ struct llama_mmap {
// prefetch/readahead impairs performance on NUMA systems // prefetch/readahead impairs performance on NUMA systems
if (numa) { prefetch = 0; } if (numa) { prefetch = 0; }
#ifdef __linux__ #ifdef __linux__
if (prefetch) { flags |= MAP_POPULATE; } if (prefetch >= file->size) { flags |= MAP_POPULATE; }
#endif #endif
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0); addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
if (addr == MAP_FAILED) { if (addr == MAP_FAILED) {

View file

@ -149,7 +149,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
} }
// amount of VRAM needed per batch size to hold temporary results // amount of VRAM needed per batch size to hold temporary results
// the values for 3b and 65b are not derived from testing but instead chosen conservatively // the values for 3b are not derived from testing but instead chosen conservatively
static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE() static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
{ {
static std::map<e_model, size_t> k_sizes = { static std::map<e_model, size_t> k_sizes = {
@ -157,14 +157,14 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
{ MODEL_7B, 512ull * kB }, { MODEL_7B, 512ull * kB },
{ MODEL_13B, 640ull * kB }, { MODEL_13B, 640ull * kB },
{ MODEL_30B, 768ull * kB }, { MODEL_30B, 768ull * kB },
{ MODEL_65B, 1536ull * kB }, { MODEL_65B, 1280ull * kB },
{ MODEL_70B, 1536ull * kB }, // TODO (likely can be reduced) { MODEL_70B, 1280ull * kB },
}; };
return k_sizes; return k_sizes;
} }
// amount of VRAM needed per batch size and context to hold temporary results // amount of VRAM needed per batch size and context to hold temporary results
// the values for 3b and 65b are not derived from testing but instead chosen conservatively // the values for 3b are not derived from testing but instead chosen conservatively
static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT() static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
{ {
static std::map<e_model, size_t> k_sizes = { static std::map<e_model, size_t> k_sizes = {
@ -172,8 +172,8 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
{ MODEL_7B, 128ull }, { MODEL_7B, 128ull },
{ MODEL_13B, 160ull }, { MODEL_13B, 160ull },
{ MODEL_30B, 208ull }, { MODEL_30B, 208ull },
{ MODEL_65B, 416ull }, { MODEL_65B, 256ull },
{ MODEL_70B, 416ull }, // TODO (likely can be reduced) { MODEL_70B, 256ull },
}; };
return k_sizes; return k_sizes;
} }
@ -747,12 +747,12 @@ struct llama_model_loader {
void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) { void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
size_t data_size = 0; size_t data_size = 0;
size_t prefetch_size = 0; size_t prefetch_size = file_loader->file.size;
size_t lock_size = 0; size_t lock_size = 0;
for (const llama_load_tensor & lt : tensors_map.tensors) { for (const llama_load_tensor & lt : tensors_map.tensors) {
data_size += lt.size; data_size += lt.size;
if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) { if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
prefetch_size += lt.size; prefetch_size -= lt.size;
} }
} }
@ -3743,10 +3743,20 @@ size_t llama_get_state_size(const struct llama_context * ctx) {
return s_total; return s_total;
} }
// Copies the state to the specified destination address /** copy state data into either a buffer or file depending on the passed in context
size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) { *
uint8_t * out = dst; * file context:
* llama_file file("/path", "wb");
* llama_data_file_context data_ctx(&file);
* llama_copy_state_data(ctx, &data_ctx);
*
* buffer context:
* std::vector<uint8_t> buf(max_size, 0);
* llama_data_buffer_context data_ctx(&buf.data());
* llama_copy_state_data(ctx, &data_ctx);
*
*/
void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
// copy rng // copy rng
{ {
std::stringstream rng_ss; std::stringstream rng_ss;
@ -3758,8 +3768,8 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE); memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE);
memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size()); memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size());
memcpy(out, &rng_size, sizeof(rng_size)); out += sizeof(rng_size); data_ctx->write(&rng_size, sizeof(rng_size));
memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE; data_ctx->write(&rng_buf[0], LLAMA_MAX_RNG_STATE);
} }
// copy logits // copy logits
@ -3767,25 +3777,29 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
const size_t logits_cap = ctx->logits.capacity(); const size_t logits_cap = ctx->logits.capacity();
const size_t logits_size = ctx->logits.size(); const size_t logits_size = ctx->logits.size();
memcpy(out, &logits_cap, sizeof(logits_cap)); out += sizeof(logits_cap); data_ctx->write(&logits_cap, sizeof(logits_cap));
memcpy(out, &logits_size, sizeof(logits_size)); out += sizeof(logits_size); data_ctx->write(&logits_size, sizeof(logits_size));
if (logits_size) { if (logits_size) {
memcpy(out, ctx->logits.data(), logits_size * sizeof(float)); data_ctx->write(ctx->logits.data(), logits_size * sizeof(float));
} }
out += logits_cap * sizeof(float); // If there is a gap between the size and the capacity, write padding
size_t padding_size = (logits_cap - logits_size) * sizeof(float);
if (padding_size > 0) {
std::vector<uint8_t> padding(padding_size, 0); // Create a buffer filled with zeros
data_ctx->write(padding.data(), padding_size);
}
} }
// copy embeddings // copy embeddings
{ {
const size_t embedding_size = ctx->embedding.size(); const size_t embedding_size = ctx->embedding.size();
memcpy(out, &embedding_size, sizeof(embedding_size)); out += sizeof(embedding_size); data_ctx->write(&embedding_size, sizeof(embedding_size));
if (embedding_size) { if (embedding_size) {
memcpy(out, ctx->embedding.data(), embedding_size * sizeof(float)); data_ctx->write(ctx->embedding.data(), embedding_size * sizeof(float));
out += embedding_size * sizeof(float);
} }
} }
@ -3800,8 +3814,8 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
const size_t kv_size = kv_self.buf.size; const size_t kv_size = kv_self.buf.size;
const int kv_ntok = llama_get_kv_cache_token_count(ctx); const int kv_ntok = llama_get_kv_cache_token_count(ctx);
memcpy(out, &kv_size, sizeof(kv_size)); out += sizeof(kv_size); data_ctx->write(&kv_size, sizeof(kv_size));
memcpy(out, &kv_ntok, sizeof(kv_ntok)); out += sizeof(kv_ntok); data_ctx->write(&kv_ntok, sizeof(kv_ntok));
if (kv_size) { if (kv_size) {
const size_t elt_size = ggml_element_size(kv_self.k); const size_t elt_size = ggml_element_size(kv_self.k);
@ -3810,12 +3824,12 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
ggml_cgraph gf{}; ggml_cgraph gf{};
ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_ntok, n_layer); ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_ntok, n_layer);
kout3d->data = out; std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
out += ggml_nbytes(kout3d); kout3d->data = kout3d_data.data();
ggml_tensor * vout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, kv_ntok, n_embd, n_layer); ggml_tensor * vout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, kv_ntok, n_embd, n_layer);
vout3d->data = out; std::vector<uint8_t> vout3d_data(ggml_nbytes(vout3d), 0);
out += ggml_nbytes(vout3d); vout3d->data = vout3d_data.data();
ggml_tensor * k3d = ggml_view_3d(cpy_ctx, kv_self.k, ggml_tensor * k3d = ggml_view_3d(cpy_ctx, kv_self.k,
n_embd, kv_ntok, n_layer, n_embd, kv_ntok, n_layer,
@ -3830,15 +3844,20 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1); ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
ggml_free(cpy_ctx); ggml_free(cpy_ctx);
// our data is now in the kout3d_data and vout3d_data buffers
// write them to file
data_ctx->write(kout3d_data.data(), kout3d_data.size());
data_ctx->write(vout3d_data.data(), vout3d_data.size());
} }
} }
}
const size_t written = out - dst; size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
const size_t max_size = llama_get_state_size(ctx); llama_data_buffer_context data_ctx(dst);
llama_copy_state_data_internal(ctx, &data_ctx);
LLAMA_ASSERT(written <= max_size); return data_ctx.get_size_written();
return written;
} }
// Sets the state reading from the specified source address // Sets the state reading from the specified source address
@ -4023,15 +4042,9 @@ bool llama_save_session_file(struct llama_context * ctx, const char * path_sessi
file.write_u32((uint32_t) n_token_count); file.write_u32((uint32_t) n_token_count);
file.write_raw(tokens, sizeof(llama_token) * n_token_count); file.write_raw(tokens, sizeof(llama_token) * n_token_count);
// save the context state // save the context state using stream saving
{ llama_data_file_context data_ctx(&file);
const size_t n_state_size_max = llama_get_state_size(ctx); llama_copy_state_data_internal(ctx, &data_ctx);
std::vector<uint8_t> state_data(n_state_size_max);
const size_t n_state_size_cur = llama_copy_state_data(ctx, state_data.data());
file.write_raw(state_data.data(), n_state_size_cur);
}
return true; return true;
} }

View file

@ -10,5 +10,5 @@ cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
cp -rpv ../ggml/tests/test-opt.c ./tests/test-opt.c cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
cp -rpv ../ggml/tests/test-grad0.c ./tests/test-grad0.c cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp

View file

@ -6,10 +6,10 @@ function(llama_add_test source)
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN}) add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
endfunction() endfunction()
# llama_add_test(test-double-float.c) # SLOW # llama_add_test(test-double-float.cpp) # SLOW
llama_add_test(test-quantize-fns.cpp) llama_add_test(test-quantize-fns.cpp)
llama_add_test(test-quantize-perf.cpp) llama_add_test(test-quantize-perf.cpp)
llama_add_test(test-sampling.cpp) llama_add_test(test-sampling.cpp)
llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin) llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
llama_add_test(test-grad0.c) # SLOW llama_add_test(test-grad0.cpp) # SLOW
# llama_add_test(test-opt.c) # SLOW # llama_add_test(test-opt.cpp) # SLOW

View file

@ -3,10 +3,11 @@
// This is done by checking all finite (non-NaN, non-infinite) floats. // This is done by checking all finite (non-NaN, non-infinite) floats.
#undef NDEBUG #undef NDEBUG
#include <assert.h> #include <cassert>
#include <immintrin.h> #include <immintrin.h>
#include <math.h> #include <cmath>
#include <stdint.h> #include <cstdint>
#include <cstring>
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdouble-promotion" #pragma GCC diagnostic ignored "-Wdouble-promotion"
@ -32,8 +33,9 @@ inline static float silu_float(float x) {
int main(void) { int main(void) {
uint32_t x = UINT32_MAX; uint32_t x = UINT32_MAX;
do { do {
float f = *(float *)&x; float f;
assert(!isfinite(f) || (round_orig(f) == round_float(f))); memcpy(&f, &x, sizeof(x));
assert(!std::isfinite(f) || (round_orig(f) == round_float(f)));
} while (x--); } while (x--);
#ifdef __F16C__ #ifdef __F16C__

View file

@ -1,10 +1,10 @@
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
#include "ggml.h" #include "ggml.h"
#include <math.h> #include <cmath>
#include <stdio.h> #include <cstdio>
#include <stdlib.h> #include <cstdlib>
#include <assert.h> #include <cassert>
#if defined(_MSC_VER) #if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data #pragma warning(disable: 4244 4267) // possible loss of data
@ -47,16 +47,16 @@
#define GGML_PRINT(...) printf(__VA_ARGS__) #define GGML_PRINT(...) printf(__VA_ARGS__)
float frand(void) { static float frand(void) {
return (float)rand()/(float)RAND_MAX; return (float)rand()/(float)RAND_MAX;
} }
int irand(int n) { static int irand(int n) {
if (n == 0) return 0; if (n == 0) return 0;
return rand()%n; return rand()%n;
} }
void get_random_dims(int64_t * dims, int ndims) { static void get_random_dims(int64_t * dims, int ndims) {
dims[0] = dims[1] = dims[2] = dims[3] = 1; dims[0] = dims[1] = dims[2] = dims[3] = 1;
for (int i = 0; i < ndims; i++) { for (int i = 0; i < ndims; i++) {
@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) {
} }
} }
struct ggml_tensor * get_random_tensor_f32( static struct ggml_tensor * get_random_tensor_f32(
struct ggml_context * ctx0, struct ggml_context * ctx0,
int ndims, int ndims,
int64_t ne[], int64_t ne[],
@ -112,7 +112,7 @@ struct ggml_tensor * get_random_tensor_f32(
return result; return result;
} }
struct ggml_tensor * get_random_tensor_f16( static struct ggml_tensor * get_random_tensor_f16(
struct ggml_context * ctx0, struct ggml_context * ctx0,
int ndims, int ndims,
int64_t ne[], int64_t ne[],
@ -160,7 +160,7 @@ struct ggml_tensor * get_random_tensor_f16(
return result; return result;
} }
struct ggml_tensor * get_random_tensor_i32( static struct ggml_tensor * get_random_tensor_i32(
struct ggml_context * ctx0, struct ggml_context * ctx0,
int ndims, int ndims,
int64_t ne[], int64_t ne[],
@ -208,7 +208,7 @@ struct ggml_tensor * get_random_tensor_i32(
return result; return result;
} }
void print_elements(const char* label, const struct ggml_tensor * t) { static void print_elements(const char* label, const struct ggml_tensor * t) {
if (!t) { if (!t) {
printf("%s: %s = null\n", __func__, label); printf("%s: %s = null\n", __func__, label);
return; return;
@ -228,7 +228,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) {
} }
bool check_gradient( static bool check_gradient(
const char * op_name, const char * op_name,
struct ggml_context * ctx0, struct ggml_context * ctx0,
struct ggml_tensor * x[], struct ggml_tensor * x[],
@ -310,7 +310,7 @@ bool check_gradient(
} }
// TODO: clean-up this .. // TODO: clean-up this ..
bool check_mat_mul( static bool check_mat_mul(
const struct ggml_tensor * y, const struct ggml_tensor * y,
const struct ggml_tensor * x0, const struct ggml_tensor * x0,
const struct ggml_tensor * x1) { const struct ggml_tensor * x1) {
@ -373,9 +373,9 @@ bool check_mat_mul(
int main(int argc, const char ** argv) { int main(int argc, const char ** argv) {
struct ggml_init_params params = { struct ggml_init_params params = {
.mem_size = 128*1024*1024, /* .mem_size = */ 128*1024*1024,
.mem_buffer = NULL, /* .mem_buffer = */ NULL,
.no_alloc = false, /* .no_alloc = */ false,
}; };
int64_t ne[4]; int64_t ne[4];

View file

@ -1,9 +1,9 @@
#include "ggml.h" #include "ggml.h"
#include <math.h> #include <cmath>
#include <stdio.h> #include <cstdio>
#include <stdlib.h> #include <cstdlib>
#include <assert.h> #include <cassert>
#define MAX_NARGS 2 #define MAX_NARGS 2
@ -119,10 +119,11 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
int main(void) { int main(void) {
struct ggml_init_params params = { struct ggml_init_params params = {
.mem_size = 1024*1024*1024, /* .mem_size = */ 1024*1024*1024,
.mem_buffer = NULL, /* .mem_buffer = */ NULL,
.no_alloc = false, /* .no_alloc = */ false,
}; };
struct ggml_context * ctx = ggml_init(params); struct ggml_context * ctx = ggml_init(params);
int64_t ne1[4] = {4, 128, 1, 1}; int64_t ne1[4] = {4, 128, 1, 1};