Merge remote-tracking branch 'origin/master' into grammar-fast

Commit 9f13623149: 52 changed files with 3422 additions and 8398 deletions
.github/workflows/bench.yml (vendored): 2 changed lines

@@ -32,7 +32,7 @@ on:
     - cron: '04 2 * * *'

 concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}-${{ github.event.inputs.sha }}
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
   cancel-in-progress: true

 jobs:
.github/workflows/server.yml (vendored): 12 changed lines

@@ -23,7 +23,7 @@ on:
     - cron: '2 4 * * *'

 concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true

 jobs:

@@ -58,6 +58,7 @@ jobs:
           git \
           cmake \
           python3-pip \
+          python3-venv \
           curl \
           wget \
           language-pack-en \

@@ -100,10 +101,13 @@ jobs:
             -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
           cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server

-      - name: Tests dependencies
-        id: test_dependencies
+      - name: Setup python env
+        id: pipenv
         run: |
-          pip install -r examples/server/tests/requirements.txt
+          cd examples/server/tests
+          python3 -m venv venv
+          . venv/bin/activate
+          pip install -r requirements.txt

       - name: Tests
         id: server_integration_tests
.gitignore (vendored): 4 changed lines

@@ -34,6 +34,7 @@ lcov-report/
 gcovr-report/

 build*
+!build.zig
 cmake-build-*
 out/
 tmp/

@@ -100,6 +101,9 @@ qnt-*.txt
 perf-*.txt

 examples/jeopardy/results.txt
+examples/server/*.html.hpp
+examples/server/*.js.hpp
+examples/server/*.mjs.hpp

 poetry.lock
 poetry.toml
CMakeLists.txt

@@ -43,17 +43,7 @@ else()
     set(LLAMA_METAL_DEFAULT OFF)
 endif()

-# TODO: fix this for Android CI
-# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
-#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
-# set(LLAMA_LLAMAFILE_DEFAULT OFF)
-#else()
-# set(LLAMA_LLAMAFILE_DEFAULT ON)
-#endif()
-
-# TODO: temporary disable until MoE is fixed
-# https://github.com/ggerganov/llama.cpp/pull/6716
-set(LLAMA_LLAMAFILE_DEFAULT OFF)
+set(LLAMA_LLAMAFILE_DEFAULT ON)

 # general
 option(BUILD_SHARED_LIBS "build shared libraries" OFF)
Makefile: 17 changed lines

@@ -384,10 +384,6 @@ ifdef LLAMA_OPENBLAS
     MK_LDFLAGS += $(shell pkg-config --libs openblas)
 endif # LLAMA_OPENBLAS

-# TODO: temporary disable until MoE is fixed
-# https://github.com/ggerganov/llama.cpp/pull/6716
-LLAMA_NO_LLAMAFILE := 1
-
 ifndef LLAMA_NO_LLAMAFILE
     MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
     OBJS += sgemm.o

@@ -772,7 +768,7 @@ batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.
     $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
     $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-quantize: examples/quantize/quantize.cpp build-info.o ggml.o llama.o $(OBJS)
+quantize: examples/quantize/quantize.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
     $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
     $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

@@ -800,10 +796,19 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
     $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
     $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/server/json-schema-to-grammar.mjs.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
     $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
     $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)

+# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`:
+examples/server/%.hpp: examples/server/public/% Makefile
+    @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \
+        echo "unsigned char $${NAME}[] = {" && \
+        cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
+        echo "};" && \
+        echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \
+    ) > $@
+
 gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
     $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
     $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
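For orientation, the new `examples/server/%.hpp` rule above (and the matching build.zig loop further down) embeds each web asset as a C array. The generated header looks roughly like the sketch below; the byte values and the length are illustrative, not taken from a real asset:

```cpp
// examples/server/completion.js.hpp, as produced by the rule above (illustrative contents)
unsigned char completion_js[] = {
    0x2f, 0x2a, 0x20, 0x63, /* ... one 0xNN entry per byte of examples/server/public/completion.js ... */
};
unsigned int completion_js_len = 4462; // hypothetical byte count
```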
README-sycl.md

@@ -229,12 +229,12 @@ source /opt/intel/oneapi/setvars.sh
 # Build LLAMA with MKL BLAS acceleration for intel GPU
 mkdir -p build && cd build

-# Option 1: Use FP16 for better performance in long-prompt inference
-#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
-
-# Option 2: Use FP32 by default
+# Option 1: Use FP32 (recommended for better performance in most cases)
 cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+# Option 2: Use FP16
+cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON

 #build all binary
 cmake --build . --config Release -j -v
 ```

@@ -250,12 +250,12 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
 # Build LLAMA with Nvidia BLAS acceleration through SYCL
 mkdir -p build && cd build

-# Option 1: Use FP16 for better performance in long-prompt inference
-cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
-
-# Option 2: Use FP32 by default
+# Option 1: Use FP32 (recommended for better performance in most cases)
 cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+# Option 2: Use FP16
+cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON

 #build all binary
 cmake --build . --config Release -j -v

@@ -416,6 +416,10 @@ mkdir -p build
 cd build
 @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force

+# Option 1: Use FP32 (recommended for better performance in most cases)
+cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+
+# Option 2: Or FP16
 cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON

 make -j
README.md: 10 changed lines

@@ -93,6 +93,7 @@ Typically finetunes of the base models below are supported as well.

 - [X] LLaMA 🦙
 - [x] LLaMA 2 🦙🦙
+- [x] LLaMA 3 🦙🦙🦙
 - [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
 - [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
 - [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)

@@ -119,8 +120,9 @@ Typically finetunes of the base models below are supported as well.
 - [x] [CodeShell](https://github.com/WisdomShell/codeshell)
 - [x] [Gemma](https://ai.google.dev/gemma)
 - [x] [Mamba](https://github.com/state-spaces/mamba)
+- [x] [Grok-1](https://huggingface.co/keyfan/grok-1-hf)
 - [x] [Xverse](https://huggingface.co/models?search=xverse)
-- [x] [Command-R](https://huggingface.co/CohereForAI/c4ai-command-r-v01)
+- [x] [Command-R models](https://huggingface.co/models?search=CohereForAI/c4ai-command-r)
 - [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
 - [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
 - [x] [OLMo](https://allenai.org/olmo)

@@ -135,6 +137,8 @@ Typically finetunes of the base models below are supported as well.
 - [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V)
 - [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM)
 - [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL)
+- [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM)
+- [x] [Moondream](https://huggingface.co/vikhyatk/moondream2)

 **HTTP server**

@@ -1117,7 +1121,9 @@ docker run --gpus all -v /path/to/models:/models local/llama.cpp:server-cuda -m
 - Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
 - See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions
 - Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
-- Matrix multiplication is unconventional: [`z = ggml_mul_mat(ctx, x, y)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means `zT = x @ yT`
+- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
+
+

 ### Docs
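A shape-level illustration of the `ggml_mul_mat` convention referenced in the README hunk above; this is a standalone sketch using the public ggml API, not code from this commit:

```cpp
#include "ggml.h"

// ggml stores ne[0] = row length (what the README calls columns) and ne[1] = number of rows.
void mul_mat_shape_demo(void) {
    struct ggml_init_params ip = { /*mem_size*/ 16 * 1024 * 1024, /*mem_buffer*/ NULL, /*no_alloc*/ false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * A = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3, 4); // 4 rows of length k = 3
    struct ggml_tensor * B = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 3, 2); // 2 rows of length k = 3

    struct ggml_tensor * C = ggml_mul_mat(ctx, A, B); // result has ne = {4, 2}: 2 rows of length 4
    // Row i of C is row i of B multiplied against A transposed, i.e. C = B A^T, equivalently C^T = A B^T.
    (void) C;

    ggml_free(ctx);
}
```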
build.zig: 29 changed lines

@@ -140,4 +140,33 @@ pub fn build(b: *std.build.Builder) !void {
     if (server.target.isWindows()) {
         server.linkSystemLibrary("ws2_32");
     }

+    const server_assets = [_][]const u8{ "index.html", "index.js", "completion.js", "json-schema-to-grammar.mjs" };
+    for (server_assets) |asset| {
+        const input_path = b.fmt("examples/server/public/{s}", .{asset});
+        const output_path = b.fmt("examples/server/{s}.hpp", .{asset});
+
+        // Portable equivalent of `b.addSystemCommand(&.{ "xxd", "-n", asset, "-i", input_path, output_path }) })`:
+
+        const input = try std.fs.cwd().readFileAlloc(b.allocator, input_path, std.math.maxInt(usize));
+        defer b.allocator.free(input);
+
+        var buf = std.ArrayList(u8).init(b.allocator);
+        defer buf.deinit();
+
+        for (input) |byte| {
+            try std.fmt.format(buf.writer(), "0x{X:0>2}, ", .{byte});
+        }
+
+        var name = try std.mem.replaceOwned(u8, b.allocator, asset, "-", "_");
+        defer b.allocator.free(name);
+        std.mem.replaceScalar(u8, name, '.', '_');
+
+        try std.fs.cwd().writeFile(output_path, b.fmt(
+            "unsigned char {s}[] = {{{s}}};\nunsigned int {s}_len = {d};\n",
+            .{ name, buf.items, name, input.len },
+        ));
+
+        std.debug.print("Dumped hex of \"{s}\" ({s}) to {s}\n", .{ input_path, name, output_path });
+    }
 }
ci/run.sh

@@ -160,7 +160,9 @@ function gg_run_test_scripts_debug {

     set -e

+    # TODO: too slow, run on dedicated node
     (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+    #(cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log

     set +e
 }

@@ -184,6 +186,7 @@ function gg_run_test_scripts_release {
     set -e

     (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+    (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log

     set +e
 }
common/common.cpp

@@ -234,15 +234,63 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     return result;
 }

+bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
+    const char * sep = strchr(data, '=');
+    if (sep == nullptr || sep - data >= 128) {
+        fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
+        return false;
+    }
+    llama_model_kv_override kvo;
+    std::strncpy(kvo.key, data, sep - data);
+    kvo.key[sep - data] = 0;
+    sep++;
+    if (strncmp(sep, "int:", 4) == 0) {
+        sep += 4;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+        kvo.val_i64 = std::atol(sep);
+    } else if (strncmp(sep, "float:", 6) == 0) {
+        sep += 6;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
+        kvo.val_f64 = std::atof(sep);
+    } else if (strncmp(sep, "bool:", 5) == 0) {
+        sep += 5;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
+        if (std::strcmp(sep, "true") == 0) {
+            kvo.val_bool = true;
+        } else if (std::strcmp(sep, "false") == 0) {
+            kvo.val_bool = false;
+        } else {
+            fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
+            return false;
+        }
+    } else if (strncmp(sep, "str:", 4) == 0) {
+        sep += 4;
+        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+        if (strlen(sep) > 127) {
+            fprintf(stderr, "%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
+            return false;
+        }
+        strncpy(kvo.val_str, sep, 127);
+        kvo.val_str[127] = '\0';
+    } else {
+        fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
+        return false;
+    }
+    overrides.emplace_back(std::move(kvo));
+    return true;
+}
+
 bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param) {
-    llama_sampling_params& sparams = params.sparams;
+    llama_sampling_params & sparams = params.sparams;

     if (arg == "-s" || arg == "--seed") {
         if (++i >= argc) {
             invalid_param = true;
             return true;
         }
+        // This is temporary, in the future the samplign state will be moved fully to llama_sampling_context.
         params.seed = std::stoul(argv[i]);
+        sparams.seed = std::stoul(argv[i]);
         return true;
     }
     if (arg == "-t" || arg == "--threads") {

@@ -1087,6 +1135,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         params.n_print = std::stoi(argv[i]);
         return true;
     }
+    if (arg == "--check-tensors") {
+        params.check_tensors = true;
+        return true;
+    }
     if (arg == "--ppl-output-type") {
         if (++i >= argc) {
             invalid_param = true;

@@ -1238,47 +1290,11 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             invalid_param = true;
             return true;
         }
-        char* sep = strchr(argv[i], '=');
-        if (sep == nullptr || sep - argv[i] >= 128) {
-            fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]);
-            invalid_param = true;
-            return true;
-        }
-        struct llama_model_kv_override kvo;
-        std::strncpy(kvo.key, argv[i], sep - argv[i]);
-        kvo.key[sep - argv[i]] = 0;
-        sep++;
-        if (strncmp(sep, "int:", 4) == 0) {
-            sep += 4;
-            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
-            kvo.int_value = std::atol(sep);
-        }
-        else if (strncmp(sep, "float:", 6) == 0) {
-            sep += 6;
-            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
-            kvo.float_value = std::atof(sep);
-        }
-        else if (strncmp(sep, "bool:", 5) == 0) {
-            sep += 5;
-            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
-            if (std::strcmp(sep, "true") == 0) {
-                kvo.bool_value = true;
-            }
-            else if (std::strcmp(sep, "false") == 0) {
-                kvo.bool_value = false;
-            }
-            else {
-                fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
-                invalid_param = true;
-                return true;
-            }
-        }
-        else {
+        if (!parse_kv_override(argv[i], params.kv_overrides)) {
             fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
             invalid_param = true;
             return true;
         }
-        params.kv_overrides.push_back(kvo);
         return true;
     }
 #ifndef LOG_DISABLE_LOGS

@@ -1549,9 +1565,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf(" path to dynamic lookup cache to use for lookup decoding (updated by generation)\n");
     printf(" --override-kv KEY=TYPE:VALUE\n");
     printf(" advanced option to override model metadata by key. may be specified multiple times.\n");
-    printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
+    printf(" types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
     printf(" -ptc N, --print-token-count N\n");
     printf(" print token count every N tokens (default: %d)\n", params.n_print);
+    printf(" --check-tensors check model tensor data for invalid values\n");
     printf("\n");
 #ifndef LOG_DISABLE_LOGS
     log_print_usage();

@@ -1772,6 +1789,7 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
     mparams.tensor_split = params.tensor_split;
     mparams.use_mmap = params.use_mmap;
     mparams.use_mlock = params.use_mlock;
+    mparams.check_tensors = params.check_tensors;
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {

@@ -2326,12 +2344,12 @@ std::vector<llama_token> llama_tokenize(
     return result;
 }

-std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
+std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+    const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
     if (n_tokens < 0) {
         result.resize(-n_tokens);
-        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), true);
+        int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size(), special);
         GGML_ASSERT(check == -n_tokens);
     } else {
         result.resize(n_tokens);
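Since `--override-kv` now routes through the new `parse_kv_override()` helper, other tools can reuse it directly. A minimal caller, assuming only the declaration added to common.h below; the key/value strings are just examples:

```cpp
#include <vector>
#include "common.h" // declares parse_kv_override() and pulls in llama_model_kv_override

static bool collect_overrides(std::vector<llama_model_kv_override> & overrides) {
    // The same strings the CLI accepts after --override-kv; on success each entry
    // holds the key, a LLAMA_KV_OVERRIDE_TYPE_* tag and the parsed value.
    return parse_kv_override("tokenizer.ggml.add_bos_token=bool:false", overrides) &&
           parse_kv_override("general.name=str:my-model", overrides); // example key/value only
}
```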
common/common.h

@@ -86,8 +86,8 @@ struct gpt_params {

     ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;

-    llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
-    llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
+    enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+    enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings

     // // sampling parameters
     struct llama_sampling_params sparams;

@@ -161,6 +161,7 @@ struct gpt_params {
     bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
+    bool check_tensors = false; // validate tensor data

     std::string cache_type_k = "f16"; // KV cache data type for the K
     std::string cache_type_v = "f16"; // KV cache data type for the V

@@ -170,6 +171,8 @@ struct gpt_params {
     std::string image = ""; // path to an image file
 };

+bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
+
 bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params);

 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);

@@ -237,11 +240,12 @@ std::vector<llama_token> llama_tokenize(
         bool add_special,
         bool parse_special = false);

-// tokenizes a token into a piece
+// tokenizes a token into a piece, optionally renders special/control tokens
 // should work similar to Python's `tokenizer.id_to_piece`
 std::string llama_token_to_piece(
         const struct llama_context * ctx,
-        llama_token token);
+        llama_token token,
+        bool special = true);

 // TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
 // that takes into account the tokenizer type and decides how to handle the leading space
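The new `special` parameter defaults to true, so existing callers keep rendering special/control tokens; passing false is the opt-out. A small sketch (illustrative; the token id comes from wherever the caller got it):

```cpp
#include <cstdio>
#include <string>
#include "common.h"

void print_piece(const struct llama_context * ctx, llama_token token) {
    std::string rendered = llama_token_to_piece(ctx, token);        // special = true (default): control tokens are rendered
    std::string plain    = llama_token_to_piece(ctx, token, false); // skip special/control tokens, e.g. for clean text output
    printf("rendered='%s' plain='%s'\n", rendered.c_str(), plain.c_str());
}
```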
common/sampling.cpp

@@ -1,4 +1,6 @@
+#define LLAMA_API_INTERNAL
 #include "sampling.h"
+#include <random>

 struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
     struct llama_sampling_context * result = new llama_sampling_context();

@@ -33,6 +35,8 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_

     result->prev.resize(params.n_prev);

+    llama_sampling_set_rng_seed(result, params.seed);
+
     return result;
 }

@@ -62,6 +66,13 @@ void llama_sampling_reset(llama_sampling_context * ctx) {
     ctx->cur.clear();
 }

+void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed) {
+    if (seed == LLAMA_DEFAULT_SEED) {
+        seed = time(NULL);
+    }
+    ctx->rng.seed(seed);
+}
+
 void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst) {
     if (dst->grammar) {
         llama_grammar_free(dst->grammar);

@@ -203,7 +214,7 @@ static llama_token llama_sampling_sample_impl(

     sampler_queue(ctx_main, params, cur_p, min_keep);

-    id = llama_sample_token(ctx_main, &cur_p);
+    id = llama_sample_token_with_rng(ctx_main, &cur_p, ctx_sampling->rng);

     //{
     //    const int n_top = 10;
common/sampling.h

@@ -4,9 +4,10 @@

 #include "grammar-parser.h"

+#include <random>
 #include <string>
-#include <vector>
 #include <unordered_map>
+#include <vector>

 // sampler types
 enum class llama_sampler_type : char {

@@ -20,25 +21,26 @@ enum class llama_sampler_type : char {

 // sampling parameters
 typedef struct llama_sampling_params {
     int32_t n_prev = 64; // number of previous tokens to remember
     int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
     int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
     int32_t top_k = 40; // <= 0 to use vocab size
     float top_p = 0.95f; // 1.0 = disabled
     float min_p = 0.05f; // 0.0 = disabled
     float tfs_z = 1.00f; // 1.0 = disabled
     float typical_p = 1.00f; // 1.0 = disabled
     float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
     float dynatemp_range = 0.00f; // 0.0 = disabled
     float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
     int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
     float penalty_repeat = 1.00f; // 1.0 = disabled
     float penalty_freq = 0.00f; // 0.0 = disabled
     float penalty_present = 0.00f; // 0.0 = disabled
     int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
     float mirostat_tau = 5.00f; // target entropy
     float mirostat_eta = 0.10f; // learning rate
     bool penalize_nl = false; // consider newlines as a repeatable token
+    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context

     std::vector<llama_sampler_type> samplers_sequence = {
         llama_sampler_type::TOP_K,

@@ -79,6 +81,8 @@ struct llama_sampling_context {
     // TODO: replace with ring-buffer
     std::vector<llama_token> prev;
     std::vector<llama_token_data> cur;

+    std::mt19937 rng;
 };

 #include "common.h"

@@ -93,6 +97,9 @@ void llama_sampling_free(struct llama_sampling_context * ctx);
 // - reset grammar
 void llama_sampling_reset(llama_sampling_context * ctx);

+// Set the sampler seed
+void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed);
+
 // Copy the sampler context
 void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst);
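Putting the sampling changes together: the seed now lives in `llama_sampling_params`, is applied in `llama_sampling_init()`, and sampling draws from the per-context `rng` via `llama_sample_token_with_rng()`. A minimal reproducibility sketch, illustrative only:

```cpp
#include "sampling.h"

// Illustrative: two contexts seeded identically will make identical draws in
// llama_sampling_sample(), since sampling no longer uses the global llama RNG.
void seeded_sampling_demo(void) {
    llama_sampling_params sparams;
    sparams.seed = 42; // leaving LLAMA_DEFAULT_SEED would pick a time-based seed instead

    llama_sampling_context * a = llama_sampling_init(sparams);
    llama_sampling_context * b = llama_sampling_init(sparams);

    // ... identical llama_sampling_sample() calls on a and b now produce identical tokens ...

    llama_sampling_set_rng_seed(a, 1234); // a context can also be re-seeded later

    llama_sampling_free(a);
    llama_sampling_free(b);
}
```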
convert-hf-to-gguf.py

@@ -363,6 +363,16 @@ class Model(ABC):
             scores.append(-1000.0)
             toktypes.append(SentencePieceTokenTypes.USER_DEFINED)

+        if vocab_size > len(tokens):
+            pad_count = vocab_size - len(tokens)
+            print(
+                f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]"
+            )
+            for i in range(1, pad_count + 1):
+                tokens.append(f"[PAD{i}]")
+                scores.append(-1000.0)
+                toktypes.append(SentencePieceTokenTypes.UNUSED)
+
         assert len(tokens) == vocab_size

         self.gguf_writer.add_tokenizer_model("llama")

@@ -1789,6 +1799,12 @@ class QwenModel(Model):
 class Qwen2Model(Model):
     model_arch = gguf.MODEL_ARCH.QWEN2

+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_gpt2()
+

 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):

@@ -1979,6 +1995,91 @@ class Phi2Model(Model):
         self.gguf_writer.add_add_bos_token(False)


+@Model.register("Phi3ForCausalLM")
+class Phi3MiniModel(Model):
+    model_arch = gguf.MODEL_ARCH.PHI3
+
+    def set_vocab(self):
+        from sentencepiece import SentencePieceProcessor
+
+        tokenizer_path = self.dir_model / 'tokenizer.model'
+
+        if not tokenizer_path.is_file():
+            print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
+            sys.exit(1)
+
+        tokenizer = SentencePieceProcessor(str(tokenizer_path))
+
+        vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+        tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+        scores: list[float] = [-10000.0] * vocab_size
+        toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+        for token_id in range(tokenizer.vocab_size()):
+
+            piece = tokenizer.id_to_piece(token_id)
+            text = piece.encode("utf-8")
+            score = tokenizer.get_score(token_id)
+
+            toktype = SentencePieceTokenTypes.NORMAL
+            if tokenizer.is_unknown(token_id):
+                toktype = SentencePieceTokenTypes.UNKNOWN
+            elif tokenizer.is_control(token_id):
+                toktype = SentencePieceTokenTypes.CONTROL
+            elif tokenizer.is_unused(token_id):
+                toktype = SentencePieceTokenTypes.UNUSED
+            elif tokenizer.is_byte(token_id):
+                toktype = SentencePieceTokenTypes.BYTE
+
+            tokens[token_id] = text
+            scores[token_id] = score
+            toktypes[token_id] = toktype
+
+        added_tokens_file = self.dir_model / 'added_tokens.json'
+        if added_tokens_file.is_file():
+            with open(added_tokens_file, "r", encoding="utf-8") as f:
+                added_tokens_json = json.load(f)
+
+                for key in added_tokens_json:
+                    token_id = added_tokens_json[key]
+                    if (token_id >= vocab_size):
+                        print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+                        continue
+
+                    tokens[token_id] = key.encode("utf-8")
+                    scores[token_id] = -1000.0
+                    toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def set_gguf_parameters(self):
+        block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
+
+        rot_pct = 1.0
+        n_embd = self.find_hparam(["hidden_size", "n_embd"])
+        n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        rms_eps = self.find_hparam(["rms_norm_eps"])
+
+        self.gguf_writer.add_name("Phi3")
+        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
+
+        self.gguf_writer.add_embedding_length(n_embd)
+        self.gguf_writer.add_feed_forward_length(8192)
+        self.gguf_writer.add_block_count(block_count)
+        self.gguf_writer.add_head_count(n_head)
+        self.gguf_writer.add_head_count_kv(n_head)
+        self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
+        self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
+        self.gguf_writer.add_file_type(self.ftype)
+
+
 @Model.register("PlamoForCausalLM")
 class PlamoModel(Model):
     model_arch = gguf.MODEL_ARCH.PLAMO
examples/gguf-split/tests.sh (Normal file → Executable file): 16 changed lines

@@ -4,16 +4,16 @@ set -eu

 if [ $# -lt 1 ]
 then
     echo "usage: $0 path_to_build_binary [path_to_temp_folder]"
     echo "example: $0 ../../build/bin ../../tmp"
     exit 1
 fi

 if [ $# -gt 1 ]
 then
     TMP_DIR=$2
 else
     TMP_DIR=/tmp
 fi

 set -x

@@ -21,7 +21,7 @@ set -x
 SPLIT=$1/gguf-split
 MAIN=$1/main
 WORK_PATH=$TMP_DIR/gguf-split
-CUR_DIR=$(pwd)
+ROOT_DIR=$(realpath $(dirname $0)/../../)

 mkdir -p "$WORK_PATH"

@@ -30,8 +30,8 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf

 # 1. Get a model
 (
   cd $WORK_PATH
-  "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
+  "$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
 )
 echo PASS
examples/imatrix/imatrix.cpp

@@ -23,6 +23,7 @@ struct Stats {
 };

 struct StatParams {
+    std::string dataset;
     std::string ofile = "imatrix.dat";
     int n_output_frequency = 10;
     int verbosity = 1;

@@ -46,7 +47,7 @@ private:
     std::vector<float> m_src1_data;
     std::vector<char> m_ids; // the expert ids from ggml_mul_mat_id
     //
-    void save_imatrix(const char * file_name) const;
+    void save_imatrix(const char * file_name, const char * dataset) const;
     void keep_imatrix(int ncall) const;
 };

@@ -199,7 +200,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
 }

 void IMatrixCollector::save_imatrix() const {
-    save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str());
+    save_imatrix(m_params.ofile.empty() ? "imatrix.dat" : m_params.ofile.c_str(), m_params.dataset.c_str());
 }

 void IMatrixCollector::keep_imatrix(int ncall) const {

@@ -207,24 +208,33 @@ void IMatrixCollector::keep_imatrix(int ncall) const {
     if (file_name.empty()) file_name = "imatrix.dat";
     file_name += ".at_";
     file_name += std::to_string(ncall);
-    save_imatrix(file_name.c_str());
+    save_imatrix(file_name.c_str(), m_params.dataset.c_str());
 }

-void IMatrixCollector::save_imatrix(const char * fname) const {
+void IMatrixCollector::save_imatrix(const char * fname, const char * dataset) const {
     std::ofstream out(fname, std::ios::binary);
     int n_entries = m_stats.size();
-    out.write((const char*)&n_entries, sizeof(n_entries));
-    for (auto& p : m_stats) {
+    out.write((const char *) &n_entries, sizeof(n_entries));
+    for (const auto & p : m_stats) {
         int len = p.first.size();
-        out.write((const char*)&len, sizeof(len));
+        out.write((const char *) &len, sizeof(len));
         out.write(p.first.c_str(), len);
-        out.write((const char*)&p.second.ncall, sizeof(p.second.ncall));
+        out.write((const char *) &p.second.ncall, sizeof(p.second.ncall));
         int nval = p.second.values.size();
-        out.write((const char*)&nval, sizeof(nval));
-        if (nval > 0) out.write((const char*)p.second.values.data(), nval*sizeof(float));
+        out.write((const char *) &nval, sizeof(nval));
+        if (nval > 0) out.write((const char *) p.second.values.data(), nval * sizeof(float));
     }
+
+    // Write the number of call the matrix was computed with
+    out.write((const char *) &m_last_call, sizeof(m_last_call));
+
+    // Write the dataset name at the end of the file to later on specify it in quantize
+    int n_dataset = strlen(dataset);
+    out.write((const char *) &n_dataset, sizeof(n_dataset));
+    out.write(dataset, n_dataset);
+
     if (m_params.verbosity > 0) {
-        fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n",__func__,m_last_call,fname);
+        fprintf(stderr, "\n%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname);
     }
 }

@@ -547,6 +557,29 @@ int main(int argc, char ** argv) {
         }
     }

+    gpt_params params;
+    params.n_batch = 512;
+    if (!gpt_params_parse(args.size(), args.data(), params)) {
+        return 1;
+    }
+
+    params.logits_all = true;
+    params.n_batch = std::min(params.n_batch, params.n_ctx);
+
+    print_build_info();
+
+    if (params.seed == LLAMA_DEFAULT_SEED) {
+        params.seed = time(NULL);
+    }
+
+    fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
+
+    std::mt19937 rng(params.seed);
+    if (params.random_prompt) {
+        params.prompt = gpt_random_prompt(rng);
+    }
+
+    sparams.dataset = params.prompt_file;
     g_collector.set_parameters(std::move(sparams));

     if (!combine_files.empty()) {

@@ -585,28 +618,6 @@ int main(int argc, char ** argv) {
         }
     }

-    gpt_params params;
-    params.n_batch = 512;
-    if (!gpt_params_parse(args.size(), args.data(), params)) {
-        return 1;
-    }
-
-    params.logits_all = true;
-    params.n_batch = std::min(params.n_batch, params.n_ctx);
-
-    print_build_info();
-
-    if (params.seed == LLAMA_DEFAULT_SEED) {
-        params.seed = time(NULL);
-    }
-
-    fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
-
-    std::mt19937 rng(params.seed);
-    if (params.random_prompt) {
-        params.prompt = gpt_random_prompt(rng);
-    }
-
     llama_backend_init();
     llama_numa_init(params.numa);
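With the imatrix change above, the output file now ends with the chunk count and the dataset name, so `quantize` can later report which data produced the matrix. Below is a sketch of a reader for that layout; it is not code from the commit and assumes `m_last_call` and the counters are written as 4-byte ints, matching the writer shown above:

```cpp
#include <fstream>
#include <string>

// Illustrative sketch: walk the entries written by save_imatrix() and recover the
// trailing chunk count and dataset name appended at the end of the file.
static bool read_imatrix_trailer(const char * fname, int & last_call, std::string & dataset) {
    std::ifstream in(fname, std::ios::binary);
    if (!in) return false;

    int n_entries = 0;
    in.read((char *) &n_entries, sizeof(n_entries));
    for (int i = 0; i < n_entries; ++i) {
        int len = 0;
        in.read((char *) &len, sizeof(len));
        in.ignore(len);                                      // tensor name
        int ncall = 0;
        in.read((char *) &ncall, sizeof(ncall));
        int nval = 0;
        in.read((char *) &nval, sizeof(nval));
        in.ignore((std::streamsize) nval * sizeof(float));   // importance values
    }

    in.read((char *) &last_call, sizeof(last_call));          // chunks the matrix was computed with
    int n_dataset = 0;
    in.read((char *) &n_dataset, sizeof(n_dataset));
    dataset.resize(n_dataset);
    in.read(&dataset[0], n_dataset);
    return bool(in);
}
```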
@ -104,6 +104,7 @@ static std::string format(const char * fmt, ...) {
|
||||||
#define TN_POS_EMBD "%s.position_embd.weight"
|
#define TN_POS_EMBD "%s.position_embd.weight"
|
||||||
#define TN_CLASS_EMBD "v.class_embd"
|
#define TN_CLASS_EMBD "v.class_embd"
|
||||||
#define TN_PATCH_EMBD "v.patch_embd.weight"
|
#define TN_PATCH_EMBD "v.patch_embd.weight"
|
||||||
|
#define TN_PATCH_BIAS "v.patch_embd.bias"
|
||||||
#define TN_ATTN_K "%s.blk.%d.attn_k.%s"
|
#define TN_ATTN_K "%s.blk.%d.attn_k.%s"
|
||||||
#define TN_ATTN_Q "%s.blk.%d.attn_q.%s"
|
#define TN_ATTN_Q "%s.blk.%d.attn_q.%s"
|
||||||
#define TN_ATTN_V "%s.blk.%d.attn_v.%s"
|
#define TN_ATTN_V "%s.blk.%d.attn_v.%s"
|
||||||
|
@ -425,6 +426,7 @@ struct clip_vision_model {
|
||||||
// embeddings
|
// embeddings
|
||||||
struct ggml_tensor * class_embedding;
|
struct ggml_tensor * class_embedding;
|
||||||
struct ggml_tensor * patch_embeddings;
|
struct ggml_tensor * patch_embeddings;
|
||||||
|
struct ggml_tensor * patch_bias;
|
||||||
struct ggml_tensor * position_embeddings;
|
struct ggml_tensor * position_embeddings;
|
||||||
|
|
||||||
struct ggml_tensor * pre_ln_w;
|
struct ggml_tensor * pre_ln_w;
|
||||||
|
@ -501,6 +503,11 @@ struct clip_ctx {
|
||||||
bool use_gelu = false;
|
bool use_gelu = false;
|
||||||
int32_t ftype = 1;
|
int32_t ftype = 1;
|
||||||
|
|
||||||
|
bool has_class_embedding = true;
|
||||||
|
bool has_pre_norm = true;
|
||||||
|
bool has_post_norm = false;
|
||||||
|
bool has_patch_bias = false;
|
||||||
|
|
||||||
struct gguf_context * ctx_gguf;
|
struct gguf_context * ctx_gguf;
|
||||||
struct ggml_context * ctx_data;
|
struct ggml_context * ctx_data;
|
||||||
|
|
||||||
|
@ -526,7 +533,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
const int patch_size = hparams.patch_size;
|
const int patch_size = hparams.patch_size;
|
||||||
const int num_patches = ((image_size / patch_size) * (image_size / patch_size));
|
const int num_patches = ((image_size / patch_size) * (image_size / patch_size));
|
||||||
const int num_patches_per_side = image_size / patch_size; GGML_UNUSED(num_patches_per_side);
|
const int num_patches_per_side = image_size / patch_size; GGML_UNUSED(num_patches_per_side);
|
||||||
const int num_positions = num_patches + 1;
|
const int num_positions = num_patches + (ctx->has_class_embedding ? 1 : 0);
|
||||||
const int hidden_size = hparams.hidden_size;
|
const int hidden_size = hparams.hidden_size;
|
||||||
const int n_head = hparams.n_head;
|
const int n_head = hparams.n_head;
|
||||||
const int d_head = hidden_size / n_head;
|
const int d_head = hidden_size / n_head;
|
||||||
|
@ -557,16 +564,23 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
|
||||||
inp = ggml_reshape_3d(ctx0, inp, num_patches, hidden_size, batch_size);
|
inp = ggml_reshape_3d(ctx0, inp, num_patches, hidden_size, batch_size);
|
||||||
inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3));
|
inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 0, 2, 3));
|
||||||
|
|
||||||
|
if (ctx->has_patch_bias) {
|
||||||
|
// inp = ggml_add(ctx0, inp, ggml_repeat(ctx0, model.patch_bias, inp));
|
||||||
|
inp = ggml_add(ctx0, inp, model.patch_bias);
|
||||||
|
}
|
||||||
|
|
||||||
// concat class_embeddings and patch_embeddings
|
// concat class_embeddings and patch_embeddings
|
||||||
struct ggml_tensor * embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
|
struct ggml_tensor * embeddings = inp;
|
||||||
|
if (ctx->has_class_embedding) {
|
||||||
|
embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
|
||||||
|
embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
|
||||||
|
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
|
||||||
|
embeddings = ggml_acc(ctx0, embeddings, inp,
|
||||||
|
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
|
||||||
|
}
|
||||||
ggml_set_name(embeddings, "embeddings");
|
ggml_set_name(embeddings, "embeddings");
|
||||||
ggml_set_input(embeddings);
|
ggml_set_input(embeddings);
|
||||||
|
|
||||||
embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
|
|
||||||
             embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
     embeddings = ggml_acc(ctx0, embeddings, inp,
             embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);

     struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions);
     ggml_set_name(positions, "positions");

@@ -576,7 +590,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         ggml_add(ctx0, embeddings, ggml_get_rows(ctx0, model.position_embeddings, positions));

     // pre-layernorm
-    {
+    if (ctx->has_pre_norm) {
         embeddings = ggml_norm(ctx0, embeddings, eps);
         ggml_set_name(embeddings, "pre_ln");

@@ -664,6 +678,14 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         embeddings = cur;
     }

+    // post-layernorm
+    if (ctx->has_post_norm) {
+        embeddings = ggml_norm(ctx0, embeddings, eps);
+        ggml_set_name(embeddings, "post_ln");
+
+        embeddings = ggml_add(ctx0, ggml_mul(ctx0, embeddings, model.post_ln_w), model.post_ln_b);
+    }
+
     // llava projector
     {
         embeddings = ggml_reshape_2d(ctx0, embeddings, embeddings->ne[0], embeddings->ne[1]);

@@ -1148,12 +1170,39 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     }

+    try {
+        vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
+        new_clip->has_class_embedding = true;
+    } catch (const std::exception& e) {
+        new_clip->has_class_embedding = false;
+    }
+
+    try {
+        vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
+        vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
+        new_clip->has_pre_norm = true;
+    } catch (std::exception & e) {
+        new_clip->has_pre_norm = false;
+    }
+
+    try {
+        vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight"));
+        vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias"));
+        new_clip->has_post_norm = true;
+    } catch (std::exception & e) {
+        new_clip->has_post_norm = false;
+    }
+
+    try {
+        vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS);
+        new_clip->has_patch_bias = true;
+    } catch (std::exception & e) {
+        new_clip->has_patch_bias = false;
+    }
+
     try {
         vision_model.patch_embeddings = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD);
-        vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
         vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
-        vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
-        vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
     } catch(const std::exception& e) {
         LOG_TEE("%s: failed to load vision model tensors\n", __func__);
     }

@@ -1325,7 +1374,7 @@ bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length
 }

 // Linear interpolation between two points
-inline float lerp(float s, float e, float t) {
+inline float clip_lerp(float s, float e, float t) {
     return s + (e - s) * t;
 }
 // Bilinear resize function

@@ -1347,17 +1396,17 @@ static void bilinear_resize(const clip_image_u8& src, clip_image_u8& dst, int ta
             float y_lerp = py - y_floor;

             for (int c = 0; c < 3; c++) {
-                float top = lerp(
+                float top = clip_lerp(
                     static_cast<float>(src.buf[3 * (y_floor * src.nx + x_floor) + c]),
                     static_cast<float>(src.buf[3 * (y_floor * src.nx + (x_floor + 1)) + c]),
                     x_lerp
                 );
-                float bottom = lerp(
+                float bottom = clip_lerp(
                     static_cast<float>(src.buf[3 * ((y_floor + 1) * src.nx + x_floor) + c]),
                     static_cast<float>(src.buf[3 * ((y_floor + 1) * src.nx + (x_floor + 1)) + c]),
                     x_lerp
                 );
-                dst.buf[3 * (y * target_width + x) + c] = static_cast<uint8_t>(lerp(top, bottom, y_lerp));
+                dst.buf[3 * (y * target_width + x) + c] = static_cast<uint8_t>(clip_lerp(top, bottom, y_lerp));
             }
         }
     }
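The loader changes above probe each optional tensor (class embedding, pre/post layernorm, patch bias) with a try/catch around get_tensor and record a has_* flag that the graph builder then checks. A minimal sketch of that probe-and-flag pattern, assuming get_tensor throws on a missing name (as the blocks above rely on); the helper name get_tensor_opt is hypothetical and not part of the source:

    #include <exception>
    #include "ggml.h"

    // Hypothetical helper: try to fetch an optional tensor, return whether it exists.
    template <typename Getter>
    static bool get_tensor_opt(Getter && get_tensor, struct ggml_context * ctx,
                               const char * name, struct ggml_tensor ** out) {
        try {
            *out = get_tensor(ctx, name);   // assumed to throw if `name` is absent from the GGUF
            return true;
        } catch (const std::exception &) {
            *out = nullptr;                 // optional tensor: just record its absence
            return false;
        }
    }

    // e.g. new_clip->has_class_embedding =
    //          get_tensor_opt(get_tensor, new_clip->ctx_data, TN_CLASS_EMBD,
    //                         &vision_model.class_embedding);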
@@ -30,7 +30,6 @@ int main(int argc, char ** argv){

     // load the model
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
-    llama_set_rng_seed(ctx, params.seed);
     GGML_ASSERT(llama_n_vocab(model) < (1 << 16));

     // tokenize the prompt

@@ -38,7 +38,6 @@ int main(int argc, char ** argv){

     // load the model
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
-    llama_set_rng_seed(ctx, params.seed);
     GGML_ASSERT(llama_n_vocab(model) < (1 << 16));

     // tokenize the prompt

@@ -240,7 +240,6 @@ int main(int argc, char ** argv) {
             return 1;
         }
         session_tokens.resize(n_token_count_out);
-        llama_set_rng_seed(ctx, params.seed);
         LOG_TEE("%s: loaded a session with prompt size of %d tokens\n", __func__, (int)session_tokens.size());
     }
 }
@@ -1,6 +1,6 @@
 set(TARGET quantize)
 add_executable(${TARGET} quantize.cpp)
 install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})
+target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT})
 target_include_directories(${TARGET} PRIVATE ../../common)
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -8,7 +8,6 @@
 #include <unordered_map>
 #include <fstream>
 #include <cmath>
-#include <algorithm>

 struct quant_option {
     std::string name;

@@ -53,6 +52,10 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", },
 };

+static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE       = "quantize.imatrix.file";
+static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET    = "quantize.imatrix.dataset";
+static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES  = "quantize.imatrix.entries_count";
+static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS   = "quantize.imatrix.chunks_count";
+
 static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
     std::string ftype_str;

@@ -97,6 +100,7 @@ static void usage(const char * executable) {
     printf("  --exclude-weights tensor_name: use importance matrix for this/these tensor(s)\n");
     printf("  --output-tensor-type ggml_type: use this ggml_type for the output.weight tensor\n");
     printf("  --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n");
+    printf("  --keep-split: will generate quatized model in the same shards as input");
     printf("  --override-kv KEY=TYPE:VALUE\n");
     printf("      Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n");
     printf("Note: --include-weights and --exclude-weights cannot be used together\n");

@@ -112,7 +116,7 @@ static void usage(const char * executable) {
     exit(1);
 }

-static void load_imatrix(const std::string & imatrix_file, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
+static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
     std::ifstream in(imatrix_file.c_str(), std::ios::binary);
     if (!in) {
         printf("%s: failed to open %s\n",__func__, imatrix_file.c_str());

@@ -159,18 +163,33 @@ static void load_imatrix(const std::string & imatrix_file, std::unordered_map<st
             printf("%s: loaded data (size = %6d, ncall = %6d) for '%s'\n", __func__, int(e.size()), ncall, name.c_str());
         }
     }
-    printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str());
+
+    // latest imatrix version contains the dataset filename at the end of the file
+    int m_last_call = 0;
+    if (in.peek() != EOF) {
+        in.read((char *)&m_last_call, sizeof(m_last_call));
+        int dataset_len;
+        in.read((char *)&dataset_len, sizeof(dataset_len));
+        std::vector<char> dataset_as_vec(dataset_len);
+        in.read(dataset_as_vec.data(), dataset_len);
+        imatrix_dataset.assign(dataset_as_vec.begin(), dataset_as_vec.end());
+        printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str());
+    }
+    printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_call);
+    return m_last_call;
 }

-static void prepare_imatrix(const std::string & imatrix_file,
+static int prepare_imatrix(const std::string & imatrix_file,
+        std::string & imatrix_dataset,
         const std::vector<std::string> & included_weights,
         const std::vector<std::string> & excluded_weights,
         std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
+    int m_last_call = -1;
     if (!imatrix_file.empty()) {
-        load_imatrix(imatrix_file, imatrix_data);
+        m_last_call = load_imatrix(imatrix_file, imatrix_dataset, imatrix_data);
     }
     if (imatrix_data.empty()) {
-        return;
+        return m_last_call;
     }
     if (!excluded_weights.empty()) {
         for (auto& name : excluded_weights) {

@@ -196,6 +215,7 @@ static void prepare_imatrix(const std::string & imatrix_file,
     if (!imatrix_data.empty()) {
         printf("%s: have %d importance matrix entries\n", __func__, int(imatrix_data.size()));
     }
+    return m_last_call;
 }

 static ggml_type parse_ggml_type(const char * arg) {
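load_imatrix now tolerates an optional footer after the per-tensor entries: the producer's last chunk count, the length of the training-dataset name, and the name bytes themselves. For context, a minimal sketch of the matching writer side; the helper name write_imatrix_footer is hypothetical, since the imatrix tool itself is not part of this diff:

    #include <fstream>
    #include <string>

    // Hypothetical counterpart to the reader in load_imatrix above: append the
    // optional footer (chunk count, dataset-name length, dataset-name bytes)
    // after all per-tensor entries have been written.
    static void write_imatrix_footer(std::ofstream & out, int last_call, const std::string & dataset) {
        out.write((const char *) &last_call, sizeof(last_call));
        const int dataset_len = (int) dataset.size();
        out.write((const char *) &dataset_len, sizeof(dataset_len));
        out.write(dataset.data(), dataset_len);
    }

Older imatrix files simply end after the entries, which is why the reader guards the footer with in.peek() != EOF.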
@@ -210,43 +230,6 @@ static ggml_type parse_ggml_type(const char * arg) {
     return result;
 }

-static bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
-    const char* sep = strchr(data, '=');
-    if (sep == nullptr || sep - data >= 128) {
-        fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
-        return false;
-    }
-    llama_model_kv_override kvo;
-    std::strncpy(kvo.key, data, sep - data);
-    kvo.key[sep - data] = 0;
-    sep++;
-    if (strncmp(sep, "int:", 4) == 0) {
-        sep += 4;
-        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
-        kvo.int_value = std::atol(sep);
-    } else if (strncmp(sep, "float:", 6) == 0) {
-        sep += 6;
-        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
-        kvo.float_value = std::atof(sep);
-    } else if (strncmp(sep, "bool:", 5) == 0) {
-        sep += 5;
-        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
-        if (std::strcmp(sep, "true") == 0) {
-            kvo.bool_value = true;
-        } else if (std::strcmp(sep, "false") == 0) {
-            kvo.bool_value = false;
-        } else {
-            fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
-            return false;
-        }
-    } else {
-        fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
-        return false;
-    }
-    overrides.emplace_back(std::move(kvo));
-    return true;
-}
-
 int main(int argc, char ** argv) {
     if (argc < 3) {
         usage(argv[0]);
@@ -300,6 +283,8 @@ int main(int argc, char ** argv) {
         } else {
             usage(argv[0]);
         }
+    } else if (strcmp(argv[arg_idx], "--keep-split")) {
+        params.keep_split = true;
     } else {
         usage(argv[0]);
     }

@@ -313,10 +298,43 @@ int main(int argc, char ** argv) {
         usage(argv[0]);
     }

+    std::string imatrix_dataset;
     std::unordered_map<std::string, std::vector<float>> imatrix_data;
-    prepare_imatrix(imatrix_file, included_weights, excluded_weights, imatrix_data);
+    int m_last_call = prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data);
     if (!imatrix_data.empty()) {
         params.imatrix = &imatrix_data;
+        {
+            llama_model_kv_override kvo;
+            std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE);
+            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+            strncpy(kvo.val_str, imatrix_file.c_str(), 127);
+            kvo.val_str[127] = '\0';
+            kv_overrides.emplace_back(std::move(kvo));
+        }
+        if (!imatrix_dataset.empty()) {
+            llama_model_kv_override kvo;
+            std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET);
+            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+            strncpy(kvo.val_str, imatrix_dataset.c_str(), 127);
+            kvo.val_str[127] = '\0';
+            kv_overrides.emplace_back(std::move(kvo));
+        }
+
+        {
+            llama_model_kv_override kvo;
+            std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES);
+            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+            kvo.val_i64 = imatrix_data.size();
+            kv_overrides.emplace_back(std::move(kvo));
+        }
+
+        if (m_last_call > 0) {
+            llama_model_kv_override kvo;
+            std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS);
+            kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+            kvo.val_i64 = m_last_call;
+            kv_overrides.emplace_back(std::move(kvo));
+        }
     }
     if (!kv_overrides.empty()) {
         kv_overrides.emplace_back();

@@ -332,20 +350,28 @@ int main(int argc, char ** argv) {
     std::string fname_out;

     std::string ftype_str;
+    std::string suffix = ".gguf";
     if (try_parse_ftype(argv[arg_idx], params.ftype, ftype_str)) {
         std::string fpath;
         const size_t pos = fname_inp.find_last_of("/\\");
         if (pos != std::string::npos) {
             fpath = fname_inp.substr(0, pos + 1);
         }
-        // export as [inp path]/ggml-model-[ftype].gguf
-        fname_out = fpath + "ggml-model-" + ftype_str + ".gguf";
+
+        // export as [inp path]/ggml-model-[ftype]. Only add extension if there is no splitting
+        fname_out = fpath + "ggml-model-" + ftype_str;
+        if (!params.keep_split) {
+            fname_out += suffix;
+        }
         arg_idx++;
         if (ftype_str == "COPY") {
             params.only_copy = true;
         }
     } else {
         fname_out = argv[arg_idx];
+        if (params.keep_split && fname_out.find(suffix) != std::string::npos) {
+            fname_out = fname_out.substr(0, fname_out.length() - suffix.length());
+        }
         arg_idx++;

         if (argc <= arg_idx) {
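The overrides built above end up as ordinary metadata in the quantized GGUF under the quantize.imatrix.* keys. A sketch of how a consumer could read the two string-valued keys back; this assumes the gguf C API declared in ggml.h (gguf_init_from_file, gguf_find_key, gguf_get_val_str, gguf_free), and the model path is a placeholder:

    #include <cstdio>
    #include "ggml.h"

    int main() {
        struct gguf_init_params params = { /*.no_alloc =*/ true, /*.ctx =*/ NULL };
        struct gguf_context * ctx = gguf_init_from_file("ggml-model-Q4_K.gguf", params); // placeholder path
        if (!ctx) {
            return 1;
        }
        const int kid_file = gguf_find_key(ctx, "quantize.imatrix.file");
        const int kid_data = gguf_find_key(ctx, "quantize.imatrix.dataset");
        if (kid_file >= 0) {
            printf("imatrix file   : %s\n", gguf_get_val_str(ctx, kid_file));
        }
        if (kid_data >= 0) {
            printf("imatrix dataset: %s\n", gguf_get_val_str(ctx, kid_data));
        }
        gguf_free(ctx);
        return 0;
    }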
65 examples/quantize/tests.sh Normal file

@@ -0,0 +1,65 @@
+#!/bin/bash
+
+set -eu
+
+if [ $# -lt 1 ]
+then
+    echo "usage: $0 path_to_build_binary [path_to_temp_folder]"
+    echo "example: $0 ../../build/bin ../../tmp"
+    exit 1
+fi
+
+if [ $# -gt 1 ]
+then
+    TMP_DIR=$2
+else
+    TMP_DIR=/tmp
+fi
+
+set -x
+
+SPLIT=$1/gguf-split
+QUANTIZE=$1/quantize
+MAIN=$1/main
+WORK_PATH=$TMP_DIR/quantize
+ROOT_DIR=$(realpath $(dirname $0)/../../)
+
+mkdir -p "$WORK_PATH"
+
+# Clean up in case of previously failed test
+rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
+
+# 1. Get a model
+(
+  cd $WORK_PATH
+  "$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
+)
+echo PASS
+
+# 2. Split model
+$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
+echo PASS
+echo
+
+# 3. Requant model with '--keep_split'
+$QUANTIZE --allow-requantize --keep_split $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
+echo PASS
+echo
+
+# 3a. Test the requanted model is loading properly
+$MAIN --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# 4. Requant mode without '--keep_split'
+$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
+echo PASS
+echo
+
+# 4b. Test the requanted model is loading properly
+$MAIN --model $WORK_PATH/ggml-model-requant-merge.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# Clean up
+rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
@@ -1,12 +1,29 @@
 set(TARGET server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
 option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
-add_executable(${TARGET}
+set(TARGET_SRCS
     server.cpp
     utils.hpp
     httplib.h
 )
+set(PUBLIC_ASSETS
+    index.html
+    index.js
+    completion.js
+    json-schema-to-grammar.mjs
+)
+foreach(asset ${PUBLIC_ASSETS})
+    set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
+    set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
+    list(APPEND TARGET_SRCS ${output})
+    add_custom_command(
+        DEPENDS "${input}"
+        OUTPUT "${output}"
+        COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
+    )
+endforeach()
+add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
 target_compile_definitions(${TARGET} PRIVATE
     SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
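With the foreach loop above, each file in PUBLIC_ASSETS is converted by scripts/xxd.cmake into an `<asset>.hpp` in the build directory and compiled into the server, which replaces the hex-dump headers previously checked into the tree (see the deleted completion_js array at the end of this diff). A sketch of how such a generated header can be consumed; the completion_js_len variable is an assumption here, on the expectation that an xxd-style dump emits a length alongside the byte array:

    #include <string>
    #include "completion.js.hpp"   // generated into ${CMAKE_CURRENT_BINARY_DIR} at build time

    // Expose the embedded asset as a string; completion_js / completion_js_len are
    // assumed to be the array and length produced by scripts/xxd.cmake.
    static std::string completion_js_body() {
        return std::string(reinterpret_cast<const char *>(completion_js), completion_js_len);
    }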
@@ -90,7 +90,8 @@ export default function () {
         "model": model,
         "stream": true,
         "seed": 42,
-        "max_tokens": max_tokens
+        "max_tokens": max_tokens,
+        "stop": ["<|im_end|>"] // This is temporary for phi-2 base (i.e. not instructed) since the server expects that the model always to emit BOS
     }

     const params = {method: 'POST', body: JSON.stringify(payload)};
@@ -1,496 +0,0 @@
-unsigned char completion_js[] = {
[ ... 496 deleted lines: the checked-in, hex-encoded copy of completion.js (the `unsigned char completion_js[]` array). This header is now generated at build time by scripts/xxd.cmake, per the server CMakeLists.txt change above; the rest of the byte dump is omitted here. ]
|
|
||||||
0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20,
|
|
||||||
0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21,
|
|
||||||
0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73,
|
|
||||||
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
|
||||||
0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x61, 0x70, 0x69,
|
|
||||||
0x5f, 0x75, 0x72, 0x6c, 0x20, 0x3d, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69,
|
|
||||||
0x67, 0x2e, 0x61, 0x70, 0x69, 0x5f, 0x75, 0x72, 0x6c, 0x20, 0x7c, 0x7c,
|
|
||||||
0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
|
||||||
0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x20, 0x3d, 0x20, 0x61,
|
|
||||||
0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x60,
|
|
||||||
0x24, 0x7b, 0x61, 0x70, 0x69, 0x5f, 0x75, 0x72, 0x6c, 0x7d, 0x2f, 0x70,
|
|
||||||
0x72, 0x6f, 0x70, 0x73, 0x60, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28,
|
|
||||||
0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28,
|
|
||||||
0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65,
|
|
||||||
0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69,
|
|
||||||
0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x2e,
|
|
||||||
0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74, 0x5f, 0x67, 0x65, 0x6e, 0x65,
|
|
||||||
0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69,
|
|
||||||
0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x72,
|
|
||||||
0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
|
|
||||||
0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
|
|
||||||
0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
|
||||||
};
|
|
||||||
unsigned int completion_js_len = 5909;
|
|
|
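The hex bytes above are the tail of an xxd-style dump of the server's completion.js, terminated by the completion_js_len symbol. As a rough sketch of how such a generated header gets consumed (the tiny byte array below is a stand-in for the real data, and the wrapper function is hypothetical), the embedded bytes and length are wrapped into a string the HTTP server can return:

```cpp
#include <cstdio>
#include <string>

// Stand-in for the xxd output above: `xxd -i completion.js` emits an
// unsigned char array plus an unsigned int length.
unsigned char completion_js[]   = { 0x2f, 0x2f, 0x20, 0x68, 0x69, 0x0a }; // "// hi\n"
unsigned int  completion_js_len = 6;

int main() {
    // Wrap the embedded bytes into a string an HTTP handler could serve.
    std::string src(reinterpret_cast<const char *>(completion_js), completion_js_len);
    std::printf("%s", src.c_str());
    return 0;
}
```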
@@ -8,13 +8,3 @@ PUBLIC=$DIR/public
|
||||||
echo "download js bundle files"
|
echo "download js bundle files"
|
||||||
curl https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks > $PUBLIC/index.js
|
curl https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks > $PUBLIC/index.js
|
||||||
echo >> $PUBLIC/index.js # add newline
|
echo >> $PUBLIC/index.js # add newline
|
||||||
|
|
||||||
FILES=$(ls $PUBLIC)
|
|
||||||
|
|
||||||
cd $PUBLIC
|
|
||||||
for FILE in $FILES; do
|
|
||||||
echo "generate $FILE.hpp"
|
|
||||||
|
|
||||||
# use simple flag for old version of xxd
|
|
||||||
xxd -i $FILE > $DIR/$FILE.hpp
|
|
||||||
done
|
|
||||||
|
|
3 file diffs suppressed because they are too large
|
@@ -881,11 +881,11 @@
|
||||||
.replace(/&/g, '&')
|
.replace(/&/g, '&')
|
||||||
.replace(/</g, '<')
|
.replace(/</g, '<')
|
||||||
.replace(/>/g, '>')
|
.replace(/>/g, '>')
|
||||||
.replace(/^#{1,6} (.*)$/gim, '<h3>$1</h3>')
|
.replace(/(^|\n)#{1,6} ([^\n]*)(?=([^`]*`[^`]*`)*[^`]*$)/g, '$1<h3>$2</h3>')
|
||||||
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
|
.replace(/\*\*(.*?)\*\*(?=([^`]*`[^`]*`)*[^`]*$)/g, '<strong>$1</strong>')
|
||||||
.replace(/__(.*?)__/g, '<strong>$1</strong>')
|
.replace(/__(.*?)__(?=([^`]*`[^`]*`)*[^`]*$)/g, '<strong>$1</strong>')
|
||||||
.replace(/\*(.*?)\*/g, '<em>$1</em>')
|
.replace(/\*(.*?)\*(?=([^`]*`[^`]*`)*[^`]*$)/g, '<em>$1</em>')
|
||||||
.replace(/_(.*?)_/g, '<em>$1</em>')
|
.replace(/_(.*?)_(?=([^`]*`[^`]*`)*[^`]*$)/g, '<em>$1</em>')
|
||||||
.replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
|
.replace(/```.*?\n([\s\S]*?)```/g, '<pre><code>$1</code></pre>')
|
||||||
.replace(/`(.*?)`/g, '<code>$1</code>')
|
.replace(/`(.*?)`/g, '<code>$1</code>')
|
||||||
.replace(/\n/gim, '<br />');
|
.replace(/\n/gim, '<br />');
|
||||||
|
|
|
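The lookahead (?=([^`]*`[^`]*`)*[^`]*$) appended to the heading, bold and italic rules above only lets a match succeed when it is followed by an even number of backticks, i.e. when the match is not inside an inline code span; the heading rule additionally anchors on a preceding newline instead of the multiline ^. A minimal C++ sketch of the same idea (std::regex uses ECMAScript syntax, so the lookahead carries over unchanged):

```cpp
#include <iostream>
#include <regex>
#include <string>

int main() {
    // Bold markers inside an inline code span are followed by an odd number of
    // backticks, so the lookahead rejects them; bold text outside code spans is
    // followed by an even number and gets rewritten.
    const std::string md = "**bold** and `**not bold**` in code";
    const std::regex strong(R"(\*\*(.*?)\*\*(?=([^`]*`[^`]*`)*[^`]*$))");
    std::cout << std::regex_replace(md, strong, "<strong>$1</strong>") << "\n";
    // prints: <strong>bold</strong> and `**not bold**` in code
}
```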
@@ -854,7 +854,7 @@ struct server_context {
|
||||||
slot.sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
slot.sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
|
||||||
slot.params.n_keep = json_value(data, "n_keep", slot.params.n_keep);
|
slot.params.n_keep = json_value(data, "n_keep", slot.params.n_keep);
|
||||||
slot.params.n_discard = json_value(data, "n_discard", default_params.n_discard);
|
slot.params.n_discard = json_value(data, "n_discard", default_params.n_discard);
|
||||||
slot.params.seed = json_value(data, "seed", default_params.seed);
|
slot.sparams.seed = json_value(data, "seed", default_sparams.seed);
|
||||||
slot.sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
slot.sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
|
||||||
slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
|
||||||
|
|
||||||
|
@@ -1028,7 +1028,6 @@ struct server_context {
|
||||||
send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST);
|
send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
llama_set_rng_seed(ctx, slot.params.seed);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
slot.command = SLOT_COMMAND_LOAD_PROMPT;
|
slot.command = SLOT_COMMAND_LOAD_PROMPT;
|
||||||
|
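Together with the slot.params.seed -> slot.sparams.seed change above, removing the llama_set_rng_seed call moves per-request seeding out of the shared context RNG and into each slot's sampling parameters. A hypothetical sketch of that pattern (the struct, sentinel and names below are illustrative, not the server's actual types):

```cpp
#include <cstdint>
#include <random>

// Hypothetical per-slot sampling state: every request carries its own seed,
// so two slots can sample deterministically without reseeding a shared RNG.
struct sampling_params {
    uint32_t seed = UINT32_MAX;   // sentinel: "no seed given, pick one"
};

struct slot_sampler {
    std::mt19937 rng;
    explicit slot_sampler(const sampling_params & sp)
        : rng(sp.seed == UINT32_MAX ? std::random_device{}() : sp.seed) {}
};

int main() {
    sampling_params a; a.seed = 42;   // request with an explicit seed
    sampling_params b;                // request without one
    slot_sampler s1(a), s2(b);        // independent generators per slot
    (void) s1; (void) s2;
    return 0;
}
```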
@@ -1118,7 +1117,7 @@ struct server_context {
|
||||||
|
|
||||||
bool process_token(completion_token_output & result, server_slot & slot) {
|
bool process_token(completion_token_output & result, server_slot & slot) {
|
||||||
// remember which tokens were sampled - used for repetition penalties during sampling
|
// remember which tokens were sampled - used for repetition penalties during sampling
|
||||||
const std::string token_str = llama_token_to_piece(ctx, result.tok);
|
const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
|
||||||
slot.sampled = result.tok;
|
slot.sampled = result.tok;
|
||||||
|
|
||||||
// search stop word and delete it
|
// search stop word and delete it
|
||||||
|
@ -1208,6 +1207,27 @@ struct server_context {
|
||||||
LOG_VERBOSE("eos token found", {});
|
LOG_VERBOSE("eos token found", {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
auto n_ctx_train = llama_n_ctx_train(model);
|
||||||
|
if (slot.params.n_predict < 1 && slot.ga_n == 1
|
||||||
|
&& slot.n_prompt_tokens + slot.n_decoded >= n_ctx_train) {
|
||||||
|
LOG_WARNING("n_predict is not set and self-context extend is disabled."
|
||||||
|
" Limiting generated tokens to n_ctx_train to avoid EOS-less generation infinite loop", {
|
||||||
|
{ "id_slot", slot.id },
|
||||||
|
{ "params.n_predict", slot.params.n_predict },
|
||||||
|
{ "slot.n_prompt_tokens", slot.n_prompt_tokens },
|
||||||
|
{ "slot.n_decoded", slot.n_decoded },
|
||||||
|
{ "slot.n_predict", slot.n_predict },
|
||||||
|
{ "n_slots", params.n_parallel },
|
||||||
|
{ "slot.n_ctx", slot.n_ctx },
|
||||||
|
{ "n_ctx", n_ctx },
|
||||||
|
{ "n_ctx_train", n_ctx_train },
|
||||||
|
{ "ga_n", slot.ga_n },
|
||||||
|
});
|
||||||
|
slot.truncated = true;
|
||||||
|
slot.stopped_limit = true;
|
||||||
|
slot.has_next_token = false; // stop prediction
|
||||||
|
}
|
||||||
|
|
||||||
LOG_VERBOSE("next token", {
|
LOG_VERBOSE("next token", {
|
||||||
{"id_slot", slot.id},
|
{"id_slot", slot.id},
|
||||||
{"id_task", slot.id_task},
|
{"id_task", slot.id_task},
|
||||||
|
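The block added above guards against endless generation: if the request sets no n_predict limit, self-extend (group attention) is disabled, and the slot has already produced n_ctx_train tokens in total, the slot is marked truncated and stopped so a model that never emits EOS cannot loop forever. A condensed sketch of the stop condition (hypothetical struct mirroring the fields the check reads):

```cpp
#include <cstdio>

// Hypothetical slot state carrying just the fields the check reads.
struct slot_state {
    int n_predict       = -1;  // < 1 means the request set no explicit limit
    int ga_n            = 1;   // 1 means group-attention self-extend is disabled
    int n_prompt_tokens = 0;
    int n_decoded       = 0;
};

// True when generation should be force-stopped to avoid an EOS-less
// infinite loop, as in the server change above.
bool should_force_stop(const slot_state & s, int n_ctx_train) {
    return s.n_predict < 1 && s.ga_n == 1 &&
           s.n_prompt_tokens + s.n_decoded >= n_ctx_train;
}

int main() {
    slot_state s;
    s.n_prompt_tokens = 100;
    s.n_decoded       = 1948;
    std::printf("%d\n", should_force_stop(s, 2048));  // prints 1
    return 0;
}
```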
@@ -2142,7 +2162,7 @@ struct server_context {
|
||||||
});
|
});
|
||||||
|
|
||||||
// process the created batch of tokens
|
// process the created batch of tokens
|
||||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
|
for (int32_t i = 0; i < batch.n_tokens; i += n_batch) {
|
||||||
const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);
|
const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);
|
||||||
|
|
||||||
for (auto & slot : slots) {
|
for (auto & slot : slots) {
|
||||||
|
@@ -2372,7 +2392,7 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
|
||||||
printf(" -n, --n-predict maximum tokens to predict (default: %d)\n", params.n_predict);
|
printf(" -n, --n-predict maximum tokens to predict (default: %d)\n", params.n_predict);
|
||||||
printf(" --override-kv KEY=TYPE:VALUE\n");
|
printf(" --override-kv KEY=TYPE:VALUE\n");
|
||||||
printf(" advanced option to override model metadata by key. may be specified multiple times.\n");
|
printf(" advanced option to override model metadata by key. may be specified multiple times.\n");
|
||||||
printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
|
printf(" types: int, float, bool, str. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
|
||||||
printf(" -gan N, --grp-attn-n N set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`\n");
|
printf(" -gan N, --grp-attn-n N set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`\n");
|
||||||
printf(" -gaw N, --grp-attn-w N set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`\n");
|
printf(" -gaw N, --grp-attn-w N set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`\n");
|
||||||
printf(" --chat-template JINJA_TEMPLATE\n");
|
printf(" --chat-template JINJA_TEMPLATE\n");
|
||||||
|
@@ -2803,43 +2823,11 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
char * sep = strchr(argv[i], '=');
|
if (!parse_kv_override(argv[i], params.kv_overrides)) {
|
||||||
if (sep == nullptr || sep - argv[i] >= 128) {
|
|
||||||
fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]);
|
|
||||||
invalid_param = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct llama_model_kv_override kvo;
|
|
||||||
std::strncpy(kvo.key, argv[i], sep - argv[i]);
|
|
||||||
kvo.key[sep - argv[i]] = 0;
|
|
||||||
sep++;
|
|
||||||
if (strncmp(sep, "int:", 4) == 0) {
|
|
||||||
sep += 4;
|
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
|
|
||||||
kvo.int_value = std::atol(sep);
|
|
||||||
} else if (strncmp(sep, "float:", 6) == 0) {
|
|
||||||
sep += 6;
|
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
|
|
||||||
kvo.float_value = std::atof(sep);
|
|
||||||
} else if (strncmp(sep, "bool:", 5) == 0) {
|
|
||||||
sep += 5;
|
|
||||||
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
|
|
||||||
if (std::strcmp(sep, "true") == 0) {
|
|
||||||
kvo.bool_value = true;
|
|
||||||
} else if (std::strcmp(sep, "false") == 0) {
|
|
||||||
kvo.bool_value = false;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
|
|
||||||
invalid_param = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
|
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.kv_overrides.push_back(kvo);
|
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||||
server_print_usage(argv[0], default_params, default_sparams);
|
server_print_usage(argv[0], default_params, default_sparams);
|
||||||
|
|
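The hand-rolled parsing removed above is replaced by a single parse_kv_override call (presumably a shared helper), so the server accepts the same KEY=TYPE:VALUE overrides as the other tools, including the str type now advertised in the help text. A rough, illustrative re-implementation of that syntax, not the helper itself:

```cpp
#include <cstdio>
#include <cstring>
#include <string>

// Illustrative parser for the KEY=TYPE:VALUE syntax, e.g.
// "tokenizer.ggml.add_bos_token=bool:false".
struct kv_override {
    std::string key;
    enum type { INT, FLOAT, BOOL, STR } tag = STR;
    std::string value;
};

static bool parse_override(const char * arg, kv_override & out) {
    const char * eq = strchr(arg, '=');
    if (eq == nullptr) {
        return false;                            // malformed: no '=' separator
    }
    out.key.assign(arg, eq - arg);
    const char * val = eq + 1;
    if      (strncmp(val, "int:",   4) == 0) { out.tag = kv_override::INT;   out.value = val + 4; }
    else if (strncmp(val, "float:", 6) == 0) { out.tag = kv_override::FLOAT; out.value = val + 6; }
    else if (strncmp(val, "bool:",  5) == 0) { out.tag = kv_override::BOOL;  out.value = val + 5; }
    else if (strncmp(val, "str:",   4) == 0) { out.tag = kv_override::STR;   out.value = val + 4; }
    else { return false; }                       // unknown type tag
    return true;
}

int main() {
    kv_override kvo;
    if (parse_override("tokenizer.ggml.add_bos_token=bool:false", kvo)) {
        std::printf("key=%s value=%s\n", kvo.key.c_str(), kvo.value.c_str());
    }
    return 0;
}
```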
57
examples/server/tests/features/results.feature
Normal file
|
@@ -0,0 +1,57 @@
|
||||||
|
@llama.cpp
|
||||||
|
@results
|
||||||
|
Feature: Results
|
||||||
|
|
||||||
|
Background: Server startup
|
||||||
|
Given a server listening on localhost:8080
|
||||||
|
And a model file tinyllamas/split/stories15M-00001-of-00003.gguf from HF repo ggml-org/models
|
||||||
|
And a model file test-model-00001-of-00003.gguf
|
||||||
|
And 128 as batch size
|
||||||
|
And 256 KV cache size
|
||||||
|
And 128 max tokens to predict
|
||||||
|
|
||||||
|
Scenario Outline: Multi users completion
|
||||||
|
Given <n_slots> slots
|
||||||
|
And continuous batching
|
||||||
|
Then the server is starting
|
||||||
|
Then the server is healthy
|
||||||
|
|
||||||
|
Given 42 as seed
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long story about AI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
Given 42 as seed
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long story about AI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
Given 42 as seed
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long story about AI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
Given 42 as seed
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long story about AI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
Given 42 as seed
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long story about AI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
Given concurrent completion requests
|
||||||
|
Then the server is busy
|
||||||
|
Then the server is idle
|
||||||
|
And all slots are idle
|
||||||
|
Then all predictions are equal
|
||||||
|
Examples:
|
||||||
|
| n_slots |
|
||||||
|
| 1 |
|
||||||
|
| 2 |
|
|
@@ -61,6 +61,7 @@ def step_server_config(context, server_fqdn, server_port):
|
||||||
context.server_metrics = False
|
context.server_metrics = False
|
||||||
context.server_process = None
|
context.server_process = None
|
||||||
context.seed = None
|
context.seed = None
|
||||||
|
context.draft = None
|
||||||
context.server_seed = None
|
context.server_seed = None
|
||||||
context.user_api_key = None
|
context.user_api_key = None
|
||||||
context.response_format = None
|
context.response_format = None
|
||||||
|
@@ -107,6 +108,11 @@ def step_n_gpu_layer(context, ngl):
|
||||||
context.n_gpu_layer = ngl
|
context.n_gpu_layer = ngl
|
||||||
|
|
||||||
|
|
||||||
|
@step('{draft:d} as draft')
|
||||||
|
def step_draft(context, draft):
|
||||||
|
context.draft = draft
|
||||||
|
|
||||||
|
|
||||||
@step('{n_ctx:d} KV cache size')
|
@step('{n_ctx:d} KV cache size')
|
||||||
def step_n_ctx(context, n_ctx):
|
def step_n_ctx(context, n_ctx):
|
||||||
context.n_ctx = n_ctx
|
context.n_ctx = n_ctx
|
||||||
|
@@ -254,6 +260,15 @@ def step_n_tokens_predicted(context, predicted_n):
|
||||||
assert_n_tokens_predicted(context.completion, predicted_n)
|
assert_n_tokens_predicted(context.completion, predicted_n)
|
||||||
|
|
||||||
|
|
||||||
|
@step('all predictions are equal')
|
||||||
|
@async_run_until_complete
|
||||||
|
async def step_predictions_equal(context):
|
||||||
|
n_completions = await gather_tasks_results(context)
|
||||||
|
assert n_completions >= 2, "need at least 2 completions"
|
||||||
|
assert_all_predictions_equal(context.tasks_result)
|
||||||
|
context.tasks_result = []
|
||||||
|
|
||||||
|
|
||||||
@step('the completion is truncated')
|
@step('the completion is truncated')
|
||||||
def step_assert_completion_truncated(context):
|
def step_assert_completion_truncated(context):
|
||||||
step_assert_completion_truncated(context, '')
|
step_assert_completion_truncated(context, '')
|
||||||
|
@@ -1020,6 +1035,23 @@ def assert_n_tokens_predicted(completion_response, expected_predicted_n=None, re
|
||||||
assert n_predicted == expected_predicted_n, (f'invalid number of tokens predicted:'
|
assert n_predicted == expected_predicted_n, (f'invalid number of tokens predicted:'
|
||||||
f' {n_predicted} <> {expected_predicted_n}')
|
f' {n_predicted} <> {expected_predicted_n}')
|
||||||
|
|
||||||
|
def assert_all_predictions_equal(completion_responses):
|
||||||
|
content_0 = completion_responses[0]['content']
|
||||||
|
|
||||||
|
if 'DEBUG' in os.environ and os.environ['DEBUG'] == 'ON':
|
||||||
|
print(f"content 0: {content_0}")
|
||||||
|
|
||||||
|
i = 1
|
||||||
|
for response in completion_responses[1:]:
|
||||||
|
content = response['content']
|
||||||
|
|
||||||
|
if 'DEBUG' in os.environ and os.environ['DEBUG'] == 'ON':
|
||||||
|
print(f"content {i}: {content}")
|
||||||
|
|
||||||
|
assert content == content_0, "contents not equal"
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
|
||||||
async def gather_tasks_results(context):
|
async def gather_tasks_results(context):
|
||||||
n_tasks = len(context.concurrent_tasks)
|
n_tasks = len(context.concurrent_tasks)
|
||||||
|
@@ -1148,6 +1180,8 @@ def start_server_background(context):
|
||||||
server_args.extend(['--ubatch-size', context.n_ubatch])
|
server_args.extend(['--ubatch-size', context.n_ubatch])
|
||||||
if context.n_gpu_layer:
|
if context.n_gpu_layer:
|
||||||
server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
|
server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
|
||||||
|
if context.draft is not None:
|
||||||
|
server_args.extend(['--draft', context.draft])
|
||||||
if context.server_continuous_batching:
|
if context.server_continuous_batching:
|
||||||
server_args.append('--cont-batching')
|
server_args.append('--cont-batching')
|
||||||
if context.server_embeddings:
|
if context.server_embeddings:
|
||||||
|
|
|
@@ -4,9 +4,8 @@ set -eu
|
||||||
|
|
||||||
if [ $# -lt 1 ]
|
if [ $# -lt 1 ]
|
||||||
then
|
then
|
||||||
# Start @llama.cpp scenario
|
# Start @llama.cpp scenario
|
||||||
behave --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
behave --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
||||||
else
|
else
|
||||||
behave "$@"
|
behave "$@"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
6
flake.lock
generated
|
@@ -20,11 +20,11 @@
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1712791164,
|
"lastModified": 1713537308,
|
||||||
"narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=",
|
"narHash": "sha256-XtTSSIB2DA6tOv+l0FhvfDMiyCmhoRbNB+0SeInZkbk=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5",
|
"rev": "5c24cf2f0a12ad855f444c30b2421d044120c66f",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
16
ggml-alloc.c
|
@@ -371,16 +371,16 @@ struct ggml_gallocr {
|
||||||
};
|
};
|
||||||
|
|
||||||
ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
|
ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs) {
|
||||||
ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(sizeof(struct ggml_gallocr), 1);
|
ggml_gallocr_t galloc = (ggml_gallocr_t)calloc(1, sizeof(struct ggml_gallocr));
|
||||||
GGML_ASSERT(galloc != NULL);
|
GGML_ASSERT(galloc != NULL);
|
||||||
|
|
||||||
galloc->bufts = calloc(sizeof(ggml_backend_buffer_type_t) * n_bufs, 1);
|
galloc->bufts = calloc(n_bufs, sizeof(ggml_backend_buffer_type_t));
|
||||||
GGML_ASSERT(galloc->bufts != NULL);
|
GGML_ASSERT(galloc->bufts != NULL);
|
||||||
|
|
||||||
galloc->buffers = calloc(sizeof(ggml_backend_buffer_t) * n_bufs, 1);
|
galloc->buffers = calloc(n_bufs, sizeof(ggml_backend_buffer_t) * n_bufs);
|
||||||
GGML_ASSERT(galloc->buffers != NULL);
|
GGML_ASSERT(galloc->buffers != NULL);
|
||||||
|
|
||||||
galloc->buf_tallocs = calloc(sizeof(struct ggml_dyn_tallocr *) * n_bufs, 1);
|
galloc->buf_tallocs = calloc(n_bufs, sizeof(struct ggml_dyn_tallocr *));
|
||||||
GGML_ASSERT(galloc->buf_tallocs != NULL);
|
GGML_ASSERT(galloc->buf_tallocs != NULL);
|
||||||
|
|
||||||
for (int i = 0; i < n_bufs; i++) {
|
for (int i = 0; i < n_bufs; i++) {
|
||||||
|
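These hunks swap the calloc arguments into their documented order, calloc(count, size): the allocation is unchanged, but the call now matches the contract (count elements of size bytes each, zero-initialized, with overflow checking on the product) and avoids analyzer warnings about transposed arguments. One spot, the galloc->buffers line, still multiplies the element size by n_bufs, which looks like a leftover and over-allocates by a factor of n_bufs. A minimal sketch of the intended pattern:

```cpp
#include <cstdlib>

// Hypothetical element type, only for illustration.
struct node_alloc {
    int  buffer_id;
    long offset;
};

int main() {
    const size_t n_nodes = 128;
    // count first, then element size: 128 zero-initialized node_alloc entries.
    auto * allocs = static_cast<node_alloc *>(std::calloc(n_nodes, sizeof(node_alloc)));
    if (allocs == nullptr) {
        return 1;
    }
    std::free(allocs);
    return 0;
}
```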
@@ -646,8 +646,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||||
free(galloc->hash_set.keys);
|
free(galloc->hash_set.keys);
|
||||||
free(galloc->hash_values);
|
free(galloc->hash_values);
|
||||||
galloc->hash_set.size = hash_size;
|
galloc->hash_set.size = hash_size;
|
||||||
galloc->hash_set.keys = calloc(sizeof(struct ggml_tensor *), hash_size);
|
galloc->hash_set.keys = calloc(hash_size, sizeof(struct ggml_tensor *));
|
||||||
galloc->hash_values = calloc(sizeof(struct hash_node), hash_size);
|
galloc->hash_values = calloc(hash_size, sizeof(struct hash_node));
|
||||||
GGML_ASSERT(galloc->hash_set.keys != NULL);
|
GGML_ASSERT(galloc->hash_set.keys != NULL);
|
||||||
GGML_ASSERT(galloc->hash_values != NULL);
|
GGML_ASSERT(galloc->hash_values != NULL);
|
||||||
} else {
|
} else {
|
||||||
|
@@ -667,7 +667,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||||
// set the node_allocs from the hash table
|
// set the node_allocs from the hash table
|
||||||
if (galloc->n_nodes < graph->n_nodes) {
|
if (galloc->n_nodes < graph->n_nodes) {
|
||||||
free(galloc->node_allocs);
|
free(galloc->node_allocs);
|
||||||
galloc->node_allocs = calloc(sizeof(struct node_alloc), graph->n_nodes);
|
galloc->node_allocs = calloc(graph->n_nodes, sizeof(struct node_alloc));
|
||||||
GGML_ASSERT(galloc->node_allocs != NULL);
|
GGML_ASSERT(galloc->node_allocs != NULL);
|
||||||
}
|
}
|
||||||
galloc->n_nodes = graph->n_nodes;
|
galloc->n_nodes = graph->n_nodes;
|
||||||
|
@@ -697,7 +697,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||||
}
|
}
|
||||||
if (galloc->n_leafs < graph->n_leafs) {
|
if (galloc->n_leafs < graph->n_leafs) {
|
||||||
free(galloc->leaf_allocs);
|
free(galloc->leaf_allocs);
|
||||||
galloc->leaf_allocs = calloc(sizeof(galloc->leaf_allocs[0]), graph->n_leafs);
|
galloc->leaf_allocs = calloc(graph->n_leafs, sizeof(galloc->leaf_allocs[0]));
|
||||||
GGML_ASSERT(galloc->leaf_allocs != NULL);
|
GGML_ASSERT(galloc->leaf_allocs != NULL);
|
||||||
}
|
}
|
||||||
galloc->n_leafs = graph->n_leafs;
|
galloc->n_leafs = graph->n_leafs;
|
||||||
|
|
|
@@ -1725,23 +1725,23 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
||||||
GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
|
GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
|
||||||
GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
|
GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
|
||||||
|
|
||||||
struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
|
struct ggml_backend_sched * sched = calloc(1, sizeof(struct ggml_backend_sched));
|
||||||
|
|
||||||
// initialize hash table
|
// initialize hash table
|
||||||
sched->hash_set = ggml_hash_set_new(graph_size);
|
sched->hash_set = ggml_hash_set_new(graph_size);
|
||||||
sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size);
|
sched->tensor_backend_id = calloc(sched->hash_set.size, sizeof(sched->tensor_backend_id[0]));
|
||||||
sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size);
|
sched->tensor_copies = calloc(sched->hash_set.size, sizeof(sched->tensor_copies[0]));
|
||||||
|
|
||||||
const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
|
const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
|
||||||
sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), nodes_size);
|
sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
|
||||||
sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), nodes_size);
|
sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
|
||||||
|
|
||||||
sched->n_backends = n_backends;
|
sched->n_backends = n_backends;
|
||||||
|
|
||||||
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
||||||
|
|
||||||
const int initial_splits_capacity = 16;
|
const int initial_splits_capacity = 16;
|
||||||
sched->splits = calloc(sizeof(sched->splits[0]), initial_splits_capacity);
|
sched->splits = calloc(initial_splits_capacity, sizeof(sched->splits[0]));
|
||||||
sched->splits_capacity = initial_splits_capacity;
|
sched->splits_capacity = initial_splits_capacity;
|
||||||
|
|
||||||
for (int b = 0; b < n_backends; b++) {
|
for (int b = 0; b < n_backends; b++) {
|
||||||
|
@@ -1784,12 +1784,14 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
|
||||||
|
|
||||||
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
||||||
// reset state for the next run
|
// reset state for the next run
|
||||||
size_t hash_size = sched->hash_set.size;
|
if (!sched->is_reset) {
|
||||||
memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
|
size_t hash_size = sched->hash_set.size;
|
||||||
memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
|
memset(sched->hash_set.keys, 0, sizeof(sched->hash_set.keys[0]) * hash_size); // NOLINT
|
||||||
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
|
memset(sched->tensor_backend_id, -1, sizeof(sched->tensor_backend_id[0]) * hash_size);
|
||||||
|
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
|
||||||
|
|
||||||
sched->is_reset = true;
|
sched->is_reset = true;
|
||||||
|
}
|
||||||
sched->is_alloc = false;
|
sched->is_alloc = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
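ggml_backend_sched_reset now skips the large memsets when the tables are already in their reset state, so back-to-back resets (or a reset right after construction) become no-ops. The same lazy-reset pattern in miniature (illustrative types only):

```cpp
#include <algorithm>
#include <vector>

// Illustrative scheduler-like state: big bookkeeping arrays plus a flag that
// records whether they are already in their pristine state.
struct sched_state {
    std::vector<int> tensor_backend_id = std::vector<int>(1024, -1);
    bool is_reset = true;

    void assign(size_t i, int backend) {
        tensor_backend_id[i] = backend;
        is_reset = false;
    }

    void reset() {
        if (!is_reset) {   // skip the memset-equivalent when already clean
            std::fill(tensor_backend_id.begin(), tensor_backend_id.end(), -1);
            is_reset = true;
        }
    }
};

int main() {
    sched_state s;
    s.reset();          // no work: nothing was assigned yet
    s.assign(3, 1);
    s.reset();          // clears the table once
    return 0;
}
```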
@@ -1972,10 +1974,10 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
|
||||||
struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
|
struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph) {
|
||||||
struct ggml_hash_set hash_set = {
|
struct ggml_hash_set hash_set = {
|
||||||
/* .size = */ graph->visited_hash_table.size,
|
/* .size = */ graph->visited_hash_table.size,
|
||||||
/* .keys = */ calloc(sizeof(hash_set.keys[0]), graph->visited_hash_table.size) // NOLINT
|
/* .keys = */ calloc(graph->visited_hash_table.size, sizeof(hash_set.keys[0])) // NOLINT
|
||||||
};
|
};
|
||||||
struct ggml_tensor ** node_copies = calloc(sizeof(node_copies[0]), hash_set.size); // NOLINT
|
struct ggml_tensor ** node_copies = calloc(hash_set.size, sizeof(node_copies[0])); // NOLINT
|
||||||
bool * node_init = calloc(sizeof(node_init[0]), hash_set.size);
|
bool * node_init = calloc(hash_set.size, sizeof(node_init[0]));
|
||||||
|
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = {
|
||||||
/* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
|
/* .mem_size = */ ggml_tensor_overhead()*hash_set.size + ggml_graph_overhead_custom(graph->size, false),
|
||||||
|
|
266
ggml-impl.h
|
@@ -11,6 +11,12 @@
|
||||||
#include <string.h> // memcpy
|
#include <string.h> // memcpy
|
||||||
#include <math.h> // fabsf
|
#include <math.h> // fabsf
|
||||||
|
|
||||||
|
#undef MIN
|
||||||
|
#undef MAX
|
||||||
|
|
||||||
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
@@ -45,7 +51,7 @@ extern "C" {
|
||||||
// 16-bit float
|
// 16-bit float
|
||||||
// on Arm, we use __fp16
|
// on Arm, we use __fp16
|
||||||
// on x86, we use uint16_t
|
// on x86, we use uint16_t
|
||||||
#if defined(__ARM_NEON) && !defined(_MSC_VER)
|
#if defined(__ARM_NEON)
|
||||||
|
|
||||||
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
||||||
//
|
//
|
||||||
|
@@ -53,8 +59,262 @@ extern "C" {
|
||||||
//
|
//
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
|
||||||
|
typedef uint16_t ggml_fp16_internal_t;
|
||||||
|
|
||||||
|
#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
typedef __fp16 ggml_fp16_internal_t;
|
typedef __fp16 ggml_fp16_internal_t;
|
||||||
|
|
||||||
|
#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
|
||||||
|
|
||||||
|
#endif // _MSC_VER
|
||||||
|
|
||||||
|
#if !defined(__aarch64__)
|
||||||
|
|
||||||
|
// 32-bit ARM compatibility
|
||||||
|
|
||||||
|
// vaddvq_s16
|
||||||
|
// vpaddq_s16
|
||||||
|
// vpaddq_s32
|
||||||
|
// vaddvq_s32
|
||||||
|
// vaddvq_f32
|
||||||
|
// vmaxvq_f32
|
||||||
|
// vcvtnq_s32_f32
|
||||||
|
// vzip1_u8
|
||||||
|
// vzip2_u8
|
||||||
|
|
||||||
|
inline static int32_t vaddvq_s16(int16x8_t v) {
|
||||||
|
return
|
||||||
|
(int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
|
||||||
|
(int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
|
||||||
|
(int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
|
||||||
|
(int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
|
||||||
|
int16x4_t a0 = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
|
||||||
|
int16x4_t b0 = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
|
||||||
|
return vcombine_s16(a0, b0);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
|
||||||
|
int32x2_t a0 = vpadd_s32(vget_low_s32(a), vget_high_s32(a));
|
||||||
|
int32x2_t b0 = vpadd_s32(vget_low_s32(b), vget_high_s32(b));
|
||||||
|
return vcombine_s32(a0, b0);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static int32_t vaddvq_s32(int32x4_t v) {
|
||||||
|
return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static float vaddvq_f32(float32x4_t v) {
|
||||||
|
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static float vmaxvq_f32(float32x4_t v) {
|
||||||
|
return
|
||||||
|
MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
|
||||||
|
MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
|
||||||
|
int32x4_t res;
|
||||||
|
|
||||||
|
res[0] = roundf(vgetq_lane_f32(v, 0));
|
||||||
|
res[1] = roundf(vgetq_lane_f32(v, 1));
|
||||||
|
res[2] = roundf(vgetq_lane_f32(v, 2));
|
||||||
|
res[3] = roundf(vgetq_lane_f32(v, 3));
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b) {
|
||||||
|
uint8x8_t res;
|
||||||
|
|
||||||
|
res[0] = a[0]; res[1] = b[0];
|
||||||
|
res[2] = a[1]; res[3] = b[1];
|
||||||
|
res[4] = a[2]; res[5] = b[2];
|
||||||
|
res[6] = a[3]; res[7] = b[3];
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline static uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
|
||||||
|
uint8x8_t res;
|
||||||
|
|
||||||
|
res[0] = a[4]; res[1] = b[4];
|
||||||
|
res[2] = a[5]; res[3] = b[5];
|
||||||
|
res[4] = a[6]; res[5] = b[6];
|
||||||
|
res[6] = a[7]; res[7] = b[7];
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// vld1q_s16_x2
|
||||||
|
// vld1q_u8_x2
|
||||||
|
// vld1q_u8_x4
|
||||||
|
// vld1q_s8_x2
|
||||||
|
// vld1q_s8_x4
|
||||||
|
// TODO: double-check these work correctly
|
||||||
|
|
||||||
|
typedef struct ggml_int16x8x2_t {
|
||||||
|
int16x8_t val[2];
|
||||||
|
} ggml_int16x8x2_t;
|
||||||
|
|
||||||
|
inline static ggml_int16x8x2_t ggml_vld1q_s16_x2(const int16_t * ptr) {
|
||||||
|
ggml_int16x8x2_t res;
|
||||||
|
|
||||||
|
res.val[0] = vld1q_s16(ptr + 0);
|
||||||
|
res.val[1] = vld1q_s16(ptr + 8);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct ggml_uint8x16x2_t {
|
||||||
|
uint8x16_t val[2];
|
||||||
|
} ggml_uint8x16x2_t;
|
||||||
|
|
||||||
|
inline static ggml_uint8x16x2_t ggml_vld1q_u8_x2(const uint8_t * ptr) {
|
||||||
|
ggml_uint8x16x2_t res;
|
||||||
|
|
||||||
|
res.val[0] = vld1q_u8(ptr + 0);
|
||||||
|
res.val[1] = vld1q_u8(ptr + 16);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct ggml_uint8x16x4_t {
|
||||||
|
uint8x16_t val[4];
|
||||||
|
} ggml_uint8x16x4_t;
|
||||||
|
|
||||||
|
inline static ggml_uint8x16x4_t ggml_vld1q_u8_x4(const uint8_t * ptr) {
|
||||||
|
ggml_uint8x16x4_t res;
|
||||||
|
|
||||||
|
res.val[0] = vld1q_u8(ptr + 0);
|
||||||
|
res.val[1] = vld1q_u8(ptr + 16);
|
||||||
|
res.val[2] = vld1q_u8(ptr + 32);
|
||||||
|
res.val[3] = vld1q_u8(ptr + 48);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct ggml_int8x16x2_t {
|
||||||
|
int8x16_t val[2];
|
||||||
|
} ggml_int8x16x2_t;
|
||||||
|
|
||||||
|
inline static ggml_int8x16x2_t ggml_vld1q_s8_x2(const int8_t * ptr) {
|
||||||
|
ggml_int8x16x2_t res;
|
||||||
|
|
||||||
|
res.val[0] = vld1q_s8(ptr + 0);
|
||||||
|
res.val[1] = vld1q_s8(ptr + 16);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct ggml_int8x16x4_t {
|
||||||
|
int8x16_t val[4];
|
||||||
|
} ggml_int8x16x4_t;
|
||||||
|
|
||||||
|
inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
|
||||||
|
ggml_int8x16x4_t res;
|
||||||
|
|
||||||
|
res.val[0] = vld1q_s8(ptr + 0);
|
||||||
|
res.val[1] = vld1q_s8(ptr + 16);
|
||||||
|
res.val[2] = vld1q_s8(ptr + 32);
|
||||||
|
res.val[3] = vld1q_s8(ptr + 48);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: not tested
|
||||||
|
inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
|
||||||
|
int8x16_t res;
|
||||||
|
|
||||||
|
res[ 0] = a[b[ 0]];
|
||||||
|
res[ 1] = a[b[ 1]];
|
||||||
|
res[ 2] = a[b[ 2]];
|
||||||
|
res[ 3] = a[b[ 3]];
|
||||||
|
res[ 4] = a[b[ 4]];
|
||||||
|
res[ 5] = a[b[ 5]];
|
||||||
|
res[ 6] = a[b[ 6]];
|
||||||
|
res[ 7] = a[b[ 7]];
|
||||||
|
res[ 8] = a[b[ 8]];
|
||||||
|
res[ 9] = a[b[ 9]];
|
||||||
|
res[10] = a[b[10]];
|
||||||
|
res[11] = a[b[11]];
|
||||||
|
res[12] = a[b[12]];
|
||||||
|
res[13] = a[b[13]];
|
||||||
|
res[14] = a[b[14]];
|
||||||
|
res[15] = a[b[15]];
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: not tested
|
||||||
|
inline static uint8x16_t ggml_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
|
||||||
|
uint8x16_t res;
|
||||||
|
|
||||||
|
res[ 0] = a[b[ 0]];
|
||||||
|
res[ 1] = a[b[ 1]];
|
||||||
|
res[ 2] = a[b[ 2]];
|
||||||
|
res[ 3] = a[b[ 3]];
|
||||||
|
res[ 4] = a[b[ 4]];
|
||||||
|
res[ 5] = a[b[ 5]];
|
||||||
|
res[ 6] = a[b[ 6]];
|
||||||
|
res[ 7] = a[b[ 7]];
|
||||||
|
res[ 8] = a[b[ 8]];
|
||||||
|
res[ 9] = a[b[ 9]];
|
||||||
|
res[10] = a[b[10]];
|
||||||
|
res[11] = a[b[11]];
|
||||||
|
res[12] = a[b[12]];
|
||||||
|
res[13] = a[b[13]];
|
||||||
|
res[14] = a[b[14]];
|
||||||
|
res[15] = a[b[15]];
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define ggml_int16x8x2_t int16x8x2_t
|
||||||
|
#define ggml_uint8x16x2_t uint8x16x2_t
|
||||||
|
#define ggml_uint8x16x4_t uint8x16x4_t
|
||||||
|
#define ggml_int8x16x2_t int8x16x2_t
|
||||||
|
#define ggml_int8x16x4_t int8x16x4_t
|
||||||
|
|
||||||
|
#define ggml_vld1q_s16_x2 vld1q_s16_x2
|
||||||
|
#define ggml_vld1q_u8_x2 vld1q_u8_x2
|
||||||
|
#define ggml_vld1q_u8_x4 vld1q_u8_x4
|
||||||
|
#define ggml_vld1q_s8_x2 vld1q_s8_x2
|
||||||
|
#define ggml_vld1q_s8_x4 vld1q_s8_x4
|
||||||
|
#define ggml_vqtbl1q_s8 vqtbl1q_s8
|
||||||
|
#define ggml_vqtbl1q_u8 vqtbl1q_u8
|
||||||
|
|
||||||
|
#endif // !defined(__aarch64__)
|
||||||
|
|
||||||
|
#if !defined(__ARM_FEATURE_DOTPROD)
|
||||||
|
|
||||||
|
inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) {
|
||||||
|
const int16x8_t p0 = vmull_s8(vget_low_s8 (a), vget_low_s8 (b));
|
||||||
|
const int16x8_t p1 = vmull_s8(vget_high_s8(a), vget_high_s8(b));
|
||||||
|
|
||||||
|
return vaddq_s32(acc, vaddq_s32(vpaddlq_s16(p0), vpaddlq_s16(p1)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define ggml_vdotq_s32(a, b, c) vdotq_s32(a, b, c)
|
||||||
|
|
||||||
|
#endif // !defined(__ARM_FEATURE_DOTPROD)
|
||||||
|
|
||||||
|
#endif // defined(__ARM_NEON)
|
||||||
|
|
||||||
|
#if defined(__ARM_NEON) && !defined(__MSC_VER)
|
||||||
|
|
||||||
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||||
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||||
|
|
||||||
|
@@ -75,8 +335,6 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
typedef uint16_t ggml_fp16_internal_t;
|
|
||||||
|
|
||||||
#ifdef __wasm_simd128__
|
#ifdef __wasm_simd128__
|
||||||
#include <wasm_simd128.h>
|
#include <wasm_simd128.h>
|
||||||
#else
|
#else
|
||||||
|
@@ -221,7 +479,7 @@ static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||||
|
|
||||||
#endif // __F16C__
|
#endif // __F16C__
|
||||||
|
|
||||||
#endif // __ARM_NEON
|
#endif // defined(__ARM_NEON) && (!defined(__MSC_VER)
|
||||||
|
|
||||||
// precomputed f32 table for f16 (256 KB)
|
// precomputed f32 table for f16 (256 KB)
|
||||||
// defined in ggml.c, initialized in ggml_init()
|
// defined in ggml.c, initialized in ggml_init()
|
||||||
|
|
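The block added to ggml-impl.h centralizes the ARM compatibility shims that previously lived in ggml-quants.c: MSVC gets a uint16_t ggml_fp16_internal_t plus a ggml_vld1q_u32 initializer it can compile, 32-bit ARM gets scalar stand-ins for AArch64-only intrinsics (vaddvq_*, vpaddq_*, vzip*_u8, the vld1q_*_x2/x4 loads, vqtbl1q_*), and ggml_vdotq_s32 falls back to vmull plus pairwise adds when the dot-product extension is absent. That fallback distributes partial sums across lanes differently from a native vdot, but the horizontal total the quantized kernels finally reduce to is identical; a scalar reference for that total:

```cpp
#include <cstdint>
#include <cstdio>

// Scalar reference for the quantity the quantized dot kernels need from
// ggml_vdotq_s32 after the final horizontal add: the full 16-element
// int8 dot product accumulated in 32 bits.
int32_t dot_i8x16(const int8_t a[16], const int8_t b[16]) {
    int32_t sum = 0;
    for (int i = 0; i < 16; ++i) {
        sum += (int32_t) a[i] * (int32_t) b[i];
    }
    return sum;
}

int main() {
    int8_t a[16], b[16];
    for (int i = 0; i < 16; ++i) { a[i] = (int8_t) i; b[i] = 2; }
    std::printf("%d\n", dot_i8x16(a, b));  // 2 * (0 + 1 + ... + 15) = 240
    return 0;
}
```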
577
ggml-quants.c
|
@@ -14,47 +14,6 @@
|
||||||
#include <stdlib.h> // for qsort
|
#include <stdlib.h> // for qsort
|
||||||
#include <stdio.h> // for GGML_ASSERT
|
#include <stdio.h> // for GGML_ASSERT
|
||||||
|
|
||||||
#ifdef __ARM_NEON
|
|
||||||
|
|
||||||
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
|
||||||
//
|
|
||||||
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
|
||||||
//
|
|
||||||
#include <arm_neon.h>
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#ifdef __wasm_simd128__
|
|
||||||
#include <wasm_simd128.h>
|
|
||||||
#else
|
|
||||||
#if defined(__POWER9_VECTOR__) || defined(__powerpc64__)
|
|
||||||
#include <altivec.h>
|
|
||||||
#undef bool
|
|
||||||
#define bool _Bool
|
|
||||||
#else
|
|
||||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
|
||||||
#include <intrin.h>
|
|
||||||
#else
|
|
||||||
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__) || defined(__SSE3__)
|
|
||||||
#if !defined(__riscv)
|
|
||||||
#include <immintrin.h>
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __riscv_v_intrinsic
|
|
||||||
#include <riscv_vector.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#undef MIN
|
|
||||||
#undef MAX
|
|
||||||
|
|
||||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
|
||||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
||||||
|
|
||||||
#define UNUSED GGML_UNUSED
|
#define UNUSED GGML_UNUSED
|
||||||
|
|
||||||
// some compilers don't provide _mm256_set_m128i, e.g. gcc 7
|
// some compilers don't provide _mm256_set_m128i, e.g. gcc 7
|
||||||
|
@@ -276,258 +235,6 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128
|
||||||
#endif // __AVX__ || __AVX2__ || __AVX512F__
|
#endif // __AVX__ || __AVX2__ || __AVX512F__
|
||||||
#endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
|
#endif // defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) || defined(__SSSE3__)
|
||||||
|
|
||||||
#if defined(__ARM_NEON)
|
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
|
|
||||||
#define ggml_vld1q_u32(w,x,y,z) { ((w) + ((uint64_t)(x) << 32)), ((y) + ((uint64_t)(z) << 32)) }
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define ggml_vld1q_u32(w,x,y,z) { (w), (x), (y), (z) }
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__aarch64__)
|
|
||||||
|
|
||||||
// 64-bit compatibility
|
|
||||||
|
|
||||||
// vaddvq_s16
|
|
||||||
// vpaddq_s16
|
|
||||||
// vpaddq_s32
|
|
||||||
// vaddvq_s32
|
|
||||||
// vaddvq_f32
|
|
||||||
// vmaxvq_f32
|
|
||||||
// vcvtnq_s32_f32
|
|
||||||
// vzip1_u8
|
|
||||||
// vzip2_u8
|
|
||||||
|
|
||||||
inline static int32_t vaddvq_s16(int16x8_t v) {
|
|
||||||
return
|
|
||||||
(int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
|
|
||||||
(int32_t)vgetq_lane_s16(v, 2) + (int32_t)vgetq_lane_s16(v, 3) +
|
|
||||||
(int32_t)vgetq_lane_s16(v, 4) + (int32_t)vgetq_lane_s16(v, 5) +
|
|
||||||
(int32_t)vgetq_lane_s16(v, 6) + (int32_t)vgetq_lane_s16(v, 7);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static int16x8_t vpaddq_s16(int16x8_t a, int16x8_t b) {
|
|
||||||
int16x4_t a0 = vpadd_s16(vget_low_s16(a), vget_high_s16(a));
|
|
||||||
int16x4_t b0 = vpadd_s16(vget_low_s16(b), vget_high_s16(b));
|
|
||||||
return vcombine_s16(a0, b0);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static int32x4_t vpaddq_s32(int32x4_t a, int32x4_t b) {
|
|
||||||
int32x2_t a0 = vpadd_s32(vget_low_s32(a), vget_high_s32(a));
|
|
||||||
int32x2_t b0 = vpadd_s32(vget_low_s32(b), vget_high_s32(b));
|
|
||||||
return vcombine_s32(a0, b0);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static int32_t vaddvq_s32(int32x4_t v) {
|
|
||||||
return vgetq_lane_s32(v, 0) + vgetq_lane_s32(v, 1) + vgetq_lane_s32(v, 2) + vgetq_lane_s32(v, 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static float vaddvq_f32(float32x4_t v) {
|
|
||||||
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static float vmaxvq_f32(float32x4_t v) {
|
|
||||||
return
|
|
||||||
MAX(MAX(vgetq_lane_f32(v, 0), vgetq_lane_f32(v, 1)),
|
|
||||||
MAX(vgetq_lane_f32(v, 2), vgetq_lane_f32(v, 3)));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
|
|
||||||
int32x4_t res;
|
|
||||||
|
|
||||||
res[0] = roundf(vgetq_lane_f32(v, 0));
|
|
||||||
res[1] = roundf(vgetq_lane_f32(v, 1));
|
|
||||||
res[2] = roundf(vgetq_lane_f32(v, 2));
|
|
||||||
res[3] = roundf(vgetq_lane_f32(v, 3));
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b) {
|
|
||||||
uint8x8_t res;
|
|
||||||
|
|
||||||
res[0] = a[0]; res[1] = b[0];
|
|
||||||
res[2] = a[1]; res[3] = b[1];
|
|
||||||
res[4] = a[2]; res[5] = b[2];
|
|
||||||
res[6] = a[3]; res[7] = b[3];
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline static uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
|
|
||||||
uint8x8_t res;
|
|
||||||
|
|
||||||
res[0] = a[4]; res[1] = b[4];
|
|
||||||
res[2] = a[5]; res[3] = b[5];
|
|
||||||
res[4] = a[6]; res[5] = b[6];
|
|
||||||
res[6] = a[7]; res[7] = b[7];
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
// vld1q_s16_x2
|
|
||||||
// vld1q_u8_x2
|
|
||||||
// vld1q_u8_x4
|
|
||||||
// vld1q_s8_x2
|
|
||||||
// vld1q_s8_x4
|
|
||||||
// TODO: double-check these work correctly
|
|
||||||
|
|
||||||
typedef struct ggml_int16x8x2_t {
|
|
||||||
int16x8_t val[2];
|
|
||||||
} ggml_int16x8x2_t;
|
|
||||||
|
|
||||||
inline static ggml_int16x8x2_t ggml_vld1q_s16_x2(const int16_t * ptr) {
|
|
||||||
ggml_int16x8x2_t res;
|
|
||||||
|
|
||||||
res.val[0] = vld1q_s16(ptr + 0);
|
|
||||||
res.val[1] = vld1q_s16(ptr + 8);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct ggml_uint8x16x2_t {
|
|
||||||
uint8x16_t val[2];
|
|
||||||
} ggml_uint8x16x2_t;
|
|
||||||
|
|
||||||
inline static ggml_uint8x16x2_t ggml_vld1q_u8_x2(const uint8_t * ptr) {
|
|
||||||
ggml_uint8x16x2_t res;
|
|
||||||
|
|
||||||
res.val[0] = vld1q_u8(ptr + 0);
|
|
||||||
res.val[1] = vld1q_u8(ptr + 16);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct ggml_uint8x16x4_t {
|
|
||||||
uint8x16_t val[4];
|
|
||||||
} ggml_uint8x16x4_t;
|
|
||||||
|
|
||||||
inline static ggml_uint8x16x4_t ggml_vld1q_u8_x4(const uint8_t * ptr) {
|
|
||||||
ggml_uint8x16x4_t res;
|
|
||||||
|
|
||||||
res.val[0] = vld1q_u8(ptr + 0);
|
|
||||||
res.val[1] = vld1q_u8(ptr + 16);
|
|
||||||
res.val[2] = vld1q_u8(ptr + 32);
|
|
||||||
res.val[3] = vld1q_u8(ptr + 48);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct ggml_int8x16x2_t {
|
|
||||||
int8x16_t val[2];
|
|
||||||
} ggml_int8x16x2_t;
|
|
||||||
|
|
||||||
inline static ggml_int8x16x2_t ggml_vld1q_s8_x2(const int8_t * ptr) {
|
|
||||||
ggml_int8x16x2_t res;
|
|
||||||
|
|
||||||
res.val[0] = vld1q_s8(ptr + 0);
|
|
||||||
res.val[1] = vld1q_s8(ptr + 16);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef struct ggml_int8x16x4_t {
|
|
||||||
int8x16_t val[4];
|
|
||||||
} ggml_int8x16x4_t;
|
|
||||||
|
|
||||||
inline static ggml_int8x16x4_t ggml_vld1q_s8_x4(const int8_t * ptr) {
|
|
||||||
ggml_int8x16x4_t res;
|
|
||||||
|
|
||||||
res.val[0] = vld1q_s8(ptr + 0);
|
|
||||||
res.val[1] = vld1q_s8(ptr + 16);
|
|
||||||
res.val[2] = vld1q_s8(ptr + 32);
|
|
||||||
res.val[3] = vld1q_s8(ptr + 48);
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: not tested
|
|
||||||
inline static int8x16_t ggml_vqtbl1q_s8(int8x16_t a, uint8x16_t b) {
|
|
||||||
int8x16_t res;
|
|
||||||
|
|
||||||
res[ 0] = a[b[ 0]];
|
|
||||||
res[ 1] = a[b[ 1]];
|
|
||||||
res[ 2] = a[b[ 2]];
|
|
||||||
res[ 3] = a[b[ 3]];
|
|
||||||
res[ 4] = a[b[ 4]];
|
|
||||||
res[ 5] = a[b[ 5]];
|
|
||||||
res[ 6] = a[b[ 6]];
|
|
||||||
res[ 7] = a[b[ 7]];
|
|
||||||
res[ 8] = a[b[ 8]];
|
|
||||||
res[ 9] = a[b[ 9]];
|
|
||||||
res[10] = a[b[10]];
|
|
||||||
res[11] = a[b[11]];
|
|
||||||
res[12] = a[b[12]];
|
|
||||||
res[13] = a[b[13]];
|
|
||||||
res[14] = a[b[14]];
|
|
||||||
res[15] = a[b[15]];
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: not tested
|
|
||||||
inline static uint8x16_t ggml_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
|
|
||||||
uint8x16_t res;
|
|
||||||
|
|
||||||
res[ 0] = a[b[ 0]];
|
|
||||||
res[ 1] = a[b[ 1]];
|
|
||||||
res[ 2] = a[b[ 2]];
|
|
||||||
res[ 3] = a[b[ 3]];
|
|
||||||
res[ 4] = a[b[ 4]];
|
|
||||||
res[ 5] = a[b[ 5]];
|
|
||||||
res[ 6] = a[b[ 6]];
|
|
||||||
res[ 7] = a[b[ 7]];
|
|
||||||
res[ 8] = a[b[ 8]];
|
|
||||||
res[ 9] = a[b[ 9]];
|
|
||||||
res[10] = a[b[10]];
|
|
||||||
res[11] = a[b[11]];
|
|
||||||
res[12] = a[b[12]];
|
|
||||||
res[13] = a[b[13]];
|
|
||||||
res[14] = a[b[14]];
|
|
||||||
res[15] = a[b[15]];
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define ggml_int16x8x2_t int16x8x2_t
|
|
||||||
#define ggml_uint8x16x2_t uint8x16x2_t
|
|
||||||
#define ggml_uint8x16x4_t uint8x16x4_t
|
|
||||||
#define ggml_int8x16x2_t int8x16x2_t
|
|
||||||
#define ggml_int8x16x4_t int8x16x4_t
|
|
||||||
|
|
||||||
#define ggml_vld1q_s16_x2 vld1q_s16_x2
|
|
||||||
#define ggml_vld1q_u8_x2 vld1q_u8_x2
|
|
||||||
#define ggml_vld1q_u8_x4 vld1q_u8_x4
|
|
||||||
#define ggml_vld1q_s8_x2 vld1q_s8_x2
|
|
||||||
#define ggml_vld1q_s8_x4 vld1q_s8_x4
|
|
||||||
#define ggml_vqtbl1q_s8 vqtbl1q_s8
|
|
||||||
#define ggml_vqtbl1q_u8 vqtbl1q_u8
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__ARM_FEATURE_DOTPROD)
|
|
||||||
|
|
||||||
inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) {
|
|
||||||
const int16x8_t p0 = vmull_s8(vget_low_s8 (a), vget_low_s8 (b));
|
|
||||||
const int16x8_t p1 = vmull_s8(vget_high_s8(a), vget_high_s8(b));
|
|
||||||
|
|
||||||
return vaddq_s32(acc, vaddq_s32(vpaddlq_s16(p0), vpaddlq_s16(p1)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define ggml_vdotq_s32(a, b, c) vdotq_s32(a, b, c)
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__ARM_NEON) || defined(__wasm_simd128__)
|
#if defined(__ARM_NEON) || defined(__wasm_simd128__)
|
||||||
#define B1(c,s,n) 0x ## n ## c , 0x ## n ## s
|
#define B1(c,s,n) 0x ## n ## c , 0x ## n ## s
|
||||||
#define B2(c,s,n) B1(c,s,n ## c), B1(c,s,n ## s)
|
#define B2(c,s,n) B1(c,s,n ## c), B1(c,s,n ## s)
|
||||||
|
@@ -12676,3 +12383,287 @@ void quantize_row_iq2_s(const float * restrict x, void * restrict vy, int64_t k)
|
||||||
block_iq2_s * restrict y = vy;
|
block_iq2_s * restrict y = vy;
|
||||||
quantize_row_iq2_s_reference(x, y, k);
|
quantize_row_iq2_s_reference(x, y, k);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool validate_float(float f, size_t i) {
|
||||||
|
if (isinf(f)) {
|
||||||
|
fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isnan(f)) {
|
||||||
|
fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isinf_fp16(ggml_fp16_t f) {
|
||||||
|
return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isnan_fp16(ggml_fp16_t f) {
|
||||||
|
return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool validate_fp16(ggml_fp16_t f, size_t i) {
|
||||||
|
if (isinf_fp16(f)) {
|
||||||
|
fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isnan_fp16(f)) {
|
||||||
|
fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
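isinf_fp16 and isnan_fp16 above test the IEEE 754 binary16 layout directly: 5 exponent bits under mask 0x7c00 and 10 mantissa bits under mask 0x03ff, with an all-ones exponent encoding inf (zero mantissa) or NaN (non-zero mantissa). A small self-contained check of those masks (plain uint16_t stands in for ggml_fp16_t):

```cpp
#include <cstdint>
#include <cstdio>

// binary16: 1 sign bit, 5 exponent bits (0x7c00), 10 mantissa bits (0x03ff).
static bool is_inf_fp16(uint16_t bits) { return (bits & 0x7c00) == 0x7c00 && (bits & 0x03ff) == 0; }
static bool is_nan_fp16(uint16_t bits) { return (bits & 0x7c00) == 0x7c00 && (bits & 0x03ff) != 0; }

int main() {
    std::printf("%d %d\n", is_inf_fp16(0x7c00), is_nan_fp16(0x7c00)); // +inf -> 1 0
    std::printf("%d %d\n", is_inf_fp16(0x7e00), is_nan_fp16(0x7e00)); // NaN  -> 0 1
    std::printf("%d %d\n", is_inf_fp16(0x3c00), is_nan_fp16(0x3c00)); // 1.0  -> 0 0
    return 0;
}
```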
|
#define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
|
||||||
|
const type * q = (const type *) (data); \
|
||||||
|
for (size_t i = 0; i < (nb); ++i) { \
|
||||||
|
if (!validate_fp16(q[i].d, i)) { \
|
||||||
|
return false; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define VALIDATE_ROW_DATA_DM_F16_IMPL(type, data, nb, d, m) \
|
||||||
|
const type * q = (const type *) (data); \
|
||||||
|
for (size_t i = 0; i < (nb); ++i) { \
|
||||||
|
if (!validate_fp16(q[i].d, i) || !validate_fp16(q[i].m, i)) { \
|
||||||
|
return false; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes) {
|
||||||
|
if (type < 0 || type >= GGML_TYPE_COUNT) {
|
||||||
|
fprintf(stderr, "%s: invalid type %d\n", __func__, type);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nbytes % ggml_type_size(type) != 0) {
|
||||||
|
fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t nb = nbytes/ggml_type_size(type);
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case GGML_TYPE_F16:
|
||||||
|
{
|
||||||
|
const ggml_fp16_t * f = (const ggml_fp16_t *) data;
|
||||||
|
size_t i = 0;
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
for (; i + 15 < nb; i += 16) {
|
||||||
|
__m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
|
||||||
|
__m256i vexp = _mm256_and_si256(v, _mm256_set1_epi16(0x7c00));
|
||||||
|
__m256i cmp = _mm256_cmpeq_epi16(vexp, _mm256_set1_epi16(0x7c00));
|
||||||
|
int mask = _mm256_movemask_epi8(cmp);
|
||||||
|
if (mask) {
|
||||||
|
for (size_t j = 0; j < 16; ++j) {
|
||||||
|
if (!validate_fp16(f[i + j], i + j)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
GGML_UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined(__ARM_NEON)
|
||||||
|
for (; i + 7 < nb; i += 8) {
|
||||||
|
uint16x8_t v = vld1q_u16(f + i);
|
||||||
|
uint16x8_t vexp = vandq_u16(v, vdupq_n_u16(0x7c00));
|
||||||
|
uint16x8_t cmp = vceqq_u16(vexp, vdupq_n_u16(0x7c00));
|
||||||
|
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(cmp, 4)), 0);
|
||||||
|
if (mask) {
|
||||||
|
for (size_t j = 0; j < 8; ++j) {
|
||||||
|
if (!validate_fp16(f[i + j], i + j)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
GGML_UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; i < nb; ++i) {
|
||||||
|
if (!validate_fp16(f[i], i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_F32:
|
||||||
|
{
|
||||||
|
const float * f = (const float *) data;
|
||||||
|
size_t i = 0;
|
||||||
|
#if defined(__AVX2__)
|
||||||
|
for (; i + 7 < nb; i += 8) {
|
||||||
|
__m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
|
||||||
|
__m256i vexp = _mm256_and_si256(v, _mm256_set1_epi32(0x7f800000));
|
||||||
|
__m256i cmp = _mm256_cmpeq_epi32(vexp, _mm256_set1_epi32(0x7f800000));
|
||||||
|
int mask = _mm256_movemask_epi8(cmp);
|
||||||
|
if (mask) {
|
||||||
|
for (size_t j = 0; j < 8; ++j) {
|
||||||
|
if (!validate_float(f[i + j], i + j)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
GGML_UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined(__ARM_NEON)
|
||||||
|
for (; i + 3 < nb; i += 4) {
|
||||||
|
uint32x4_t v = vld1q_u32((const uint32_t *)f + i);
|
||||||
|
uint32x4_t vexp = vandq_u32(v, vdupq_n_u32(0x7f800000));
|
||||||
|
uint32x4_t cmp = vceqq_u32(vexp, vdupq_n_u32(0x7f800000));
|
||||||
|
uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(vshrn_n_u32(cmp, 8)), 0);
|
||||||
|
if (mask) {
|
||||||
|
for (size_t j = 0; j < 4; ++j) {
|
||||||
|
if (!validate_float(f[i + j], i + j)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
GGML_UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
for (; i < nb; ++i) {
|
||||||
|
if (!validate_float(f[i], i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_F64:
|
||||||
|
{
|
||||||
|
const double * f = (const double *) data;
|
||||||
|
for (size_t i = 0; i < nb; ++i) {
|
||||||
|
if (!validate_float(f[i], i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q4_0:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q4_1:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_1, data, nb, d, m);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q5_0:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_0, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q5_1:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_1, data, nb, d, m);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q8_0:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_q8_0, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q2_K:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q3_K:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_q3_K, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q4_K:
|
||||||
|
{
|
||||||
|
#ifdef GGML_QKK_64
|
||||||
|
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d[0], d[1]);
|
||||||
|
#else
|
||||||
|
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d, dmin);
|
||||||
|
#endif
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q5_K:
|
||||||
|
{
|
||||||
|
#ifdef GGML_QKK_64
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_K, data, nb);
|
||||||
|
#else
|
||||||
|
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_K, data, nb, d, dmin);
|
||||||
|
#endif
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q6_K:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_q6_K, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_Q8_K:
|
||||||
|
{
|
||||||
|
const block_q8_K * q = (const block_q8_K *) data;
|
||||||
|
for (size_t i = 0; i < nb; ++i) {
|
||||||
|
if (!validate_float(q[i].d, i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_IQ1_S:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq1_s, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_IQ1_M:
|
||||||
|
{
|
||||||
|
const block_iq1_m * q = (const block_iq1_m *) data;
|
||||||
|
for (size_t i = 0; i < nb; ++i) {
|
||||||
|
#if QK_K == 64
|
||||||
|
if (!validate_fp16(q[i].d, i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
iq1m_scale_t scale;
|
||||||
|
const uint16_t * sc = (const uint16_t *)q[i].scales;
|
||||||
|
scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
|
||||||
|
if (!validate_fp16(scale.f16, i)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_IQ2_XXS:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xxs, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_IQ2_XS:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xs, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_IQ2_S:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_s, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_IQ3_XXS:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_xxs, data, nb);
|
||||||
|
} break;
|
||||||
|
|
||||||
|
case GGML_TYPE_IQ3_S:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_s, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_IQ4_XS:
|
||||||
|
#if QK_K != 64
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_xs, data, nb);
|
||||||
|
} break;
|
||||||
|
#endif
|
||||||
|
// with QK_K == 64, iq4_xs is iq4_nl
|
||||||
|
case GGML_TYPE_IQ4_NL:
|
||||||
|
{
|
||||||
|
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
|
||||||
|
} break;
|
||||||
|
case GGML_TYPE_I8:
|
||||||
|
case GGML_TYPE_I16:
|
||||||
|
case GGML_TYPE_I32:
|
||||||
|
case GGML_TYPE_I64:
|
||||||
|
// nothing to validate
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%s: invalid type %d\n", __func__, type);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
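For reference, a minimal usage sketch of the new helper (not part of the diff): how a caller might validate a raw tensor buffer before using it. The wrapper name and the origin of `type`, `buf` and `nbytes` are assumptions for the example; the validation function itself is the one defined above.

// Hedged sketch (not part of the diff): `buffer_ok` and its arguments are
// hypothetical; only ggml_validate_row_data comes from the change above.
#include <stdio.h>
#include "ggml.h"

static bool buffer_ok(enum ggml_type type, const void * buf, size_t nbytes) {
    if (!ggml_validate_row_data(type, buf, nbytes)) {
        fprintf(stderr, "buffer contains inf/nan values\n");
        return false;
    }
    return true;
}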
51  ggml.c

@@ -858,18 +858,6 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
 // simd mappings
 //

-#if defined(__ARM_NEON)
-#if !defined(__aarch64__)
-
-// 64-bit compatibility
-
-inline static float vaddvq_f32(float32x4_t v) {
-    return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
-}
-
-#endif
-#endif
-
 // we define a common set of C macros which map to specific intrinsics based on the current architecture
 // we then implement the fundamental computation operations below using only these macros
 // adding support for new architectures requires to define the corresponding SIMD macros
@@ -10825,7 +10813,7 @@ static void ggml_compute_forward_mul_mat(
 #endif

 #if GGML_USE_LLAMAFILE
-    if (nb10 == ggml_type_size(src1->type)) {
+    if (src1_cont) {
         for (int64_t i13 = 0; i13 < ne13; i13++)
             for (int64_t i12 = 0; i12 < ne12; i12++)
                 if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
@@ -10878,15 +10866,13 @@ UseGgmlGemm1:;
     const size_t row_size = ggml_row_size(vec_dot_type, ne10);

 #if GGML_USE_LLAMAFILE
-    if (nb10 == ggml_type_size(src1->type) || src1->type != vec_dot_type) {
+    if (src1->type != vec_dot_type) {
         for (int64_t i13 = 0; i13 < ne13; i13++)
             for (int64_t i12 = 0; i12 < ne12; i12++)
                 if (!llamafile_sgemm(ne01, ne11, ne00/ggml_blck_size(src0->type),
                                      (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03,
                                      nb01/ggml_type_size(src0->type),
-                                     (const char *)wdata + ggml_row_size(vec_dot_type,
-                                         nb12/ggml_type_size(src1->type)*i12 +
-                                         nb13/ggml_type_size(src1->type)*i13),
+                                     (const char *)wdata + (i12*ne11 + i13*ne12*ne11)*row_size,
                                      row_size/ggml_type_size(vec_dot_type),
                                      (char *)dst->data + i12*nb2 + i13*nb3,
                                      nb1/ggml_type_size(dst->type),
@@ -20628,7 +20614,7 @@ static void gguf_free_kv(struct gguf_kv * kv) {
 }

 struct gguf_context * gguf_init_empty(void) {
-    struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
+    struct gguf_context * ctx = GGML_CALLOC(1, sizeof(struct gguf_context));

     memcpy(ctx->header.magic, GGUF_MAGIC, sizeof(ctx->header.magic));
     ctx->header.version = GGUF_VERSION;
@@ -20673,7 +20659,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     bool ok = true;

-    struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
+    struct gguf_context * ctx = GGML_CALLOC(1, sizeof(struct gguf_context));

     // read the header
     {
@@ -20710,9 +20696,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the kv pairs
     {
-        ctx->kv = GGML_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
+        const uint64_t n_kv = ctx->header.n_kv;

-        for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
+        // header.n_kv will hold the actual value of pairs that were successfully read in the loop below
+        ctx->header.n_kv = 0;
+        ctx->kv = GGML_CALLOC(n_kv, sizeof(struct gguf_kv));
+
+        for (uint64_t i = 0; i < n_kv; ++i) {
             struct gguf_kv * kv = &ctx->kv[i];

             //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
@@ -20761,7 +20751,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                             return NULL;
                         }

-                        kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * gguf_type_size(kv->value.arr.type));
+                        kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, gguf_type_size(kv->value.arr.type));

                         ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
                     } break;
@@ -20775,7 +20765,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                             return NULL;
                         }

-                        kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * sizeof(struct gguf_str));
+                        kv->value.arr.data = GGML_CALLOC(kv->value.arr.n, sizeof(struct gguf_str));

                         for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                             ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
@@ -20791,6 +20781,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             if (!ok) {
                 break;
             }
+
+            ctx->header.n_kv++;
         }

         if (!ok) {
@@ -20803,7 +20795,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the tensor infos
     {
-        ctx->infos = GGML_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
+        ctx->infos = GGML_CALLOC(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));

         for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
             struct gguf_tensor_info * info = &ctx->infos[i];
@@ -20824,6 +20816,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
             ok = ok && gguf_fread_el (file, &info->type,   sizeof(info->type),   &offset);
             ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);

+            // TODO: return an error instead of crashing with GGML_ASSERT
             gguf_tensor_info_sanitize(info);

             if (!ok) {
@@ -20994,7 +20987,7 @@ void gguf_free(struct gguf_context * ctx) {
         GGML_FREE(ctx->infos);
     }

-    GGML_ALIGNED_FREE(ctx);
+    GGML_FREE(ctx);
 }

 const char * gguf_type_name(enum gguf_type type) {
@@ -21305,7 +21298,7 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
     ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = type;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
+    ctx->kv[idx].value.arr.data = GGML_CALLOC(n, gguf_type_size(type));
     memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
 }

@@ -21315,7 +21308,7 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
     ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
+    ctx->kv[idx].value.arr.data = GGML_CALLOC(n, sizeof(struct gguf_str));
     for (int i = 0; i < n; i++) {
         struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
         str->n    = strlen(data[i]);
@@ -21342,7 +21335,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
             case GGUF_TYPE_ARRAY:
                 {
                     if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
-                        const char ** data = GGML_MALLOC(src->kv[i].value.arr.n*sizeof(char *));
+                        const char ** data = GGML_CALLOC(src->kv[i].value.arr.n, sizeof(char *));
                         for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
                             data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
                         }
@@ -21430,7 +21423,7 @@ struct gguf_buf {

 static struct gguf_buf gguf_buf_init(size_t size) {
     struct gguf_buf buf = {
-        /*buf.data =*/ size == 0 ? NULL : GGML_MALLOC(size),
+        /*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
         /*buf.size =*/ size,
         /*buf.offset =*/ 0,
     };

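A brief illustration of the allocation change above (not from the diff): with GGML_CALLOC the gguf context and its arrays start out zero-initialized, so a context created and then freed before anything is written to it does not touch uninitialized fields. The wrapper function is hypothetical; only gguf_init_empty and gguf_free come from the code above.

// Hedged sketch: gguf_roundtrip_example is a made-up name for illustration.
#include "ggml.h"

void gguf_roundtrip_example(void) {
    struct gguf_context * ctx = gguf_init_empty(); // zero-initialized since the change above
    // ... metadata and tensors would be added here ...
    gguf_free(ctx);
}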
2  ggml.h

@@ -762,6 +762,8 @@ extern "C" {
     // use this to compute the memory overhead of a tensor
     GGML_API size_t ggml_tensor_overhead(void);

+    GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);
+
     // main

     GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);

gguf-py/gguf/constants.py

@@ -124,6 +124,7 @@ class MODEL_ARCH(IntEnum):
     QWEN2 = auto()
     QWEN2MOE = auto()
     PHI2 = auto()
+    PHI3 = auto()
     PLAMO = auto()
     CODESHELL = auto()
     ORION = auto()
@@ -200,6 +201,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.QWEN2: "qwen2",
     MODEL_ARCH.QWEN2MOE: "qwen2moe",
     MODEL_ARCH.PHI2: "phi2",
+    MODEL_ARCH.PHI3: "phi3",
     MODEL_ARCH.PLAMO: "plamo",
     MODEL_ARCH.CODESHELL: "codeshell",
     MODEL_ARCH.ORION: "orion",
@@ -550,6 +552,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PHI3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,

gguf-py/gguf/tensor_mapping.py

@@ -117,6 +117,7 @@ class TensorNameMap:
             "h.{bid}.attn.c_attn",                 # gpt2
             "transformer.h.{bid}.mixer.Wqkv",      # phi2
             "encoder.layers.{bid}.attn.Wqkv",      # nomic-bert
+            "model.layers.{bid}.self_attn.qkv_proj" # phi3
         ),

         # Attention query
@@ -234,6 +235,7 @@ class TensorNameMap:
             "h.{bid}.mlp.c_fc",                       # gpt2
             "transformer.h.{bid}.mlp.fc1",            # phi2
             "model.layers.{bid}.mlp.fc1",             # phi2
+            "model.layers.{bid}.mlp.gate_up_proj",    # phi3
             "model.layers.layers.{bid}.mlp.up_proj",  # plamo
             "model.layers.{bid}.feed_forward.w3",     # internlm2
             "encoder.layers.{bid}.mlp.fc11",          # nomic-bert
474  llama.cpp

@@ -75,6 +75,7 @@
 #include <forward_list>
 #include <fstream>
 #include <functional>
+#include <future>
 #include <initializer_list>
 #include <locale>
 #include <map>
@@ -211,6 +212,7 @@ enum llm_arch {
     LLM_ARCH_QWEN2,
     LLM_ARCH_QWEN2MOE,
     LLM_ARCH_PHI2,
+    LLM_ARCH_PHI3,
     LLM_ARCH_PLAMO,
     LLM_ARCH_CODESHELL,
     LLM_ARCH_ORION,
@@ -246,6 +248,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_QWEN2, "qwen2" },
     { LLM_ARCH_QWEN2MOE, "qwen2moe" },
     { LLM_ARCH_PHI2, "phi2" },
+    { LLM_ARCH_PHI3, "phi3" },
     { LLM_ARCH_PLAMO, "plamo" },
     { LLM_ARCH_CODESHELL, "codeshell" },
     { LLM_ARCH_ORION, "orion" },
@@ -793,6 +796,23 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
             { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
         },
     },
+    {
+        LLM_ARCH_PHI3,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_PLAMO,
         {
@@ -2863,6 +2883,7 @@ namespace GGUFMeta {
                 case LLAMA_KV_OVERRIDE_TYPE_BOOL:  return "bool";
                 case LLAMA_KV_OVERRIDE_TYPE_INT:   return "int";
                 case LLAMA_KV_OVERRIDE_TYPE_FLOAT: return "float";
+                case LLAMA_KV_OVERRIDE_TYPE_STR:   return "str";
             }
             return "unknown";
         }
@@ -2874,13 +2895,16 @@ namespace GGUFMeta {
                 __func__, override_type_to_str(ovrd->tag), ovrd->key);
             switch (ovrd->tag) {
                 case LLAMA_KV_OVERRIDE_TYPE_BOOL: {
-                    LLAMA_LOG_INFO("%s\n", ovrd->bool_value ? "true" : "false");
+                    LLAMA_LOG_INFO("%s\n", ovrd->val_bool ? "true" : "false");
                 } break;
                 case LLAMA_KV_OVERRIDE_TYPE_INT: {
-                    LLAMA_LOG_INFO("%" PRId64 "\n", ovrd->int_value);
+                    LLAMA_LOG_INFO("%" PRId64 "\n", ovrd->val_i64);
                 } break;
                 case LLAMA_KV_OVERRIDE_TYPE_FLOAT: {
-                    LLAMA_LOG_INFO("%.6f\n", ovrd->float_value);
+                    LLAMA_LOG_INFO("%.6f\n", ovrd->val_f64);
+                } break;
+                case LLAMA_KV_OVERRIDE_TYPE_STR: {
+                    LLAMA_LOG_INFO("%s\n", ovrd->val_str);
                 } break;
                 default:
                     // Shouldn't be possible to end up here, but just in case...
@@ -2899,7 +2923,7 @@ namespace GGUFMeta {
        static typename std::enable_if<std::is_same<OT, bool>::value, bool>::type
        try_override(OT & target, const struct llama_model_kv_override * ovrd) {
            if (validate_override(LLAMA_KV_OVERRIDE_TYPE_BOOL, ovrd)) {
-                target = ovrd->bool_value;
+                target = ovrd->val_bool;
                return true;
            }
            return false;
@@ -2909,7 +2933,7 @@ namespace GGUFMeta {
        static typename std::enable_if<!std::is_same<OT, bool>::value && std::is_integral<OT>::value, bool>::type
        try_override(OT & target, const struct llama_model_kv_override * ovrd) {
            if (validate_override(LLAMA_KV_OVERRIDE_TYPE_INT, ovrd)) {
-                target = ovrd->int_value;
+                target = ovrd->val_i64;
                return true;
            }
            return false;
@@ -2919,7 +2943,7 @@ namespace GGUFMeta {
        static typename std::enable_if<std::is_floating_point<OT>::value, bool>::type
        try_override(T & target, const struct llama_model_kv_override * ovrd) {
            if (validate_override(LLAMA_KV_OVERRIDE_TYPE_FLOAT, ovrd)) {
-                target = ovrd->float_value;
+                target = ovrd->val_f64;
                return true;
            }
            return false;
@@ -2928,12 +2952,11 @@ namespace GGUFMeta {
        template<typename OT>
        static typename std::enable_if<std::is_same<OT, std::string>::value, bool>::type
        try_override(T & target, const struct llama_model_kv_override * ovrd) {
-            (void)target;
-            (void)ovrd;
-            if (!ovrd) { return false; }
-            // Currently, we should never end up here so it would be a bug if we do.
-            throw std::runtime_error(format("Unsupported attempt to override string type for metadata key %s\n",
-                ovrd ? ovrd->key : "NULL"));
+            if (validate_override(LLAMA_KV_OVERRIDE_TYPE_STR, ovrd)) {
+                target = ovrd->val_str;
+                return true;
+            }
+            return false;
        }

        static bool set(const gguf_context * ctx, const int k, T & target, const struct llama_model_kv_override * ovrd = nullptr) {
@@ -2966,6 +2989,7 @@ struct llama_model_loader {
    size_t n_bytes = 0;

    bool use_mmap = false;
+    bool check_tensors;

    llama_files files;
    llama_ftype ftype;
@@ -2980,9 +3004,13 @@ struct llama_model_loader {

        ggml_tensor * tensor;

-        llama_tensor_weight(uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
+        llama_tensor_weight(const llama_file * file, uint16_t idx, const char * name, const struct gguf_context * gguf_ctx, ggml_tensor * tensor) : idx(idx), tensor(tensor) {
            const int tensor_idx = gguf_find_tensor(gguf_ctx, name);
            offs = gguf_get_data_offset(gguf_ctx) + gguf_get_tensor_offset(gguf_ctx, tensor_idx);
+
+            if (offs + ggml_nbytes(tensor) < offs || offs + ggml_nbytes(tensor) > file->size) {
+                throw std::runtime_error(format("tensor '%s' data is not within the file bounds, model is corrupted or incomplete", name));
+            }
        }
    };
    std::vector<llama_tensor_weight> weights;
@@ -2995,7 +3023,7 @@ struct llama_model_loader {
    std::string arch_name;
    LLM_KV      llm_kv    = LLM_KV(LLM_ARCH_UNKNOWN);

-    llama_model_loader(const std::string & fname, bool use_mmap, const struct llama_model_kv_override * param_overrides_p) {
+    llama_model_loader(const std::string & fname, bool use_mmap, bool check_tensors, const struct llama_model_kv_override * param_overrides_p) {
        int trace = 0;
        if (getenv("LLAMA_TRACE")) {
            trace = atoi(getenv("LLAMA_TRACE"));
@@ -3021,15 +3049,15 @@ struct llama_model_loader {
        get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
        llm_kv = LLM_KV(llm_arch_from_string(arch_name));

+        files.emplace_back(new llama_file(fname.c_str(), "rb"));
+        contexts.emplace_back(ctx);
+
        // Save tensors data offset of the main file.
        // For subsidiary files, `meta` tensor data offset must not be used,
        // so we build a unified tensors index for weights.
        for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-            weights.emplace_back(0, cur->name, meta, cur);
+            weights.emplace_back(files.back().get(), 0, cur->name, meta, cur);
        }
-        files.emplace_back(new llama_file(fname.c_str(), "rb"));
-        contexts.emplace_back(ctx);

        uint16_t n_split = 0;
        get_key(llm_kv(LLM_KV_SPLIT_COUNT), n_split, false);
@@ -3063,13 +3091,14 @@ struct llama_model_loader {
                throw std::runtime_error(format("%s: failed to load GGUF split from %s\n", __func__, split_path));
            }

-            // Save tensors data offset info of the shard.
-            for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
-                weights.emplace_back(idx, cur->name, ctx_gguf, cur);
-            }
            files.emplace_back(new llama_file(split_path, "rb"));
            contexts.emplace_back(ctx);

+            // Save tensors data offset info of the shard.
+            for (ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
+                weights.emplace_back(files.back().get(), idx, cur->name, ctx_gguf, cur);
+            }
+
            gguf_free(ctx_gguf);
        }

@@ -3199,6 +3228,7 @@ struct llama_model_loader {
        }

        this->use_mmap = use_mmap;
+        this->check_tensors = check_tensors;
    }

    ~llama_model_loader() {
@@ -3278,6 +3308,10 @@ struct llama_model_loader {
        return nullptr;
    }

+    const llama_tensor_weight * get_weight(int i) const {
+        return get_weight(get_tensor_name(i));
+    }
+
    const llama_tensor_weight & require_weight(const char * name) const {
        const llama_tensor_weight * weight = get_weight(name);
        if (!weight) {
@@ -3453,6 +3487,10 @@ struct llama_model_loader {
            file->seek(w.offs, SEEK_SET);
            file->read_raw(cur->data, ggml_nbytes(cur));
        }
+
+        if (check_tensors && !ggml_validate_row_data(cur->type, cur->data, ggml_nbytes(cur))) {
+            throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
+        }
    }

    size_t size_done = 0;
@@ -3469,6 +3507,8 @@ struct llama_model_loader {
        GGML_ASSERT(size_data != 0 && "call init_mappings() first");

        std::vector<no_init<uint8_t>> read_buf;
+        std::vector<std::future<std::pair<ggml_tensor *, bool>>> validation_result;
+
        for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur != NULL; cur = ggml_get_next_tensor(ctx, cur)) {
            const auto * weight = get_weight(ggml_get_name(cur));
            if (weight == nullptr) {
@@ -3490,37 +3530,66 @@ struct llama_model_loader {
                if (bufs_mmap.count(weight->idx)) {
                    buf_mmap = bufs_mmap.at(weight->idx);
                }
+                uint8_t * data = (uint8_t *) mapping->addr + weight->offs;
+
+                if (check_tensors) {
+                    validation_result.emplace_back(std::async(std::launch::async, [cur, data, n_size] {
+                        return std::make_pair(cur, ggml_validate_row_data(cur->type, data, n_size));
+                    }));
+                }
+
                GGML_ASSERT(buf_mmap || cur->data); // either we have a buffer to allocate the tensor in, or it is already allocated
                if (buf_mmap && cur->data == nullptr) {
-                    ggml_backend_tensor_alloc(buf_mmap, cur, (uint8_t *) mapping->addr + weight->offs);
+                    ggml_backend_tensor_alloc(buf_mmap, cur, data);
                    if (lmlocks) {
                        const auto & lmlock = lmlocks->at(weight->idx);
-                        lmlock->grow_to(weight->offs + ggml_nbytes(cur));
+                        lmlock->grow_to(weight->offs + n_size);
                    }

                    auto & mmap_used = mmaps_used[weight->idx];
                    mmap_used.first  = std::min(mmap_used.first,  weight->offs);
                    mmap_used.second = std::max(mmap_used.second, weight->offs + n_size);
                } else {
-                    ggml_backend_tensor_set(cur, (uint8_t *) mapping->addr + weight->offs, 0, n_size);
+                    ggml_backend_tensor_set(cur, data, 0, n_size);
                }
            } else {
                GGML_ASSERT(weight->idx < files.size());
                const auto & file = files.at(weight->idx);
                if (ggml_backend_buffer_is_host(cur->buffer)) {
                    file->seek(weight->offs, SEEK_SET);
-                    file->read_raw(cur->data, ggml_nbytes(cur));
+                    file->read_raw(cur->data, n_size);
+                    if (check_tensors) {
+                        validation_result.emplace_back(std::async(std::launch::async, [cur, n_size] {
+                            return std::make_pair(cur, ggml_validate_row_data(cur->type, cur->data, n_size));
+                        }));
+                    }
                } else {
-                    read_buf.resize(ggml_nbytes(cur));
+                    read_buf.resize(n_size);
                    file->seek(weight->offs, SEEK_SET);
-                    file->read_raw(read_buf.data(), ggml_nbytes(cur));
+                    file->read_raw(read_buf.data(), n_size);
                    ggml_backend_tensor_set(cur, read_buf.data(), 0, n_size);
+                    if (check_tensors && !ggml_validate_row_data(cur->type, read_buf.data(), n_size)) {
+                        throw std::runtime_error(format("tensor '%s' has invalid data", ggml_get_name(cur)));
+                    }
                }
            }

            size_done += n_size;
        }

+        // check validation results
+        bool validation_failed = false;
+        for (auto & future : validation_result) {
+            auto result = future.get();
+            if (!result.second) {
+                LLAMA_LOG_ERROR("%s: tensor '%s' has invalid data\n", __func__, ggml_get_name(result.first));
+                validation_failed = true;
+            }
+        }
+        if (validation_failed) {
+            throw std::runtime_error("found tensors with invalid data");
+        }
+
        // check if this is the last call and do final cleanup
        if (size_done >= size_data) {
            // unmap offloaded tensors and metadata
@@ -3955,6 +4024,16 @@ static void llm_load_hparams(
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
+
+                switch (hparams.n_layer) {
+                    case 24: model.type = e_model::MODEL_1B; break;
+                    case 32: model.type = e_model::MODEL_3B; break;
+                    default: model.type = e_model::MODEL_UNKNOWN;
+                }
+            } break;
+        case LLM_ARCH_PHI3:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);

                switch (hparams.n_layer) {
                    case 24: model.type = e_model::MODEL_1B; break;
                    case 32: model.type = e_model::MODEL_3B; break;
@@ -4340,7 +4419,7 @@ static void llm_load_vocab(
            }
        }

-        // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
+        // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
        //
        // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
        // for now, we apply this workaround to find the EOT token based on its text
@@ -4351,7 +4430,8 @@ static void llm_load_vocab(
                        // need to fix convert script
                        //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
                        (t.first == "<|eot_id|>" ||
-                         t.first == "<|im_emd|>" ||
+                         t.first == "<|im_end|>" ||
+                         t.first == "<|end|>" ||
                         t.first == "<end_of_turn>"
                        )
                   ) {
@@ -5375,6 +5455,33 @@ static bool llm_load_tensors(
                        layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff});
                    }
                } break;
+            case LLM_ARCH_PHI3:
+                {
+                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), { n_embd, n_vocab });
+
+                    // output
+                    {
+                        model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), { n_embd });
+                        model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), { n_embd, n_vocab });
+                    }
+
+                    for (int i = 0; i < n_layer; ++i) {
+                        ggml_context* ctx_layer = ctx_for_layer(i);
+                        ggml_context* ctx_split = ctx_for_layer_split(i);
+
+                        auto& layer = model.layers[i];
+
+                        layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), { n_embd });
+
+                        layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), { n_embd, n_embd + 2 * n_embd_gqa }, false);
+                        layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd, n_embd });
+
+                        layer.ffn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_NORM, "weight", i), { n_embd });
+
+                        layer.ffn_down = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd });
+                        layer.ffn_up = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), { n_embd, 2 * n_ff });
+                    }
+                } break;
            case LLM_ARCH_PLAMO:
                {
                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
@@ -5909,7 +6016,7 @@ static bool llm_load_tensors(
// Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
    try {
-        llama_model_loader ml(fname, params.use_mmap, params.kv_overrides);
+        llama_model_loader ml(fname, params.use_mmap, params.check_tensors, params.kv_overrides);

        model.hparams.vocab_only = params.vocab_only;

@@ -6326,7 +6433,7 @@ static struct ggml_tensor * llm_build_kqv(
    struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
    cb(kq, "kq", il);

-    if (model.arch == LLM_ARCH_PHI2) {
+    if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3) {
        // for this arch, we need to perform the KQ multiplication with F32 precision, otherwise we get NaNs
        // ref: https://github.com/ggerganov/llama.cpp/pull/4490#issuecomment-1859055847
        ggml_mul_mat_set_prec(kq, GGML_PREC_F32);
@@ -8967,12 +9074,140 @@ struct llm_build_context {

        cur = ggml_add(ctx0, cur, model.output_b);
        cb(cur, "result_output", -1);
+        ggml_build_forward_expand(gf, cur);
+        return gf;
+    }
+
+    struct ggml_cgraph * build_phi3() {
+        struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
+
+        const int64_t n_embd_head = hparams.n_embd_head_v;
+        const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
+        GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
+
+        struct ggml_tensor * cur;
+        struct ggml_tensor * inpL;
+
+        inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
+
+        // inp_pos - contains the positions
+        struct ggml_tensor * inp_pos = build_inp_pos();
+
+        // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
+        struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
+
+        for (int il = 0; il < n_layer; ++il) {
+            auto residual = inpL;
+
+            // self-attention
+            {
+                struct ggml_tensor* attn_norm_output = llm_build_norm(ctx0, inpL, hparams,
+                    model.layers[il].attn_norm,
+                    NULL,
+                    LLM_NORM_RMS, cb, il);
+                cb(attn_norm_output, "attn_norm", il);
+
+                struct ggml_tensor * Qcur = nullptr;
+                struct ggml_tensor * Kcur = nullptr;
+                struct ggml_tensor * Vcur = nullptr;
+
+                if (model.layers[il].wqkv) {
+                    cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, attn_norm_output);
+                    cb(cur, "wqkv", il);
+
+                    Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0 * sizeof(float) * (n_embd)));
+                    Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1 * sizeof(float) * (n_embd)));
+                    Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1 * sizeof(float) * (n_embd + n_embd_gqa)));
+                }
+                else {
+                    Qcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, attn_norm_output), model.layers[il].bq);
+                    Kcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, attn_norm_output), model.layers[il].bk);
+                    Vcur = ggml_add(ctx0, ggml_mul_mat(ctx0, model.layers[il].wv, attn_norm_output), model.layers[il].bv);
+                }
+
+                cb(Qcur, "Qcur", il);
+                cb(Kcur, "Kcur", il);
+                cb(Vcur, "Vcur", il);
+
+                Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
+                Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
+
+                Qcur = ggml_rope_custom(
+                    ctx0, Qcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
+                    freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
+                );
+                cb(Qcur, "Qcur", il);
+
+                Qcur = ggml_scale(ctx0, Qcur, 1.0f / sqrtf(float(n_embd_head)));
+                cb(Qcur, "Qcur", il);
+
+                Kcur = ggml_rope_custom(
+                    ctx0, Kcur, inp_pos, n_rot, rope_type, 0, n_orig_ctx,
+                    freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow
+                );
+                cb(Kcur, "Kcur", il);
+
+                cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
+                    model.layers[il].wo, NULL,
+                    Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f, cb, il);
+            }
+
+            if (il == n_layer - 1) {
+                // skip computing output for unused tokens
+                struct ggml_tensor* inp_out_ids = build_inp_out_ids();
+                cur = ggml_get_rows(ctx0, cur, inp_out_ids);
+                residual = ggml_get_rows(ctx0, residual, inp_out_ids);
+            }
+
+            cur = ggml_add(ctx0, cur, residual);
+            residual = cur;
+
+            cur = llm_build_norm(ctx0, cur, hparams,
+                model.layers[il].ffn_norm, NULL,
+                LLM_NORM_RMS, cb, il);
+            cb(cur, "ffn_norm", il);
+
+            // FF
+            // special-case: the up and gate tensors are merged into a single tensor
+            // TOOD: support into llm_build_ffn
+            {
+                struct ggml_tensor* up = ggml_mul_mat(ctx0, model.layers[il].ffn_up, cur);
+                cb(up, "ffn_up", il);
+
+                auto g = ggml_cont(ctx0, ggml_view_2d(ctx0, up, up->ne[0] / 2, up->ne[1], ggml_row_size(up->type, up->ne[0]), 0));
+                auto y = ggml_cont(ctx0, ggml_view_2d(ctx0, up, up->ne[0] / 2, up->ne[1], ggml_row_size(up->type, up->ne[0]), up->nb[1] / 2));
+
+                y = ggml_mul(ctx0, y, ggml_silu(ctx0, g));
+                cb(y, "ffn_gate", il);
+
+                auto down = ggml_mul_mat(ctx0, model.layers[il].ffn_down, y);
+                cb(down, "ffn_down", il);
+
+                cur = down;
+                cb(cur, "ffn_out", il);
+            }
+
+            cur = ggml_add(ctx0, residual, cur);
+            cb(cur, "l_out", il);
+
+            inpL = cur;
+        }
+
+        cur = llm_build_norm(ctx0, inpL, hparams,
+            model.output_norm,
+            NULL,
+            LLM_NORM_RMS, cb, -1);
+        cb(cur, "result_norm", -1);
+
+        cur = ggml_mul_mat(ctx0, model.output, cur);
+        cb(cur, "result_output", -1);

        ggml_build_forward_expand(gf, cur);

        return gf;
    }

    struct ggml_cgraph * build_plamo() {
        struct ggml_cgraph * gf = ggml_new_graph(ctx0);

@@ -10474,6 +10709,10 @@ static struct ggml_cgraph * llama_build_graph(
            {
                result = llm.build_phi2();
            } break;
+        case LLM_ARCH_PHI3:
+            {
+                result = llm.build_phi3();
+            } break;
        case LLM_ARCH_PLAMO:
            {
                result = llm.build_plamo();
@@ -11234,6 +11473,10 @@ static int llama_decode_internal(
        }
    }

+    // Reset state for the next token before backend sync, to allow the CPU activities in the reset to
+    // overlap with device computation.
+    ggml_backend_sched_reset(lctx.sched);
+
    return 0;
}

@@ -13497,7 +13740,7 @@ llama_token llama_sample_token_greedy(struct llama_context * ctx, llama_token_da
    return result;
}

-llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates) {
+llama_token llama_sample_token_with_rng(struct llama_context * ctx, llama_token_data_array * candidates, std::mt19937 & rng) {
    GGML_ASSERT(ctx);

    const int64_t t_start_sample_us = ggml_time_us();
@@ -13510,7 +13753,6 @@ llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_arra
    }

    std::discrete_distribution<> dist(probs.begin(), probs.end());
-    auto & rng = ctx->rng;
    int idx = dist(rng);

    llama_token result = candidates->data[idx].id;
@@ -13520,6 +13762,10 @@ llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_arra
    return result;
}

+llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates) {
+    return llama_sample_token_with_rng(ctx, candidates, ctx->rng);
+}
+
void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token) {
    const int64_t t_start_sample_us = ggml_time_us();

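With the generator now passed in as a parameter, a caller that can see the new declaration could pin the seed for reproducible draws instead of relying on the context's internal rng. A minimal sketch (not part of the diff): the helper name and the fixed seed are assumptions; `ctx` and `candidates` are assumed to be prepared by the surrounding application code.

// Hedged sketch: sample_reproducibly is a made-up wrapper around the new
// llama_sample_token_with_rng signature shown above.
#include <random>

static llama_token sample_reproducibly(llama_context * ctx, llama_token_data_array * candidates) {
    static std::mt19937 rng(1234); // fixed seed, chosen only for the example
    return llama_sample_token_with_rng(ctx, candidates, rng);
}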
|
@ -14180,14 +14426,20 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
|
static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const float * f32_data, void * new_data, const int64_t chunk_size, int64_t nrows, int64_t n_per_row, const float * imatrix, std::vector<std::thread> & workers, const int nthread) {
|
||||||
|
if (nthread < 2) {
|
||||||
|
// single-thread
|
||||||
|
size_t new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
|
||||||
|
if (!ggml_validate_row_data(new_type, new_data, new_size)) {
|
||||||
|
throw std::runtime_error("quantized data validation failed");
|
||||||
|
}
|
||||||
|
return new_size;
|
||||||
|
}
|
||||||
|
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
int64_t counter = 0;
|
int64_t counter = 0;
|
||||||
size_t new_size = 0;
|
size_t new_size = 0;
|
||||||
if (nthread < 2) {
|
bool valid = true;
|
||||||
// single-thread
|
auto compute = [&mutex, &counter, &new_size, &valid, new_type, f32_data, new_data, chunk_size,
|
||||||
return ggml_quantize_chunk(new_type, f32_data, new_data, 0, nrows, n_per_row, imatrix);
|
|
||||||
}
|
|
||||||
auto compute = [&mutex, &counter, &new_size, new_type, f32_data, new_data, chunk_size,
|
|
||||||
nrows, n_per_row, imatrix]() {
|
nrows, n_per_row, imatrix]() {
|
||||||
const int64_t nrows_per_chunk = chunk_size / n_per_row;
|
const int64_t nrows_per_chunk = chunk_size / n_per_row;
|
||||||
size_t local_size = 0;
|
size_t local_size = 0;
|
||||||
|
@ -14202,7 +14454,17 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
|
||||||
}
|
}
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
const int64_t this_nrow = std::min(nrows - first_row, nrows_per_chunk);
|
const int64_t this_nrow = std::min(nrows - first_row, nrows_per_chunk);
|
||||||
local_size += ggml_quantize_chunk(new_type, f32_data, new_data, first_row * n_per_row, this_nrow, n_per_row, imatrix);
|
size_t this_size = ggml_quantize_chunk(new_type, f32_data, new_data, first_row * n_per_row, this_nrow, n_per_row, imatrix);
|
||||||
|
local_size += this_size;
|
||||||
|
|
||||||
|
// validate the quantized data
|
||||||
|
const size_t row_size = ggml_row_size(new_type, n_per_row);
|
||||||
|
void * this_data = (char *) new_data + first_row * row_size;
|
||||||
|
if (!ggml_validate_row_data(new_type, this_data, this_size)) {
|
||||||
|
std::unique_lock<std::mutex> lock(mutex);
|
||||||
|
valid = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
for (int it = 0; it < nthread - 1; ++it) {
|
for (int it = 0; it < nthread - 1; ++it) {
|
||||||
|
@ -14211,6 +14473,9 @@ static size_t llama_tensor_quantize_internal(enum ggml_type new_type, const floa
|
||||||
compute();
|
compute();
|
||||||
for (auto & w : workers) { w.join(); }
|
for (auto & w : workers) { w.join(); }
|
||||||
workers.clear();
|
workers.clear();
|
||||||
|
if (!valid) {
|
||||||
|
throw std::runtime_error("quantized data validation failed");
|
||||||
|
}
|
||||||
return new_size;
|
return new_size;
|
||||||
}
|
}
@@ -14273,7 +14538,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         auto v = (std::vector<llama_model_kv_override>*)params->kv_overrides;
         kv_overrides = v->data();
     }
-    llama_model_loader ml(fname_inp, use_mmap, kv_overrides);
+    llama_model_loader ml(fname_inp, use_mmap, /*check_tensors*/ true, kv_overrides);
     ml.init_mappings(false); // no prefetching

     llama_model model;
@@ -14311,11 +14576,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         for (auto & o : overrides) {
             if (o.key[0] == 0) break;
             if (o.tag == LLAMA_KV_OVERRIDE_TYPE_FLOAT) {
-                gguf_set_val_f32(ctx_out, o.key, o.float_value);
+                gguf_set_val_f32(ctx_out, o.key, o.val_f64);
             } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_INT) {
-                gguf_set_val_i32(ctx_out, o.key, o.int_value);
+                gguf_set_val_i32(ctx_out, o.key, o.val_i64);
             } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_BOOL) {
-                gguf_set_val_bool(ctx_out, o.key, o.bool_value);
+                gguf_set_val_bool(ctx_out, o.key, o.val_bool);
+            } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_STR) {
+                gguf_set_val_str(ctx_out, o.key, o.val_str);
             } else {
                 LLAMA_LOG_WARN("%s: unknown KV override type for key %s\n", __func__, o.key);
             }
@@ -14357,26 +14624,74 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     std::vector<no_init<uint8_t>> work;
     std::vector<no_init<float>> f32_conv_buf;

+    uint16_t n_split = 1;
+    // Assume split index is continuous
+    if (params->keep_split) {
+        for (int i = 0; i < ml.n_tensors; ++i) {
+            n_split = std::max(uint16_t(ml.get_weight(i)->idx+1), n_split);
+        }
+    }
+    std::vector<gguf_context*> ctx_outs(n_split, NULL);
+    ctx_outs[0] = ctx_out;
+
     // populate the original tensors so we get an initial meta data
     for (int i = 0; i < ml.n_tensors; ++i) {
-        const struct ggml_tensor * meta = ml.get_tensor_meta(i);
-        gguf_add_tensor(ctx_out, meta);
+        auto weight = ml.get_weight(i);
+        uint16_t i_split = params->keep_split ? weight->idx : 0;
+        struct ggml_tensor * tensor = weight->tensor;
+        if (ctx_outs[i_split] == NULL) {
+            ctx_outs[i_split] = gguf_init_empty();
+        }
+        gguf_add_tensor(ctx_outs[i_split], tensor);
     }

-    std::ofstream fout(fname_out, std::ios::binary);
-    fout.exceptions(std::ofstream::failbit); // fail fast on write errors
-
-    const size_t meta_size = gguf_get_meta_size(ctx_out);
-
-    LLAMA_LOG_INFO("%s: meta size = %zu bytes\n", __func__, meta_size);
-
-    // placeholder for the meta data
-    ::zeros(fout, meta_size);
+    // Set split info if needed
+    if (n_split > 1) {
+        for (size_t i = 0; i < ctx_outs.size(); ++i) {
+            gguf_set_val_u16(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_NO).c_str(), i);
+            gguf_set_val_u16(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str(), n_split);
+            gguf_set_val_i32(ctx_outs[i], ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str(), ml.n_tensors);
+        }
+    }
+
+    int cur_split = -1;
+    std::ofstream fout;
+    auto close_ofstream = [&]() {
+        // Write metadata and close file handler
+        if (fout.is_open()) {
+            fout.seekp(0);
+            std::vector<uint8_t> data(gguf_get_meta_size(ctx_outs[cur_split]));
+            gguf_get_meta_data(ctx_outs[cur_split], data.data());
+            fout.write((const char *) data.data(), data.size());
+            fout.close();
+        }
+    };
+    auto new_ofstream = [&](int index) {
+        cur_split = index;
+        GGML_ASSERT(ctx_outs[cur_split] && "Find uninitialized gguf_context");
+        std::string fname = fname_out;
+        if (params->keep_split) {
+            char split_path[PATH_MAX] = {0};
+            llama_split_path(split_path, sizeof(split_path), fname_out.c_str(), cur_split, n_split);
+            fname = std::string(split_path);
+        }
+
+        fout = std::ofstream(fname, std::ios::binary);
+        fout.exceptions(std::ofstream::failbit); // fail fast on write errors
+        const size_t meta_size = gguf_get_meta_size(ctx_outs[cur_split]);
+        // placeholder for the meta data
+        ::zeros(fout, meta_size);
+    };

     const auto tn = LLM_TN(model.arch);
+    new_ofstream(0);
     for (int i = 0; i < ml.n_tensors; ++i) {
-        struct ggml_tensor * tensor = ml.get_tensor_meta(i);
+        auto weight = ml.get_weight(i);
+        struct ggml_tensor * tensor = weight->tensor;
+        if (weight->idx != cur_split && params->keep_split) {
+            close_ofstream();
+            new_ofstream(weight->idx);
+        }

         const std::string name = ggml_get_name(tensor);

@@ -14531,26 +14846,18 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         total_size_new += new_size;

         // update the gguf meta data as we go
-        gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
-        gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
+        gguf_set_tensor_type(ctx_outs[cur_split], name.c_str(), new_type);
+        gguf_set_tensor_data(ctx_outs[cur_split], name.c_str(), new_data, new_size);

         // write tensor data + padding
         fout.write((const char *) new_data, new_size);
         zeros(fout, GGML_PAD(new_size, align) - new_size);
     }
-
-    // go back to beginning of file and write the updated meta data
-    {
-        fout.seekp(0);
-        std::vector<uint8_t> data(gguf_get_meta_size(ctx_out));
-        gguf_get_meta_data(ctx_out, data.data());
-        fout.write((const char *) data.data(), data.size());
+    close_ofstream();
+    for (auto & c:ctx_outs) {
+        gguf_free(c);
     }

-    fout.close();
-
-    gguf_free(ctx_out);
-
     LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
     LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0);

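For reference (outside this diff), a minimal sketch of how the per-shard file names used by new_ofstream above are derived, assuming the existing llama_split_path helper from llama.h; the prefix and shard count are placeholders:

// Sketch only: print the shard names that keep_split output would use.
// llama_split_path(buf, buflen, prefix, split_no, split_count) is assumed from llama.h.
#include "llama.h"
#include <cstdio>

int main() {
    char split_path[512] = {0};
    const int n_split = 3;                         // placeholder shard count
    for (int i = 0; i < n_split; ++i) {
        llama_split_path(split_path, sizeof(split_path), "ggml-model-q4_k_m.gguf", i, n_split);
        printf("%s\n", split_path);                // e.g. ggml-model-q4_k_m-00001-of-00003.gguf
    }
    return 0;
}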
@@ -14594,7 +14901,7 @@ static int llama_apply_lora_from_file_internal(
     std::unique_ptr<llama_model_loader> ml;
     if (path_base_model) {
         LLAMA_LOG_INFO("%s: loading base model from '%s'\n", __func__, path_base_model);
-        ml.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*kv_overrides*/ nullptr));
+        ml.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*check_tensors*/ false, /*kv_overrides*/ nullptr));
         ml->init_mappings(/*prefetch*/ false); // no prefetching
     }

@@ -14853,6 +15160,7 @@ struct llama_model_params llama_model_default_params() {
         /*.vocab_only    =*/ false,
         /*.use_mmap      =*/ true,
         /*.use_mlock     =*/ false,
+        /*.check_tensors =*/ false,
     };

 #ifdef GGML_USE_METAL
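For reference (outside this diff), a minimal sketch of opting into the new check_tensors flag when loading a model; the model path and error handling are placeholders:

// Sketch only: enable the new check_tensors flag when loading a model.
#include "llama.h"
#include <cstdio>

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    mparams.check_tensors = true;   // validate tensor data while loading

    llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "load failed (possibly invalid tensor data)\n");
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}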
@@ -14906,6 +15214,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
         /*.quantize_output_tensor =*/ true,
         /*.only_copy              =*/ false,
         /*.pure                   =*/ false,
+        /*.keep_split             =*/ false,
         /*.imatrix                =*/ nullptr,
         /*.kv_overrides           =*/ nullptr,
     };
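Similarly, a minimal sketch (outside this diff) of requesting sharded output via the new keep_split flag; the file names and chosen ftype are placeholders:

// Sketch only: quantize a split model while keeping the original number of shards.
#include "llama.h"
#include <cstdio>

int main() {
    llama_model_quantize_params qparams = llama_model_quantize_default_params();
    qparams.ftype      = LLAMA_FTYPE_MOSTLY_Q4_K_M;
    qparams.keep_split = true;   // write one output shard per input shard

    // pass the first shard of the input; the output name acts as the prefix for all shards
    const uint32_t rc = llama_model_quantize(
        "model-00001-of-00003.gguf", "model-q4_k_m.gguf", &qparams);
    if (rc != 0) {
        fprintf(stderr, "quantization failed\n");
        return 1;
    }
    return 0;
}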
@@ -15414,6 +15723,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
         case LLM_ARCH_QWEN2:
         case LLM_ARCH_QWEN2MOE:
         case LLM_ARCH_PHI2:
+        case LLM_ARCH_PHI3:
         case LLM_ARCH_GEMMA:
         case LLM_ARCH_STARCODER2:
             return LLAMA_ROPE_TYPE_NEOX;
@@ -15427,6 +15737,10 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
     return LLAMA_ROPE_TYPE_NONE;
 }

+enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx) {
+    return ctx->cparams.pooling_type;
+}
+
 int32_t llama_n_vocab(const struct llama_model * model) {
     return model->hparams.n_vocab;
 }
@@ -15905,6 +16219,8 @@ struct llama_data_file_context : llama_data_context {
  *
 */
 static void llama_state_get_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
+    llama_synchronize(ctx);
+
     // copy rng
     {
         std::ostringstream rng_ss;
@@ -16057,6 +16373,8 @@ size_t llama_state_get_data(struct llama_context * ctx, uint8_t * dst) {

 // Sets the state reading from the specified source address
 size_t llama_state_set_data(struct llama_context * ctx, const uint8_t * src) {
+    llama_synchronize(ctx);
+
     const uint8_t * inp = src;

     // set rng
@@ -16361,6 +16679,8 @@ size_t llama_state_seq_get_size(struct llama_context* ctx, llama_seq_id seq_id)
 }

 static size_t llama_state_seq_get_data_internal(struct llama_context * ctx, llama_data_context & data_ctx, llama_seq_id seq_id) {
+    llama_synchronize(ctx);
+
     const auto & kv_self = ctx->kv_self;
     GGML_ASSERT(!kv_self.recurrent); // not implemented

@@ -16478,6 +16798,8 @@ size_t llama_state_seq_get_data(struct llama_context* ctx, uint8_t* dst, llama_s
 }

 size_t llama_state_seq_set_data(struct llama_context * ctx, const uint8_t * src, llama_seq_id dest_seq_id) {
+    llama_synchronize(ctx);
+
     auto & kv_self = ctx->kv_self;
     GGML_ASSERT(!kv_self.recurrent); // not implemented

@@ -17278,6 +17600,15 @@ static int32_t llama_chat_apply_template_internal(
         if (add_ass) {
             ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
         }
+    } else if (tmpl == "phi3" || (tmpl.find("<|assistant|>") != std::string::npos && tmpl.find("<|end|>") != std::string::npos )) {
+        // Phi 3
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>\n" << trim(message->content) << "<|end|>\n";
+        }
+        if (add_ass) {
+            ss << "<|assistant|>\n";
+        }
     } else {
         // template not supported
         return -1;
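For reference (not part of the diff), a minimal sketch of exercising the new Phi 3 branch through the public llama_chat_apply_template API; the custom-template path is used so no model is needed, and the messages are placeholders:

// Sketch only: format a short conversation with the "phi3" template name added above.
#include "llama.h"
#include <cstdio>
#include <vector>

int main() {
    std::vector<llama_chat_message> chat = {
        { "system", "You are a helpful assistant" },
        { "user",   "Hello" },
    };

    std::vector<char> buf(1024);
    int32_t n = llama_chat_apply_template(
        /*model*/ nullptr, "phi3", chat.data(), chat.size(),
        /*add_ass*/ true, buf.data(), (int32_t) buf.size());
    if (n < 0) {
        fprintf(stderr, "template not supported\n");
        return 1;
    }
    printf("%.*s", n, buf.data());   // <|system|>\n...<|end|>\n<|user|>\n...<|end|>\n<|assistant|>\n
    return 0;
}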
@@ -17410,6 +17741,11 @@ const char * llama_print_system_info(void) {
     s += "SSSE3 = " + std::to_string(ggml_cpu_has_ssse3()) + " | ";
     s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
     s += "MATMUL_INT8 = " + std::to_string(ggml_cpu_has_matmul_int8()) + " | ";
+#ifdef GGML_USE_LLAMAFILE
+    s += "LAMMAFILE = 1 | ";
+#else
+    s += "LAMMAFILE = 0 | ";
+#endif

     return s.c_str();
 }
35 llama.h
@@ -195,15 +195,19 @@ extern "C" {
         LLAMA_KV_OVERRIDE_TYPE_INT,
         LLAMA_KV_OVERRIDE_TYPE_FLOAT,
         LLAMA_KV_OVERRIDE_TYPE_BOOL,
+        LLAMA_KV_OVERRIDE_TYPE_STR,
     };

     struct llama_model_kv_override {
-        char key[128];
         enum llama_model_kv_override_type tag;

+        char key[128];
+
         union {
-            int64_t int_value;
-            double float_value;
-            bool bool_value;
+            int64_t val_i64;
+            double  val_f64;
+            bool    val_bool;
+            char    val_str[128];
         };
     };

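For reference (not part of the diff), a minimal sketch of populating the new string override with the renamed union fields; the key and value are placeholders, and the vector is passed the way the quantize code above expects:

// Sketch only: fill a llama_model_kv_override using the new string variant.
#include "llama.h"
#include <cstring>
#include <vector>

int main() {
    std::vector<llama_model_kv_override> overrides(2);   // zero-initialized entries

    llama_model_kv_override & o = overrides[0];
    o.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
    std::strncpy(o.key, "general.name", sizeof(o.key) - 1);
    std::strncpy(o.val_str, "my-quantized-model", sizeof(o.val_str) - 1);

    overrides[1].key[0] = 0;   // terminator entry, as checked by the quantize loop

    llama_model_quantize_params qparams = llama_model_quantize_default_params();
    qparams.kv_overrides = &overrides;   // pointer to the std::vector, as the quantize code expects
    (void) qparams;
    return 0;
}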
@@ -232,9 +236,10 @@ extern "C" {
         const struct llama_model_kv_override * kv_overrides;

         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool vocab_only;    // only load the vocabulary, no weights
         bool use_mmap;      // use mmap if possible
         bool use_mlock;     // force system to keep model in RAM
+        bool check_tensors; // validate model tensor data
     };

     struct llama_context_params {
@@ -288,6 +293,7 @@ extern "C" {
         bool quantize_output_tensor; // quantize output.weight
         bool only_copy;              // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         bool pure;                   // quantize all tensors to the default type
+        bool keep_split;             // quantize to the same number of shards
         void * imatrix;              // pointer to importance matrix data
         void * kv_overrides;         // pointer to vector containing overrides
     } llama_model_quantize_params;
@@ -390,8 +396,10 @@ extern "C" {
     LLAMA_API uint32_t llama_n_ubatch    (const struct llama_context * ctx);
     LLAMA_API uint32_t llama_n_seq_max   (const struct llama_context * ctx);

-    LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
-    LLAMA_API enum llama_rope_type  llama_rope_type (const struct llama_model * model);
+    LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx);
+    LLAMA_API enum llama_vocab_type   llama_vocab_type  (const struct llama_model * model);
+    LLAMA_API enum llama_rope_type    llama_rope_type   (const struct llama_model * model);

     LLAMA_API int32_t llama_n_vocab    (const struct llama_model * model);
     LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model);
@@ -987,7 +995,7 @@ extern "C" {
             struct llama_context * ctx,
           llama_token_data_array * candidates);

-    /// @details Randomly selects a token from the candidates based on their probabilities.
+    /// @details Randomly selects a token from the candidates based on their probabilities using the RNG of ctx.
     LLAMA_API llama_token llama_sample_token(
             struct llama_context * ctx,
           llama_token_data_array * candidates);
@@ -1074,8 +1082,9 @@ extern "C" {
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 #ifdef LLAMA_API_INTERNAL

-#include <vector>
+#include <random>
 #include <string>
+#include <vector>

 struct ggml_tensor;

@@ -1117,6 +1126,10 @@ std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const std::string & src,
         llama_partial_utf8 partial_start);

+// Randomly selects a token from the candidates based on their probabilities using given std::mt19937.
+// This is a temporary workaround in order to fix race conditions when sampling with multiple sequences.
+llama_token llama_sample_token_with_rng(struct llama_context * ctx, llama_token_data_array * candidates, std::mt19937 & rng);
+
 #endif // LLAMA_API_INTERNAL

 #endif // LLAMA_H
BIN media/matmul.png Normal file (binary file not shown; 260 KiB)
1238 media/matmul.svg Normal file (file diff suppressed because it is too large; 51 KiB)
16 scripts/xxd.cmake Normal file
@@ -0,0 +1,16 @@
+# CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}`
+# Usage: cmake -DINPUT=examples/server/public/index.html -DOUTPUT=examples/server/index.html.hpp -P scripts/xxd.cmake
+
+SET(INPUT "" CACHE STRING "Input File")
+SET(OUTPUT "" CACHE STRING "Output File")
+
+get_filename_component(filename "${INPUT}" NAME)
+string(REGEX REPLACE "\\.|-" "_" name "${filename}")
+
+file(READ "${INPUT}" hex_data HEX)
+string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," hex_sequence "${hex_data}")
+
+string(LENGTH ${hex_data} hex_len)
+math(EXPR len "${hex_len} / 2")
+
+file(WRITE "${OUTPUT}" "unsigned char ${name}[] = {${hex_sequence}};\nunsigned int ${name}_len = ${len};\n")
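For reference (not part of the diff), the generated header simply defines a byte array plus its length; a minimal C++ sketch of consuming it, with the header and array names following the Usage line above for index.html:

// Sketch only: use the array emitted by scripts/xxd.cmake.
// Per the file(WRITE ...) line above, the header declares:
//   unsigned char index_html[];  unsigned int index_html_len;
#include <cstdio>
#include "index.html.hpp"   // produced by the Usage example above

int main() {
    printf("embedded index.html: %u bytes\n", index_html_len);
    fwrite(index_html, 1, index_html_len, stdout);   // dump the embedded page
    return 0;
}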
6 sgemm.h
@@ -1,11 +1,13 @@
 #pragma once
+#include <stdint.h>
 #include <stdbool.h>
 #ifdef __cplusplus
 extern "C" {
 #endif

-bool llamafile_sgemm(int, int, int, const void *, int, const void *, int,
-                     void *, int, int, int, int, int, int, int);
+bool llamafile_sgemm(int64_t, int64_t, int64_t, const void *, int64_t,
+                     const void *, int64_t, void *, int64_t, int, int,
+                     int, int, int, int);

 #ifdef __cplusplus
 }
@@ -49,6 +49,8 @@ int main(void) {
         "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}",
         // Llama-3
         "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
+        // Phi 3
+        "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + ' ' + message['content'] + '<|end|> ' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|> ' }}{% else %}{{ eos_token }}{% endif %}"
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B
@@ -77,6 +79,8 @@ int main(void) {
         "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
         // Llama 3
         "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi there<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI am an assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nAnother question<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
+        // Phi 3
+        "<|system|>\nYou are a helpful assistant<|end|>\n<|user|>\nHello<|end|>\n<|assistant|>\nHi there<|end|>\n<|user|>\nWho are you<|end|>\n<|assistant|>\nI am an assistant<|end|>\n<|user|>\nAnother question<|end|>\n<|assistant|>\n",
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;