Merge remote-tracking branch 'origin/master' into json-fixes

This commit is contained in:
ochafik 2024-03-14 23:37:13 +00:00
commit 3feac66d0f
70 changed files with 6795 additions and 3837 deletions

View file

@ -333,6 +333,7 @@ jobs:
mkdir build mkdir build
cd build cd build
cmake -G Xcode .. \ cmake -G Xcode .. \
-DLLAMA_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \ -DLLAMA_BUILD_SERVER=OFF \
@ -361,6 +362,7 @@ jobs:
mkdir build mkdir build
cd build cd build
cmake -G Xcode .. \ cmake -G Xcode .. \
-DLLAMA_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_EXAMPLES=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \ -DLLAMA_BUILD_SERVER=OFF \
@ -425,6 +427,8 @@ jobs:
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON' defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
- build: 'vulkan' - build: 'vulkan'
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON' defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
- build: 'arm64'
defines: '-A ARM64 -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
steps: steps:
- name: Clone - name: Clone
@ -520,7 +524,7 @@ jobs:
- name: Test - name: Test
id: cmake_test id: cmake_test
# not all machines have native AVX-512 # not all machines have native AVX-512
if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} if: ${{ matrix.build != 'arm64' && matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
run: | run: |
cd build cd build
ctest -L main -C Release --verbose --timeout 900 ctest -L main -C Release --verbose --timeout 900

View file

@ -25,17 +25,14 @@ jobs:
strategy: strategy:
matrix: matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED] sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug, Release] build_type: [Debug]
include: include:
- build_type: Release - build_type: Release
sanitizer: "" sanitizer: ""
exclude: - build_type: Debug
- build_type: Release
sanitizer: ADDRESS
- build_type: Release
sanitizer: THREAD sanitizer: THREAD
- build_type: Release disabled_on_pr: true
sanitizer: UNDEFINED fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
container: container:
image: ubuntu:latest image: ubuntu:latest
@ -47,6 +44,8 @@ jobs:
- name: Clone - name: Clone
id: checkout id: checkout
uses: actions/checkout@v3 uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Dependencies - name: Dependencies
id: depends id: depends
@ -58,7 +57,6 @@ jobs:
cmake \ cmake \
python3-pip \ python3-pip \
wget \ wget \
psmisc \
language-pack-en language-pack-en
- name: Build - name: Build
@ -80,13 +78,58 @@ jobs:
- name: Tests - name: Tests
id: server_integration_tests id: server_integration_tests
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: | run: |
cd examples/server/tests cd examples/server/tests
PORT=8888 ./tests.sh PORT=8888 ./tests.sh
- name: Slow tests - name: Slow tests
id: server_integration_tests_slow id: server_integration_tests_slow
if: ${{ github.event.schedule != '' && matrix.build_type == 'Release' || github.event.inputs.slow_tests == 'true' }} if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: | run: |
cd examples/server/tests cd examples/server/tests
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
server-windows:
runs-on: windows-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build
id: cmake_build
run: |
mkdir build
cd build
cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
- name: Python setup
id: setup_python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt
- name: Tests
id: server_integration_tests
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: |
cd examples/server/tests
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd examples/server/tests
behave.exe --stop --no-skipped --no-capture --tags slow

View file

@ -1,20 +0,0 @@
name: clang-tidy review post comments
on:
workflow_dispatch:
workflows: ["clang-tidy-review"]
types:
- completed
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: ZedThree/clang-tidy-review/post@v0.13.0
# lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup
with:
# adjust options as necessary
lgtm_comment_body: ''
annotations: false
max_comments: 25

View file

@ -1,23 +0,0 @@
name: clang-tidy-review
on:
pull_request:
branches:
- master
jobs:
clang-tidy-review:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: ZedThree/clang-tidy-review@v0.13.0
id: review
with:
lgtm_comment_body: ''
build_dir: build
cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on
split_workflow: true
- uses: ZedThree/clang-tidy-review/upload@v0.13.0

3
.gitignore vendored
View file

@ -25,6 +25,8 @@
.vscode/ .vscode/
.idea/ .idea/
ggml-metal-embed.metal
lcov-report/ lcov-report/
gcovr-report/ gcovr-report/
@ -45,6 +47,7 @@ models-mnt
/embedding /embedding
/gguf /gguf
/gguf-llama-simple /gguf-llama-simple
/gritlm
/imatrix /imatrix
/infill /infill
/libllama.so /libllama.so

View file

@ -116,7 +116,9 @@ option(LLAMA_MPI "llama: use MPI"
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF) option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
option(LLAMA_SYCL "llama: use SYCL" OFF) option(LLAMA_SYCL "llama: use SYCL" OFF)
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@ -146,6 +148,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
# enable libstdc++ assertions for debug builds # enable libstdc++ assertions for debug builds
if (CMAKE_SYSTEM_NAME MATCHES "Linux") if (CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
@ -196,9 +200,6 @@ if (LLAMA_METAL)
add_compile_definitions(GGML_METAL_NDEBUG) add_compile_definitions(GGML_METAL_NDEBUG)
endif() endif()
# get full path to the file
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
# copy ggml-common.h and ggml-metal.metal to bin directory # copy ggml-common.h and ggml-metal.metal to bin directory
configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY) configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY) configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
@ -207,25 +208,31 @@ if (LLAMA_METAL)
enable_language(ASM) enable_language(ASM)
add_compile_definitions(GGML_METAL_EMBED_LIBRARY) add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
set(METALLIB_SOURCE "${CMAKE_SOURCE_DIR}/ggml-metal.metal") set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s")
# merge ggml-common.h and ggml-metal.metal into a single file
set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
add_custom_command( add_custom_command(
OUTPUT ${EMBED_METALLIB_ASSEMBLY} OUTPUT ${METALLIB_EMBED_ASM}
COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY} COMMAND echo "Embedding Metal library"
COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY} COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY} COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY} COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY} COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY} COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
DEPENDS ${METALLIB_SOURCE} COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
DEPENDS ggml-metal.metal ggml-common.h
COMMENT "Generate assembly for embedded Metal library" COMMENT "Generate assembly for embedded Metal library"
) )
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY}) set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
endif() else()
if (LLAMA_METAL_SHADER_DEBUG) if (LLAMA_METAL_SHADER_DEBUG)
# custom command to do the following: # custom command to do the following:
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
@ -237,15 +244,18 @@ if (LLAMA_METAL)
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib # note: unfortunately, we have to call it default.metallib instead of ggml.metallib
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720 # ref: https://github.com/ggerganov/whisper.cpp/issues/1720
set(XC_FLAGS -fno-fast-math -fno-inline -g) set(XC_FLAGS -fno-fast-math -fno-inline -g)
if (LLAMA_QKK_64) else()
set(XC_FLAGS ${XC_FLAGS} -DQK_K=64) set(XC_FLAGS -O3)
endif() endif()
add_custom_command( add_custom_command(
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
DEPENDS ggml-metal.metal COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
DEPENDS ggml-metal.metal ggml-common.h
COMMENT "Compiling Metal kernels" COMMENT "Compiling Metal kernels"
) )
@ -253,7 +263,7 @@ if (LLAMA_METAL)
ggml-metal ALL ggml-metal ALL
DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
) )
endif() endif() # LLAMA_METAL_EMBED_LIBRARY
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
${FOUNDATION_LIBRARY} ${FOUNDATION_LIBRARY}
@ -534,6 +544,10 @@ if (LLAMA_HIPBLAS)
endif() endif()
if (LLAMA_SYCL) if (LLAMA_SYCL)
if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
endif()
if ( NOT DEFINED ENV{ONEAPI_ROOT}) if ( NOT DEFINED ENV{ONEAPI_ROOT})
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh") message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
endif() endif()
@ -555,6 +569,9 @@ if (LLAMA_SYCL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
endif()
set(GGML_HEADERS_SYCL ggml-sycl.h) set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp) set(GGML_SOURCES_SYCL ggml-sycl.cpp)
@ -562,7 +579,11 @@ if (LLAMA_SYCL)
if (WIN32) if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
else() else()
if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl)
endif()
endif() endif()
endif() endif()
@ -1129,6 +1150,8 @@ endif()
add_library(llama add_library(llama
llama.cpp llama.cpp
llama.h llama.h
unicode.h
unicode.cpp
) )
target_include_directories(llama PUBLIC .) target_include_directories(llama PUBLIC .)

View file

@ -2,7 +2,7 @@
BUILD_TARGETS = \ BUILD_TARGETS = \
main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \ main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \ simple batched batched-bench save-load-state server gguf llama-bench libllava.a llava-cli baby-llama beam-search \
speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey tests/test-c.o speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm tests/test-c.o
# Binaries only useful for tests # Binaries only useful for tests
TEST_TARGETS = \ TEST_TARGETS = \
@ -168,6 +168,10 @@ ifeq ($(UNAME_S),OpenBSD)
MK_CPPFLAGS += -D_BSD_SOURCE MK_CPPFLAGS += -D_BSD_SOURCE
endif endif
ifdef LLAMA_SCHED_MAX_COPIES
MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
endif
ifdef LLAMA_DEBUG ifdef LLAMA_DEBUG
MK_CFLAGS += -O0 -g MK_CFLAGS += -O0 -g
MK_CXXFLAGS += -O0 -g MK_CXXFLAGS += -O0 -g
@ -554,13 +558,14 @@ ggml-metal.o: ggml-metal.m ggml-metal.h
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -c $< -o $@
ifdef LLAMA_METAL_EMBED_LIBRARY ifdef LLAMA_METAL_EMBED_LIBRARY
ggml-metal-embed.o: ggml-metal.metal ggml-metal-embed.o: ggml-metal.metal ggml-common.h
@echo "Embedding Metal library" @echo "Embedding Metal library"
@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
$(eval TEMP_ASSEMBLY=$(shell mktemp)) $(eval TEMP_ASSEMBLY=$(shell mktemp))
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY) @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY) @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY) @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
@echo ".incbin \"$<\"" >> $(TEMP_ASSEMBLY) @echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY) @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY) @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
@$(AS) $(TEMP_ASSEMBLY) -o $@ @$(AS) $(TEMP_ASSEMBLY) -o $@
@ -634,9 +639,12 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -c $< -o $@
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o: unicode.cpp unicode.h
$(CXX) $(CXXFLAGS) -c $< -o $@
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h COMMON_H_DEPS = common/common.h common/sampling.h common/log.h
@ -730,6 +738,10 @@ embedding: examples/embedding/embedding.cpp ggml.o llama.o $(C
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
gritlm: examples/gritlm/gritlm.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

View file

@ -31,6 +31,7 @@ let package = Package(
sources: [ sources: [
"ggml.c", "ggml.c",
"llama.cpp", "llama.cpp",
"unicode.cpp",
"ggml-alloc.c", "ggml-alloc.c",
"ggml-backend.c", "ggml-backend.c",
"ggml-quants.c", "ggml-quants.c",

View file

@ -73,6 +73,29 @@ For iGPU, please make sure the shared memory from host memory is enough. For lla
For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+. For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+.
## Nvidia GPU
### Verified
|Intel GPU| Status | Verified Model|
|-|-|-|
|Ampere Series| Support| A100|
### oneMKL
The current oneMKL release does not contain the oneMKL cuBlas backend.
As a result for Nvidia GPU's oneMKL must be built from source.
```
git clone https://github.com/oneapi-src/oneMKL
cd oneMKL
mkdir build
cd build
cmake -G Ninja .. -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON
ninja
// Add paths as necessary
```
## Docker ## Docker
Note: Note:
@ -186,6 +209,9 @@ source /opt/intel/oneapi/setvars.sh
# Or, for FP32: # Or, for FP32:
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
# For Nvidia GPUs
cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
# Build example/main only # Build example/main only
#cmake --build . --config Release --target main #cmake --build . --config Release --target main

View file

@ -8,19 +8,18 @@
Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
> [!IMPORTANT]
> **Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962**
>
> Vote for which quantization type provides better responses, all other parameters being the same.
### Recent API changes ### Recent API changes
- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_max_seq()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328 - [2024 Mar 13] Add `llama_synchronize()` + `llama_context_params.n_ubatch` https://github.com/ggerganov/llama.cpp/pull/6017
- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_seq_max()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328
- [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796 - [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796
- [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849 - [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849
### Hot topics ### Hot topics
- Multi-GPU pipeline parallelizm support https://github.com/ggerganov/llama.cpp/pull/6017
- Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981
- Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962
- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328 - Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328
---- ----
@ -905,6 +904,9 @@ First, install the essential packages for termux:
pkg install clang wget git cmake pkg install clang wget git cmake
``` ```
Second, obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake: Second, obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake:
You can execute the following commands on your computer to avoid downloading the NDK to your mobile. Of course, you can also do this in Termux.
``` ```
$ mkdir build-android $ mkdir build-android
$ cd build-android $ cd build-android
@ -913,7 +915,28 @@ $ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROI
$ make $ make
``` ```
Install [termux](https://termux.dev/) on your device and run `termux-setup-storage` to get access to your SD card. Install [termux](https://termux.dev/) on your device and run `termux-setup-storage` to get access to your SD card.
Finally, copy the `llama` binary and the model files to your device storage. Here is a demo of an interactive session running on Pixel 5 phone: Finally, copy these built `llama` binaries and the model file to your device storage. Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission:
(Assumed that you have pushed the built executable files to the /sdcard/llama.cpp/bin path using `adb push`)
```
$cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/
$cd /data/data/com.termux/files/home/bin
$chmod +x ./*
```
Download model [llama-2-7b-chat.Q4_K_M.gguf](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q4_K_M.gguf), and push it to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/`
```
$mv /sdcard/llama.cpp/llama-2-7b-chat.Q4_K_M.gguf /data/data/com.termux/files/home/model/
```
Now, you can start chatting:
```
$cd /data/data/com.termux/files/home/bin
$./main -m ../model/llama-2-7b-chat.Q4_K_M.gguf -n 128 -cml
```
Here is a demo of an interactive session running on Pixel 5 phone:
https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4 https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4

View file

@ -115,6 +115,7 @@ pub fn build(b: *std.build.Builder) !void {
const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c"); const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
const ggml_backend = make.obj("ggml-backend", "ggml-backend.c"); const ggml_backend = make.obj("ggml-backend", "ggml-backend.c");
const ggml_quants = make.obj("ggml-quants", "ggml-quants.c"); const ggml_quants = make.obj("ggml-quants", "ggml-quants.c");
const unicode = make.obj("unicode", "unicode.cpp");
const llama = make.obj("llama", "llama.cpp"); const llama = make.obj("llama", "llama.cpp");
const buildinfo = make.obj("common", "common/build-info.cpp"); const buildinfo = make.obj("common", "common/build-info.cpp");
const common = make.obj("common", "common/common.cpp"); const common = make.obj("common", "common/common.cpp");
@ -125,14 +126,14 @@ pub fn build(b: *std.build.Builder) !void {
const clip = make.obj("clip", "examples/llava/clip.cpp"); const clip = make.obj("clip", "examples/llava/clip.cpp");
const llava = make.obj("llava", "examples/llava/llava.cpp"); const llava = make.obj("llava", "examples/llava/llava.cpp");
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser }); _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, console, grammar_parser });
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo });
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo });
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo }); _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo });
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train });
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train }); _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train });
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip, llava }); const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, grammar_parser, clip, llava });
if (server.target.isWindows()) { if (server.target.isWindows()) {
server.linkSystemLibrary("ws2_32"); server.linkSystemLibrary("ws2_32");
} }

View file

@ -483,6 +483,12 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
break; break;
} }
params.n_batch = std::stoi(argv[i]); params.n_batch = std::stoi(argv[i]);
} else if (arg == "-ub" || arg == "--ubatch-size") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.n_ubatch = std::stoi(argv[i]);
} else if (arg == "--keep") { } else if (arg == "--keep") {
if (++i >= argc) { if (++i >= argc) {
invalid_param = true; invalid_param = true;
@ -977,7 +983,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" binary file containing multiple choice tasks.\n"); printf(" binary file containing multiple choice tasks.\n");
printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict); printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx); printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch);
printf(" -ub N, --ubatch-size N\n");
printf(" physical maximum batch size (default: %d)\n", params.n_ubatch);
printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n"); printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n");
printf(" (default: %s)\n", sampler_type_names.c_str()); printf(" (default: %s)\n", sampler_type_names.c_str());
printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str()); printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
@ -1287,8 +1295,9 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
auto cparams = llama_context_default_params(); auto cparams = llama_context_default_params();
cparams.n_ctx = params.n_ctx; cparams.n_ctx = params.n_ctx;
cparams.n_seq_max = params.n_parallel;
cparams.n_batch = params.n_batch; cparams.n_batch = params.n_batch;
cparams.n_parallel = params.n_parallel; cparams.n_ubatch = params.n_ubatch;
cparams.n_threads = params.n_threads; cparams.n_threads = params.n_threads;
cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch; cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
cparams.seed = params.seed; cparams.seed = params.seed;
@ -1379,6 +1388,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), }; std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0)); llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
llama_kv_cache_clear(lctx); llama_kv_cache_clear(lctx);
llama_synchronize(lctx);
llama_reset_timings(lctx); llama_reset_timings(lctx);
} }
@ -1786,17 +1796,17 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) {
static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+"; static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d", printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
llama_kv_cache_view_cell * c_curr = view.cells; llama_kv_cache_view_cell * c_curr = view.cells;
llama_seq_id * cs_curr = view.cells_sequences; llama_seq_id * cs_curr = view.cells_sequences;
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
if (i % row_size == 0) { if (i % row_size == 0) {
printf("\n%5d: ", i); printf("\n%5d: ", i);
} }
int seq_count = 0; int seq_count = 0;
for (int j = 0; j < view.n_max_seq; j++) { for (int j = 0; j < view.n_seq_max; j++) {
if (cs_curr[j] >= 0) { seq_count++; } if (cs_curr[j] >= 0) { seq_count++; }
} }
putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]); putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
@ -1809,14 +1819,14 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n", printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
std::unordered_map<llama_seq_id, size_t> seqs; std::unordered_map<llama_seq_id, size_t> seqs;
llama_kv_cache_view_cell * c_curr = view.cells; llama_kv_cache_view_cell * c_curr = view.cells;
llama_seq_id * cs_curr = view.cells_sequences; llama_seq_id * cs_curr = view.cells_sequences;
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
for (int j = 0; j < view.n_max_seq; j++) { for (int j = 0; j < view.n_seq_max; j++) {
if (cs_curr[j] < 0) { continue; } if (cs_curr[j] < 0) { continue; }
if (seqs.find(cs_curr[j]) == seqs.end()) { if (seqs.find(cs_curr[j]) == seqs.end()) {
if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
@ -1835,11 +1845,11 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
c_curr = view.cells; c_curr = view.cells;
cs_curr = view.cells_sequences; cs_curr = view.cells_sequences;
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) { for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
if (i % row_size == 0) { if (i % row_size == 0) {
printf("\n%5d: ", i); printf("\n%5d: ", i);
} }
for (int j = 0; j < view.n_max_seq; j++) { for (int j = 0; j < view.n_seq_max; j++) {
if (cs_curr[j] >= 0) { if (cs_curr[j] >= 0) {
const auto & it = seqs.find(cs_curr[j]); const auto & it = seqs.find(cs_curr[j]);
putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+'); putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
@ -1867,3 +1877,16 @@ void llama_embd_normalize(const float * inp, float * out, int n) {
} }
} }
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){
double sum = 0.0;
double sum1 = 0.0;
double sum2 = 0.0;
for (int i = 0; i < n; i++) {
sum += embd1[i] * embd2[i];
sum1 += embd1[i] * embd1[i];
sum2 += embd2[i] * embd2[i];
}
return sum / (sqrt(sum1) * sqrt(sum2));
}

View file

@ -51,7 +51,8 @@ struct gpt_params {
int32_t n_threads_batch_draft = -1; int32_t n_threads_batch_draft = -1;
int32_t n_predict = -1; // new tokens to predict int32_t n_predict = -1; // new tokens to predict
int32_t n_ctx = 512; // context size int32_t n_ctx = 512; // context size
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS) int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt int32_t n_keep = 0; // number of tokens to keep from initial prompt
int32_t n_draft = 5; // number of tokens to draft during speculative decoding int32_t n_draft = 5; // number of tokens to draft during speculative decoding
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
@ -267,3 +268,4 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40
void llama_embd_normalize(const float * inp, float * out, int n); void llama_embd_normalize(const float * inp, float * out, int n);
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);

View file

@ -278,6 +278,22 @@ namespace grammar_parser {
while (*pos) { while (*pos) {
pos = parse_rule(state, pos); pos = parse_rule(state, pos);
} }
// Validate the state to ensure that all rules are defined
for (const auto & rule : state.rules) {
for (const auto & elem : rule) {
if (elem.type == LLAMA_GRETYPE_RULE_REF) {
// Ensure that the rule at that location exists
if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) {
// Get the name of the rule that is missing
for (const auto & kv : state.symbol_ids) {
if (kv.second == elem.value) {
throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
}
}
}
}
}
}
return state; return state;
} catch (const std::exception & err) { } catch (const std::exception & err) {
fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what()); fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());

View file

@ -17,6 +17,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_
return nullptr; return nullptr;
} }
// Ensure that there is a "root" node.
if (result->parsed_grammar.symbol_ids.find("root") == result->parsed_grammar.symbol_ids.end()) {
fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
delete result;
return nullptr;
}
std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules()); std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules());
result->grammar = llama_grammar_init( result->grammar = llama_grammar_init(

View file

@ -332,6 +332,9 @@ class Params:
# #
class BpeVocab: class BpeVocab:
tokenizer_model = "gpt2"
name = "bpe"
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None: def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read()) self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
if isinstance(self.bpe_tokenizer.get('model'), dict): if isinstance(self.bpe_tokenizer.get('model'), dict):
@ -390,6 +393,9 @@ class BpeVocab:
class SentencePieceVocab: class SentencePieceVocab:
tokenizer_model = "llama"
name = "spm"
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None: def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer)) self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer))
added_tokens: dict[str, int] added_tokens: dict[str, int]
@ -453,6 +459,9 @@ class SentencePieceVocab:
class HfVocab: class HfVocab:
tokenizer_model = "llama"
name = "hfft"
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None = None) -> None: def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None = None) -> None:
try: try:
from transformers import AutoTokenizer from transformers import AutoTokenizer
@ -553,7 +562,15 @@ class HfVocab:
return f"<HfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>" return f"<HfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
Vocab: TypeAlias = "BpeVocab | SentencePieceVocab | HfVocab" class NoVocab:
tokenizer_model = "no_vocab"
name = "no_vocab"
def __repr__(self) -> str:
return "<NoVocab for a model without integrated vocabulary>"
Vocab: TypeAlias = "BpeVocab | SentencePieceVocab | HfVocab | NoVocab"
# #
@ -935,8 +952,10 @@ def check_vocab_size(params: Params, vocab: Vocab, pad_vocab: bool = False) -> N
# Handle special case where the model's vocab size is not set # Handle special case where the model's vocab size is not set
if params.n_vocab == -1: if params.n_vocab == -1:
raise ValueError( raise ValueError(
f"The model's vocab size is set to -1 in params.json. Please update it manually. Maybe {vocab.vocab_size}?" f"The model's vocab size is set to -1 in params.json. Please update it manually.{f' Maybe {vocab.vocab_size}?' if hasattr(vocab, 'vocab_size') else ''}"
) )
if isinstance(vocab, NoVocab):
return # model has no vocab
# Check for a vocab size mismatch # Check for a vocab size mismatch
if params.n_vocab == vocab.vocab_size: if params.n_vocab == vocab.vocab_size:
@ -977,6 +996,7 @@ class OutputFile:
name = str(params.path_model.parent).split('/')[-1] name = str(params.path_model.parent).split('/')[-1]
self.gguf.add_name (name) self.gguf.add_name (name)
self.gguf.add_vocab_size (params.n_vocab)
self.gguf.add_context_length (params.n_ctx) self.gguf.add_context_length (params.n_ctx)
self.gguf.add_embedding_length (params.n_embd) self.gguf.add_embedding_length (params.n_embd)
self.gguf.add_block_count (params.n_layer) self.gguf.add_block_count (params.n_layer)
@ -1013,21 +1033,9 @@ class OutputFile:
if params.ftype is not None: if params.ftype is not None:
self.gguf.add_file_type(params.ftype) self.gguf.add_file_type(params.ftype)
def handle_tokenizer_model(self, vocab: Vocab) -> str:
# Map the vocab types to the supported tokenizer models
tokenizer_model = {
SentencePieceVocab: "llama",
HfVocab: "llama",
BpeVocab: "gpt2",
}.get(type(vocab))
# Block if vocab type is not predefined
if tokenizer_model is None:
raise ValueError("Unknown vocab type: Not supported")
return tokenizer_model
def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[bytes], list[float], list[gguf.TokenType]]: def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[bytes], list[float], list[gguf.TokenType]]:
assert not isinstance(vocab, NoVocab)
tokens = [] tokens = []
scores = [] scores = []
toktypes = [] toktypes = []
@ -1043,11 +1051,8 @@ class OutputFile:
return tokens, scores, toktypes return tokens, scores, toktypes
def add_meta_vocab(self, vocab: Vocab) -> None: def add_meta_vocab(self, vocab: Vocab) -> None:
# Handle the tokenizer model
tokenizer_model = self.handle_tokenizer_model(vocab)
# Ensure that tokenizer_model is added to the GGUF model # Ensure that tokenizer_model is added to the GGUF model
self.gguf.add_tokenizer_model(tokenizer_model) self.gguf.add_tokenizer_model(vocab.tokenizer_model)
# Extract model vocabulary for model conversion # Extract model vocabulary for model conversion
tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab) tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab)
@ -1074,6 +1079,26 @@ class OutputFile:
def write_tensor_info(self) -> None: def write_tensor_info(self) -> None:
self.gguf.write_ti_data_to_file() self.gguf.write_ti_data_to_file()
def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency: int) -> None:
ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency=concurrency)
if ftype == GGMLFileType.MostlyQ8_0:
ndarrays = bounded_parallel_map(
OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency,
use_processpool_executor=True,
)
else:
ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)
start = time.time()
for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
elapsed = time.time() - start
size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
padi = len(str(len(model)))
print(
f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
)
self.gguf.write_tensor_data(ndarray)
def close(self) -> None: def close(self) -> None:
self.gguf.close() self.gguf.close()
@ -1082,7 +1107,7 @@ class OutputFile:
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
) -> None: ) -> None:
check_vocab_size(params, vocab, pad_vocab = pad_vocab) check_vocab_size(params, vocab, pad_vocab=pad_vocab)
of = OutputFile(fname_out, endianess=endianess) of = OutputFile(fname_out, endianess=endianess)
@ -1120,6 +1145,9 @@ class OutputFile:
# meta data # meta data
of.add_meta_arch(params) of.add_meta_arch(params)
if isinstance(vocab, NoVocab):
of.gguf.add_tokenizer_model(vocab.tokenizer_model)
else:
of.add_meta_vocab(vocab) of.add_meta_vocab(vocab)
of.add_meta_special_vocab(svocab) of.add_meta_special_vocab(svocab)
@ -1131,24 +1159,7 @@ class OutputFile:
of.write_tensor_info() of.write_tensor_info()
# tensor data # tensor data
ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency = concurrency) of.write_tensor_data(ftype, model, concurrency)
if ftype == GGMLFileType.MostlyQ8_0:
ndarrays = bounded_parallel_map(
OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency,
use_processpool_executor=True,
)
else:
ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)
start = time.time()
for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
elapsed = time.time() - start
size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
padi = len(str(len(model)))
print(
f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
)
of.gguf.write_tensor_data(ndarray)
of.close() of.close()
@ -1309,8 +1320,8 @@ class VocabFactory:
return vtype, path return vtype, path
raise FileNotFoundError(f"Could not find any of {[self._FILES[vt] for vt in vocab_types]}") raise FileNotFoundError(f"Could not find any of {[self._FILES[vt] for vt in vocab_types]}")
def _create_special_vocab(self, vocab: Vocab, vocabtype: str, model_parent_path: Path) -> gguf.SpecialVocab: def _create_special_vocab(self, vocab: Vocab, model_parent_path: Path) -> gguf.SpecialVocab:
load_merges = vocabtype == "bpe" load_merges = vocab.name == "bpe"
n_vocab = vocab.vocab_size if hasattr(vocab, "vocab_size") else None n_vocab = vocab.vocab_size if hasattr(vocab, "vocab_size") else None
return gguf.SpecialVocab( return gguf.SpecialVocab(
model_parent_path, model_parent_path,
@ -1319,30 +1330,34 @@ class VocabFactory:
n_vocab=n_vocab, n_vocab=n_vocab,
) )
def load_vocab(self, vocab_types: list[str], model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]: def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab:
vocab_type, path = self._select_file(vocab_types) vocab_type, path = self._select_file(vocab_types)
print(f"Loading vocab file {path!r}, type {vocab_type!r}") print(f"Loading vocab file {path!r}, type {vocab_type!r}")
added_tokens_path = path.parent / "added_tokens.json" added_tokens_path = path.parent / "added_tokens.json"
vocab: Vocab
if vocab_type == "bpe": if vocab_type == "bpe":
vocab = BpeVocab( return BpeVocab(
path, added_tokens_path if added_tokens_path.exists() else None path, added_tokens_path if added_tokens_path.exists() else None
) )
elif vocab_type == "spm": if vocab_type == "spm":
vocab = SentencePieceVocab( return SentencePieceVocab(
path, added_tokens_path if added_tokens_path.exists() else None path, added_tokens_path if added_tokens_path.exists() else None
) )
elif vocab_type == "hfft": if vocab_type == "hfft":
vocab = HfVocab( return HfVocab(
path.parent, added_tokens_path if added_tokens_path.exists() else None path.parent, added_tokens_path if added_tokens_path.exists() else None
) )
else:
raise ValueError(vocab_type) raise ValueError(vocab_type)
def load_vocab(self, vocab_types: list[str], model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]:
vocab: Vocab
if len(vocab_types) == 1 and "no_vocab" in vocab_types:
vocab = NoVocab()
else:
vocab = self._create_vocab_by_path(vocab_types)
# FIXME: Respect --vocab-dir? # FIXME: Respect --vocab-dir?
special_vocab = self._create_special_vocab( special_vocab = self._create_special_vocab(
vocab, vocab,
vocab_type,
model_parent_path, model_parent_path,
) )
return vocab, special_vocab return vocab, special_vocab
@ -1380,6 +1395,7 @@ def main(args_in: list[str] | None = None) -> None:
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model") parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file") parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab") parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
parser.add_argument("--no-vocab", action="store_true", help="store model without the vocab")
parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)") parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file") parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
parser.add_argument("--vocab-type", help="vocab types to try in order, choose from 'spm', 'bpe', 'hfft' (default: spm,hfft)", default="spm,hfft") parser.add_argument("--vocab-type", help="vocab types to try in order, choose from 'spm', 'bpe', 'hfft' (default: spm,hfft)", default="spm,hfft")
@ -1392,6 +1408,10 @@ def main(args_in: list[str] | None = None) -> None:
parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing") parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
args = parser.parse_args(args_in) args = parser.parse_args(args_in)
if args.no_vocab:
if args.vocab_only:
raise ValueError("no need to specify --vocab-only if using --no-vocab")
args.vocab_type = "no_vocab"
if args.dump_single: if args.dump_single:
model_plus = lazy_load_file(args.model) model_plus = lazy_load_file(args.model)
@ -1442,7 +1462,7 @@ def main(args_in: list[str] | None = None) -> None:
print(f"Wrote {outfile}") print(f"Wrote {outfile}")
return return
if model_plus.vocab is not None and args.vocab_dir is None: if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
vocab = model_plus.vocab vocab = model_plus.vocab
print(f"Vocab info: {vocab}") print(f"Vocab info: {vocab}")

View file

@ -20,6 +20,7 @@ else()
add_subdirectory(convert-llama2c-to-ggml) add_subdirectory(convert-llama2c-to-ggml)
add_subdirectory(embedding) add_subdirectory(embedding)
add_subdirectory(finetune) add_subdirectory(finetune)
add_subdirectory(gritlm)
add_subdirectory(infill) add_subdirectory(infill)
add_subdirectory(llama-bench) add_subdirectory(llama-bench)
add_subdirectory(llava) add_subdirectory(llava)

View file

@ -106,7 +106,7 @@ int main(int argc, char ** argv) {
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch; ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
// ensure enough sequences are available // ensure enough sequences are available
ctx_params.n_parallel = *std::max_element(n_pl.begin(), n_pl.end()); ctx_params.n_seq_max = *std::max_element(n_pl.begin(), n_pl.end());
llama_context * ctx = llama_new_context_with_model(model, ctx_params); llama_context * ctx = llama_new_context_with_model(model, ctx_params);
@ -138,6 +138,8 @@ int main(int argc, char ** argv) {
LOG_TEE("failed to decode the batch, n_batch = %d, ret = %d\n", n_batch, ret); LOG_TEE("failed to decode the batch, n_batch = %d, ret = %d\n", n_batch, ret);
return false; return false;
} }
llama_synchronize(ctx);
} }
return true; return true;

View file

@ -80,7 +80,7 @@ int main(int argc, char ** argv) {
ctx_params.seed = 1234; ctx_params.seed = 1234;
ctx_params.n_ctx = n_kv_req; ctx_params.n_ctx = n_kv_req;
ctx_params.n_batch = std::max(n_len, n_parallel); ctx_params.n_batch = std::max(n_len, n_parallel);
ctx_params.n_parallel = n_parallel; ctx_params.n_seq_max = n_parallel;
ctx_params.n_threads = params.n_threads; ctx_params.n_threads = params.n_threads;
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch; ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;

View file

@ -107,18 +107,25 @@ int main(int argc, char ** argv) {
// max batch size // max batch size
const uint64_t n_batch = params.n_batch; const uint64_t n_batch = params.n_batch;
GGML_ASSERT(params.n_batch == params.n_ctx); GGML_ASSERT(params.n_batch >= params.n_ctx);
// tokenize the prompts and trim // tokenize the prompts and trim
std::vector<std::vector<int32_t>> inputs; std::vector<std::vector<int32_t>> inputs;
for (const auto & prompt : prompts) { for (const auto & prompt : prompts) {
auto inp = ::llama_tokenize(ctx, prompt, true); auto inp = ::llama_tokenize(ctx, prompt, true, false);
if (inp.size() > n_batch) { if (inp.size() > n_batch) {
inp.resize(n_batch); inp.resize(n_batch);
} }
inputs.push_back(inp); inputs.push_back(inp);
} }
// add eos if not present
for (auto & inp : inputs) {
if (inp.empty() || inp.back() != llama_token_eos(model)) {
inp.push_back(llama_token_eos(model));
}
}
// tokenization stats // tokenization stats
if (params.verbose_prompt) { if (params.verbose_prompt) {
for (int i = 0; i < (int) inputs.size(); i++) { for (int i = 0; i < (int) inputs.size(); i++) {
@ -167,15 +174,26 @@ int main(int argc, char ** argv) {
float * out = emb + p * n_embd; float * out = emb + p * n_embd;
batch_decode(ctx, batch, out, s, n_embd); batch_decode(ctx, batch, out, s, n_embd);
// print first 3 embeddings // print the first part of the embeddings
for (int j = 0; j < std::min(3, n_prompts); j++) { fprintf(stdout, "\n");
fprintf(stderr, "embedding %d: ", j); for (int j = 0; j < n_prompts; j++) {
for (int i = 0; i < n_embd; i++) { fprintf(stdout, "embedding %d: ", j);
fprintf(stderr, "%f ", emb[j * n_embd + i]); for (int i = 0; i < std::min(16, n_embd); i++) {
fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
} }
fprintf(stderr, "\n\n"); fprintf(stdout, "\n");
}
// print cosine similarity matrix
fprintf(stdout, "\n");
printf("cosine similarity matrix:\n\n");
for (int i = 0; i < n_prompts; i++) {
for (int j = 0; j < n_prompts; j++) {
float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
fprintf(stdout, "%6.2f ", sim);
}
fprintf(stdout, "\n");
} }
fprintf(stderr, "\n");
// clean up // clean up
llama_print_timings(ctx); llama_print_timings(ctx);

View file

@ -211,6 +211,7 @@ static bool gguf_ex_read_1(const std::string & fname) {
for (int j = 0; j < ggml_nelements(cur); ++j) { for (int j = 0; j < ggml_nelements(cur); ++j) {
if (data[j] != 100 + i) { if (data[j] != 100 + i) {
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]); fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
gguf_free(ctx);
return false; return false;
} }
} }

View file

@ -0,0 +1,5 @@
set(TARGET gritlm)
add_executable(${TARGET} gritlm.cpp)
install(TARGETS ${TARGET} RUNTIME)
target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)

215
examples/gritlm/gritlm.cpp Normal file
View file

@ -0,0 +1,215 @@
#include "common.h"
#include "llama.h"
#include <string>
#include <vector>
// #define GRIT_DEBUG
static std::vector<std::vector<float>> encode(llama_context * ctx, const std::vector<std::string> & sentences, const std::string & instruction) {
std::vector<std::vector<float>> result;
const llama_model * mdl = llama_get_model(ctx);
llama_batch batch = llama_batch_init(llama_n_batch(ctx), 0, 1);
for (uint64_t i = 0; i < sentences.size(); i++) {
llama_batch_clear(batch);
const std::string input_string = instruction + sentences[i];
std::vector<llama_token> inputs = llama_tokenize(mdl, input_string, true, false);
const int32_t n_toks = inputs.size();
// GritLM seems to have EOS = ""
// https://github.com/ContextualAI/gritlm/blob/92025b16534712b31b3c4aaaf069350e222bd5f8/gritlm/gritlm.py#L18
// inputs.push_back(llama_token_eos(mdl));
// we want to ignore instruction tokens for mean pooling
const int32_t n_inst = llama_tokenize(mdl, instruction, true, false).size();
#ifdef GRIT_DEBUG
// debug tokens - should be matching as referenced in the GritLM sample
std::for_each(inputs.begin(), inputs.end(), [&ctx](llama_token t) {
std::printf("[%u:%s]", t, llama_token_to_piece(ctx, t).c_str());
});
std::printf("\n");
#endif
// add input to batch (this increments n_tokens)
for (int32_t j = 0; j < n_toks; j++) {
llama_batch_add(batch, inputs[j], j, { 0 }, j >= n_inst);
}
// clear previous kv_cache values (irrelevant for embeddings)
llama_kv_cache_clear(ctx);
llama_set_causal_attn(ctx, false);
// run model
llama_decode(ctx, batch);
// get embedding dimensions
uint64_t n_embd = llama_n_embd(mdl);
// allocate embedding output
std::vector<float> emb_unorm(n_embd, 0.0f);
// sum up all token embeddings
for (int32_t k = n_inst; k < n_toks; k++) {
float * emb = llama_get_embeddings_ith(ctx, k);
for (uint64_t j = 0; j < n_embd; j++) {
emb_unorm[j] += emb[j];
}
}
// divide by number of tokens (mean pooling)
{
const uint64_t n_sent = n_toks - n_inst;
for (uint64_t j = 0; j < n_embd; j++) {
emb_unorm[j] /= n_sent;
}
}
std::vector<float> emb_norm(emb_unorm.size());
llama_embd_normalize(emb_unorm.data(), emb_norm.data(), n_embd);
result.push_back(emb_norm);
#ifdef GRIT_DEBUG
// print out emb_norm
std::printf("embedding %ld: ", i);
for (uint64_t j = 0; j < n_embd; j++) {
std::printf("%.5f ", emb_norm[j]);
}
std::printf("\n\n");
#endif
}
llama_batch_free(batch);
return result;
}
static std::string generate(llama_context * ctx, const std::string & prompt, bool stream) {
std::string result;
const llama_model * mdl = llama_get_model(ctx);
llama_token eos_token = llama_token_eos(mdl);
llama_kv_cache_clear(ctx);
llama_set_causal_attn(ctx, true);
llama_batch bat = llama_batch_init(llama_n_batch(ctx), 0, 1);
std::vector<llama_token> inputs = llama_tokenize(mdl, prompt, false, true);
int32_t i_current_token = 0;
while (true) {
llama_batch_clear(bat);
auto n_inputs = (int32_t)inputs.size();
for (int32_t i = 0; i < n_inputs; i++) {
llama_batch_add(bat, inputs[i], i_current_token++, { 0 }, i == n_inputs - 1);
}
inputs.clear();
llama_decode(ctx, bat);
auto logits = llama_get_logits_ith(ctx, bat.n_tokens - 1);
auto candidates = std::vector<llama_token_data>(llama_n_vocab(mdl));
auto n_candidates = (int32_t)candidates.size();
for (int32_t token = 0; token < n_candidates; token++) {
candidates[token] = llama_token_data{ token, logits[token], 0.0f };
}
auto candidates_p = llama_token_data_array{ candidates.data(), candidates.size(), false };
llama_token token = llama_sample_token_greedy(ctx, &candidates_p);
if (token == eos_token) {
break;
}
std::string piece = llama_token_to_piece(ctx, token);
if (stream) {
std::printf("%s", piece.c_str());
std::fflush(stdout);
}
inputs.push_back(token);
result += piece;
}
if (stream) {
std::printf("\n");
}
llama_batch_free(bat);
return result;
}
static std::string gritlm_instruction(const std::string & instruction) {
return !instruction.empty() ? "<|user|>\n" + instruction + "\n<|embed|>\n" : "<|embed|>\n";
}
int main(int argc, char * argv[]) {
gpt_params params;
if (!gpt_params_parse(argc, argv, params)) {
return 1;
}
llama_model_params mparams = llama_model_params_from_gpt_params(params);
llama_context_params cparams = llama_context_params_from_gpt_params(params);
llama_backend_init();
llama_model * mdl = llama_load_model_from_file(params.model.c_str(), mparams);
// create new context - set to embedding mode
cparams.embeddings = true;
llama_context * ctx = llama_new_context_with_model(mdl, cparams);
// ### Embedding/Representation ###
// samples taken from: https://github.com/ContextualAI/gritlm#basic
{
const std::string instruction = "Given a scientific paper title, retrieve the paper's abstract";
const std::vector<std::string> queries = {
"Bitcoin: A Peer-to-Peer Electronic Cash System",
"Generative Representational Instruction Tuning",
};
const std::vector<std::string> documents = {
"A purely peer-to-peer version of electronic cash would allow online payments to be sent directly from one party to another without going through a financial institution. Digital signatures provide part of the solution, but the main benefits are lost if a trusted third party is still required to prevent double-spending. We propose a solution to the double-spending problem using a peer-to-peer network. The network timestamps transactions by hashing them into an ongoing chain of hash-based proof-of-work, forming a record that cannot be changed without redoing the proof-of-work. The longest chain not only serves as proof of the sequence of events witnessed, but proof that it came from the largest pool of CPU power. As long as a majority of CPU power is controlled by nodes that are not cooperating to attack the network, they'll generate the longest chain and outpace attackers. The network itself requires minimal structure. Messages are broadcast on a best effort basis, and nodes can leave and rejoin the network at will, accepting the longest proof-of-work chain as proof of what happened while they were gone.",
"All text-based language problems can be reduced to either generation or embedding. Current models only perform well at one or the other. We introduce generative representational instruction tuning (GRIT) whereby a large language model is trained to handle both generative and embedding tasks by distinguishing between them through instructions. Compared to other open models, our resulting GritLM 7B sets a new state of the art on the Massive Text Embedding Benchmark (MTEB) and outperforms all models up to its size on a range of generative tasks. By scaling up further, GritLM 8X7B outperforms all open generative language models that we tried while still being among the best embedding models. Notably, we find that GRIT matches training on only generative or embedding data, thus we can unify both at no performance loss. Among other benefits, the unification via GRIT speeds up Retrieval-Augmented Generation (RAG) by > 60% for long documents, by no longer requiring separate retrieval and generation models. Models, code, etc. are freely available at https://github.com/ContextualAI/gritlm.",
};
// No need to add instruction for retrieval documents
const std::vector<std::vector<float>> d_rep = encode(ctx, documents, gritlm_instruction(""));
const std::vector<std::vector<float>> q_rep = encode(ctx, queries, gritlm_instruction(instruction));
const int n_embd = llama_n_embd(mdl);
const float cosine_sim_q0_d0 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
const float cosine_sim_q0_d1 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
const float cosine_sim_q1_d0 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[0].data(), n_embd);
const float cosine_sim_q1_d1 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[1].data(), n_embd);
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[0].c_str(), cosine_sim_q0_d0);
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[1].c_str(), cosine_sim_q0_d1);
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[1].c_str(), documents[0].c_str(), cosine_sim_q1_d0);
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[1].c_str(), documents[1].c_str(), cosine_sim_q1_d1);
}
// ### Generation ###
// GritLM models are not finetuned with system prompts, as you can just include system-like instructions together with your user instruction
{
const std::string prompt = "<|user|>\nPlease write me a poem about my recent hike of Mt. Fuji at midnight in the style of Shakespeare.\n<|assistant|>\n";
std::string response = generate(ctx, prompt, true);
}
llama_free(ctx);
llama_free_model(mdl);
llama_backend_free();
return 0;
}

View file

@ -103,6 +103,7 @@ static std::string get_cpu_info() {
} }
} }
} }
fclose(f);
} }
#endif #endif
// TODO: other platforms // TODO: other platforms
@ -164,6 +165,7 @@ struct cmd_params {
std::vector<int> n_prompt; std::vector<int> n_prompt;
std::vector<int> n_gen; std::vector<int> n_gen;
std::vector<int> n_batch; std::vector<int> n_batch;
std::vector<int> n_ubatch;
std::vector<ggml_type> type_k; std::vector<ggml_type> type_k;
std::vector<ggml_type> type_v; std::vector<ggml_type> type_v;
std::vector<int> n_threads; std::vector<int> n_threads;
@ -183,7 +185,8 @@ static const cmd_params cmd_params_defaults = {
/* model */ {"models/7B/ggml-model-q4_0.gguf"}, /* model */ {"models/7B/ggml-model-q4_0.gguf"},
/* n_prompt */ {512}, /* n_prompt */ {512},
/* n_gen */ {128}, /* n_gen */ {128},
/* n_batch */ {512}, /* n_batch */ {2048},
/* n_ubatch */ {512},
/* type_k */ {GGML_TYPE_F16}, /* type_k */ {GGML_TYPE_F16},
/* type_v */ {GGML_TYPE_F16}, /* type_v */ {GGML_TYPE_F16},
/* n_threads */ {get_num_physical_cores()}, /* n_threads */ {get_num_physical_cores()},
@ -208,6 +211,7 @@ static void print_usage(int /* argc */, char ** argv) {
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str()); printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str()); printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str()); printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
printf(" -ub N, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
printf(" -ctk <t>, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str()); printf(" -ctk <t>, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
printf(" -ctv <t>, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str()); printf(" -ctv <t>, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str()); printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
@ -217,7 +221,7 @@ static void print_usage(int /* argc */, char ** argv) {
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str()); printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str()); printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str()); printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
printf(" -ts, --tensor_split <ts0/ts1/..> (default: 0)\n"); printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps); printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0"); printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
@ -297,6 +301,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
} }
auto p = split<int>(argv[i], split_delim); auto p = split<int>(argv[i], split_delim);
params.n_batch.insert(params.n_batch.end(), p.begin(), p.end()); params.n_batch.insert(params.n_batch.end(), p.begin(), p.end());
} else if (arg == "-ub" || arg == "--ubatch-size") {
if (++i >= argc) {
invalid_param = true;
break;
}
auto p = split<int>(argv[i], split_delim);
params.n_ubatch.insert(params.n_ubatch.end(), p.begin(), p.end());
} else if (arg == "-ctk" || arg == "--cache-type-k") { } else if (arg == "-ctk" || arg == "--cache-type-k") {
if (++i >= argc) { if (++i >= argc) {
invalid_param = true; invalid_param = true;
@ -455,6 +466,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; } if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; }
if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; } if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; }
if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; } if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; }
if (params.n_ubatch.empty()) { params.n_ubatch = cmd_params_defaults.n_ubatch; }
if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; } if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; }
if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; } if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; }
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; } if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
@ -474,6 +486,7 @@ struct cmd_params_instance {
int n_prompt; int n_prompt;
int n_gen; int n_gen;
int n_batch; int n_batch;
int n_ubatch;
ggml_type type_k; ggml_type type_k;
ggml_type type_v; ggml_type type_v;
int n_threads; int n_threads;
@ -511,6 +524,7 @@ struct cmd_params_instance {
cparams.n_ctx = n_prompt + n_gen; cparams.n_ctx = n_prompt + n_gen;
cparams.n_batch = n_batch; cparams.n_batch = n_batch;
cparams.n_ubatch = n_ubatch;
cparams.type_k = type_k; cparams.type_k = type_k;
cparams.type_v = type_v; cparams.type_v = type_v;
cparams.offload_kqv = !no_kv_offload; cparams.offload_kqv = !no_kv_offload;
@ -532,6 +546,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
for (const auto & mmp : params.use_mmap) for (const auto & mmp : params.use_mmap)
for (const auto & embd : params.embeddings) for (const auto & embd : params.embeddings)
for (const auto & nb : params.n_batch) for (const auto & nb : params.n_batch)
for (const auto & nub : params.n_ubatch)
for (const auto & tk : params.type_k) for (const auto & tk : params.type_k)
for (const auto & tv : params.type_v) for (const auto & tv : params.type_v)
for (const auto & nkvo : params.no_kv_offload) for (const auto & nkvo : params.no_kv_offload)
@ -545,6 +560,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
/* .n_prompt = */ n_prompt, /* .n_prompt = */ n_prompt,
/* .n_gen = */ 0, /* .n_gen = */ 0,
/* .n_batch = */ nb, /* .n_batch = */ nb,
/* .n_ubatch = */ nub,
/* .type_k = */ tk, /* .type_k = */ tk,
/* .type_v = */ tv, /* .type_v = */ tv,
/* .n_threads = */ nt, /* .n_threads = */ nt,
@ -568,6 +584,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
/* .n_prompt = */ 0, /* .n_prompt = */ 0,
/* .n_gen = */ n_gen, /* .n_gen = */ n_gen,
/* .n_batch = */ nb, /* .n_batch = */ nb,
/* .n_ubatch = */ nub,
/* .type_k = */ tk, /* .type_k = */ tk,
/* .type_v = */ tv, /* .type_v = */ tv,
/* .n_threads = */ nt, /* .n_threads = */ nt,
@ -604,6 +621,7 @@ struct test {
uint64_t model_size; uint64_t model_size;
uint64_t model_n_params; uint64_t model_n_params;
int n_batch; int n_batch;
int n_ubatch;
int n_threads; int n_threads;
ggml_type type_k; ggml_type type_k;
ggml_type type_v; ggml_type type_v;
@ -627,6 +645,7 @@ struct test {
model_size = llama_model_size(lmodel); model_size = llama_model_size(lmodel);
model_n_params = llama_model_n_params(lmodel); model_n_params = llama_model_n_params(lmodel);
n_batch = inst.n_batch; n_batch = inst.n_batch;
n_ubatch = inst.n_ubatch;
n_threads = inst.n_threads; n_threads = inst.n_threads;
type_k = inst.type_k; type_k = inst.type_k;
type_v = inst.type_v; type_v = inst.type_v;
@ -705,7 +724,8 @@ struct test {
"cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas",
"cpu_info", "gpu_info", "cpu_info", "gpu_info",
"model_filename", "model_type", "model_size", "model_n_params", "model_filename", "model_type", "model_size", "model_n_params",
"n_batch", "n_threads", "type_k", "type_v", "n_batch", "n_ubatch",
"n_threads", "type_k", "type_v",
"n_gpu_layers", "split_mode", "n_gpu_layers", "split_mode",
"main_gpu", "no_kv_offload", "main_gpu", "no_kv_offload",
"tensor_split", "use_mmap", "embeddings", "tensor_split", "use_mmap", "embeddings",
@ -719,7 +739,8 @@ struct test {
enum field_type {STRING, BOOL, INT, FLOAT}; enum field_type {STRING, BOOL, INT, FLOAT};
static field_type get_field_type(const std::string & field) { static field_type get_field_type(const std::string & field) {
if (field == "build_number" || field == "n_batch" || field == "n_threads" || if (field == "build_number" || field == "n_batch" || field == "n_ubatch" ||
field == "n_threads" ||
field == "model_size" || field == "model_n_params" || field == "model_size" || field == "model_n_params" ||
field == "n_gpu_layers" || field == "main_gpu" || field == "n_gpu_layers" || field == "main_gpu" ||
field == "n_prompt" || field == "n_gen" || field == "n_prompt" || field == "n_gen" ||
@ -759,7 +780,8 @@ struct test {
std::to_string(metal), std::to_string(sycl), std::to_string(gpu_blas), std::to_string(blas), std::to_string(metal), std::to_string(sycl), std::to_string(gpu_blas), std::to_string(blas),
cpu_info, gpu_info, cpu_info, gpu_info,
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params), model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v), std::to_string(n_batch), std::to_string(n_ubatch),
std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
std::to_string(n_gpu_layers), split_mode_str(split_mode), std::to_string(n_gpu_layers), split_mode_str(split_mode),
std::to_string(main_gpu), std::to_string(no_kv_offload), std::to_string(main_gpu), std::to_string(no_kv_offload),
tensor_split_str, std::to_string(use_mmap), std::to_string(embeddings), tensor_split_str, std::to_string(use_mmap), std::to_string(embeddings),
@ -957,6 +979,9 @@ struct markdown_printer : public printer {
if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) { if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) {
fields.emplace_back("n_batch"); fields.emplace_back("n_batch");
} }
if (params.n_ubatch.size() > 1 || params.n_ubatch != cmd_params_defaults.n_ubatch) {
fields.emplace_back("n_ubatch");
}
if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) { if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) {
fields.emplace_back("type_k"); fields.emplace_back("type_k");
} }
@ -1096,25 +1121,32 @@ struct sql_printer : public printer {
}; };
static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) { static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
llama_set_n_threads(ctx, n_threads, n_threads);
//std::vector<llama_token> tokens(n_prompt, llama_token_bos(llama_get_model(ctx)));
//llama_decode(ctx, llama_batch_get_one(tokens.data(), n_prompt, n_past, 0));
//GGML_UNUSED(n_batch);
std::vector<llama_token> tokens(n_batch, llama_token_bos(llama_get_model(ctx))); std::vector<llama_token> tokens(n_batch, llama_token_bos(llama_get_model(ctx)));
int n_processed = 0; int n_processed = 0;
llama_set_n_threads(ctx, n_threads, n_threads);
while (n_processed < n_prompt) { while (n_processed < n_prompt) {
int n_tokens = std::min(n_prompt - n_processed, n_batch); int n_tokens = std::min(n_prompt - n_processed, n_batch);
llama_decode(ctx, llama_batch_get_one(tokens.data(), n_tokens, n_past + n_processed, 0)); llama_decode(ctx, llama_batch_get_one(tokens.data(), n_tokens, n_past + n_processed, 0));
n_processed += n_tokens; n_processed += n_tokens;
} }
llama_synchronize(ctx);
} }
static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) { static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
llama_token token = llama_token_bos(llama_get_model(ctx));
llama_set_n_threads(ctx, n_threads, n_threads); llama_set_n_threads(ctx, n_threads, n_threads);
llama_token token = llama_token_bos(llama_get_model(ctx));
for (int i = 0; i < n_gen; i++) { for (int i = 0; i < n_gen; i++) {
llama_decode(ctx, llama_batch_get_one(&token, 1, n_past + i, 0)); llama_decode(ctx, llama_batch_get_one(&token, 1, n_past + i, 0));
llama_synchronize(ctx);
} }
} }
@ -1203,7 +1235,8 @@ int main(int argc, char ** argv) {
// warmup run // warmup run
if (t.n_prompt > 0) { if (t.n_prompt > 0) {
test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads); //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
} }
if (t.n_gen > 0) { if (t.n_gen > 0) {
test_gen(ctx, 1, 0, t.n_threads); test_gen(ctx, 1, 0, t.n_threads);
@ -1219,6 +1252,7 @@ int main(int argc, char ** argv) {
if (t.n_gen > 0) { if (t.n_gen > 0) {
test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads); test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
} }
uint64_t t_ns = get_time_ns() - t_start; uint64_t t_ns = get_time_ns() - t_start;
t.samples_ns.push_back(t_ns); t.samples_ns.push_back(t_ns);
} }

View file

@ -33,6 +33,45 @@ jclass la_int_var;
jmethodID la_int_var_value; jmethodID la_int_var_value;
jmethodID la_int_var_inc; jmethodID la_int_var_inc;
std::string cached_token_chars;
bool is_valid_utf8(const char * string) {
if (!string) {
return true;
}
const unsigned char * bytes = (const unsigned char *)string;
int num;
while (*bytes != 0x00) {
if ((*bytes & 0x80) == 0x00) {
// U+0000 to U+007F
num = 1;
} else if ((*bytes & 0xE0) == 0xC0) {
// U+0080 to U+07FF
num = 2;
} else if ((*bytes & 0xF0) == 0xE0) {
// U+0800 to U+FFFF
num = 3;
} else if ((*bytes & 0xF8) == 0xF0) {
// U+10000 to U+10FFFF
num = 4;
} else {
return false;
}
bytes += 1;
for (int i = 1; i < num; ++i) {
if ((*bytes & 0xC0) != 0x80) {
return false;
}
bytes += 1;
}
}
return true;
}
static void log_callback(ggml_log_level level, const char * fmt, void * data) { static void log_callback(ggml_log_level level, const char * fmt, void * data) {
if (level == GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data); if (level == GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data);
else if (level == GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data); else if (level == GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data);
@ -295,6 +334,8 @@ Java_com_example_llama_Llm_completion_1init(
jint n_len jint n_len
) { ) {
cached_token_chars.clear();
const auto text = env->GetStringUTFChars(jtext, 0); const auto text = env->GetStringUTFChars(jtext, 0);
const auto context = reinterpret_cast<llama_context *>(context_pointer); const auto context = reinterpret_cast<llama_context *>(context_pointer);
const auto batch = reinterpret_cast<llama_batch *>(batch_pointer); const auto batch = reinterpret_cast<llama_batch *>(batch_pointer);
@ -372,8 +413,16 @@ Java_com_example_llama_Llm_completion_1loop(
} }
auto new_token_chars = llama_token_to_piece(context, new_token_id); auto new_token_chars = llama_token_to_piece(context, new_token_id);
LOGi("new_token_chars: `%s`", new_token_chars.c_str()); cached_token_chars += new_token_chars;
auto new_token = env->NewStringUTF(new_token_chars.c_str());
jstring new_token = nullptr;
if (is_valid_utf8(cached_token_chars.c_str())) {
new_token = env->NewStringUTF(cached_token_chars.c_str());
LOGi("cached: %s, new_token_chars: `%s`, id: %d", cached_token_chars.c_str(), new_token_chars.c_str(), new_token_id);
cached_token_chars.clear();
} else {
new_token = env->NewStringUTF("");
}
llama_batch_clear(*batch); llama_batch_clear(*batch);
llama_batch_add(*batch, new_token_id, n_cur, { 0 }, true); llama_batch_add(*batch, new_token_id, n_cur, { 0 }, true);

View file

@ -71,7 +71,7 @@ class Llm {
batch: Long, batch: Long,
nLen: Int, nLen: Int,
ncur: IntVar ncur: IntVar
): String ): String?
private external fun kv_cache_clear(context: Long) private external fun kv_cache_clear(context: Long)
@ -115,7 +115,7 @@ class Llm {
val ncur = IntVar(completion_init(state.context, state.batch, message, nlen)) val ncur = IntVar(completion_init(state.context, state.batch, message, nlen))
while (ncur.value <= nlen) { while (ncur.value <= nlen) {
val str = completion_loop(state.context, state.batch, nlen, ncur) val str = completion_loop(state.context, state.batch, nlen, ncur)
if (str.isEmpty()) { if (str == null) {
break break
} }
emit(str) emit(str)

View file

@ -221,6 +221,7 @@ actor LlamaContext {
if llama_decode(context, batch) != 0 { if llama_decode(context, batch) != 0 {
print("llama_decode() failed during prompt") print("llama_decode() failed during prompt")
} }
llama_synchronize(context)
let t_pp_end = ggml_time_us() let t_pp_end = ggml_time_us()
@ -240,6 +241,7 @@ actor LlamaContext {
if llama_decode(context, batch) != 0 { if llama_decode(context, batch) != 0 {
print("llama_decode() failed during text generation") print("llama_decode() failed during text generation")
} }
llama_synchronize(context)
} }
let t_tg_end = ggml_time_us() let t_tg_end = ggml_time_us()

View file

@ -63,12 +63,20 @@ Now both the LLaMA part and the image encoder is in the `llava-v1.5-7b` director
```console ```console
git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b
``` ```
2) Use `llava-surgery-v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models:
2) Install the required Python packages:
```sh
pip install -r examples/llava/requirements.txt
```
3) Use `llava-surgery-v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models:
```console ```console
python examples/llava/llava-surgery-v2.py -C -m ../llava-v1.6-vicuna-7b/ python examples/llava/llava-surgery-v2.py -C -m ../llava-v1.6-vicuna-7b/
``` ```
- you will find a llava.projector and a llava.clip file in your model directory - you will find a llava.projector and a llava.clip file in your model directory
3) Copy the llava.clip file into a subdirectory (like vit), rename it to pytorch_model.bin and add a fitting vit configuration to the directory:
4) Copy the llava.clip file into a subdirectory (like vit), rename it to pytorch_model.bin and add a fitting vit configuration to the directory:
```console ```console
mkdir vit mkdir vit
cp ../llava-v1.6-vicuna-7b/llava.clip vit/pytorch_model.bin cp ../llava-v1.6-vicuna-7b/llava.clip vit/pytorch_model.bin
@ -76,18 +84,18 @@ cp ../llava-v1.6-vicuna-7b/llava.projector vit/
curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.json -o vit/config.json curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.json -o vit/config.json
``` ```
4) Create the visual gguf model: 5) Create the visual gguf model:
```console ```console
python ./examples/llava/convert-image-encoder-to-gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision python ./examples/llava/convert-image-encoder-to-gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
``` ```
- This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP - This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP
5) Then convert the model to gguf format: 6) Then convert the model to gguf format:
```console ```console
python ./convert.py ../llava-v1.6-vicuna-7b/ --skip-unknown python ./convert.py ../llava-v1.6-vicuna-7b/ --skip-unknown
``` ```
6) And finally we can run the llava-cli using the 1.6 model version: 7) And finally we can run the llava-cli using the 1.6 model version:
```console ```console
./llava-cli -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096 ./llava-cli -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096
``` ```

View file

@ -995,6 +995,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
if (!new_clip->ctx_data) { if (!new_clip->ctx_data) {
fprintf(stderr, "%s: ggml_init() failed\n", __func__); fprintf(stderr, "%s: ggml_init() failed\n", __func__);
clip_free(new_clip); clip_free(new_clip);
gguf_free(ctx);
return nullptr; return nullptr;
} }
@ -1002,6 +1003,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
if (!fin) { if (!fin) {
printf("cannot open model file for loading tensors\n"); printf("cannot open model file for loading tensors\n");
clip_free(new_clip); clip_free(new_clip);
gguf_free(ctx);
return nullptr; return nullptr;
} }
@ -1023,6 +1025,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
if (!fin) { if (!fin) {
printf("%s: failed to seek for tensor %s\n", __func__, name); printf("%s: failed to seek for tensor %s\n", __func__, name);
clip_free(new_clip); clip_free(new_clip);
gguf_free(ctx);
return nullptr; return nullptr;
} }
int num_bytes = ggml_nbytes(cur); int num_bytes = ggml_nbytes(cur);
@ -1908,6 +1911,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
break; break;
default: default:
printf("Please use an input file in f32 or f16\n"); printf("Please use an input file in f32 or f16\n");
gguf_free(ctx_out);
return false; return false;
} }

View file

@ -878,6 +878,7 @@ int main(int argc, char ** argv) {
const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true); const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
const auto line_inp = ::llama_tokenize(ctx, buffer, false, false); const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true); const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str()); LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end()); embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());

View file

@ -589,9 +589,10 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
} }
} }
const auto t_end = std::chrono::high_resolution_clock::now();
if (i == 0) { if (i == 0) {
llama_synchronize(ctx);
const auto t_end = std::chrono::high_resolution_clock::now();
const float t_total = std::chrono::duration<float>(t_end - t_start).count(); const float t_total = std::chrono::duration<float>(t_end - t_start).count();
fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total); fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total);
int total_seconds = (int)(t_total*n_chunk/n_seq); int total_seconds = (int)(t_total*n_chunk/n_seq);
@ -841,7 +842,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
const int n_batch = params.n_batch; const int n_batch = params.n_batch;
const int max_tasks_per_batch = 32; const int max_tasks_per_batch = 32;
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx)); const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq); llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
@ -1118,7 +1119,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
const int n_batch = params.n_batch; const int n_batch = params.n_batch;
const int max_tasks_per_batch = 128; const int max_tasks_per_batch = 128;
const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_max_seq(ctx)); const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq); llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
@ -1470,7 +1471,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
const int n_batch = params.n_batch; const int n_batch = params.n_batch;
const int max_tasks_per_batch = 32; const int max_tasks_per_batch = 32;
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx)); const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq); llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);

View file

@ -123,10 +123,10 @@ You can consume the endpoints with Postman or NodeJS with axios library. You can
### Docker ### Docker
```bash ```bash
docker run -p 8080:8080 -v /path/to/models:/models ggerganov/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 docker run -p 8080:8080 -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080
# or, with CUDA: # or, with CUDA:
docker run -p 8080:8080 -v /path/to/models:/models --gpus all ggerganov/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99 docker run -p 8080:8080 -v /path/to/models:/models --gpus all ghcr.io/ggerganov/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99
``` ```
## Testing with CURL ## Testing with CURL
@ -556,9 +556,51 @@ Run with bash:
bash chat.sh bash chat.sh
``` ```
### API like OAI ### OAI-like API
The HTTP server supports OAI-like API The HTTP server supports OAI-like API: https://github.com/openai/openai-openapi
### API errors
Server returns error in the same format as OAI: https://github.com/openai/openai-openapi
Example of an error:
```json
{
"error": {
"code": 401,
"message": "Invalid API Key",
"type": "authentication_error"
}
}
```
Apart from error types supported by OAI, we also have custom types that are specific to functionalities of llama.cpp:
**When /metrics or /slots endpoint is disabled**
```json
{
"error": {
"code": 501,
"message": "This server does not support metrics endpoint.",
"type": "not_supported_error"
}
}
```
**When the server receives invalid grammar via */completions endpoint**
```json
{
"error": {
"code": 400,
"message": "Failed to parse grammar",
"type": "invalid_request_error"
}
}
```
### Extending or building alternative Web Front End ### Extending or building alternative Web Front End

View file

@ -231,255 +231,256 @@ unsigned char completion_js[] = {
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74,
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72,
0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74,
0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f,
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75,
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20,
0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x65, 0x6e, 0x74, 0x2e, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e,
0x28, 0x27, 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67,
0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x65, 0x2e, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0x28, 0x27,
0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x69, 0x6c,
0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x61, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x61,
0x6e, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, 0x62, 0x6e, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, 0x62,
0x65, 0x20, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x20, 0x62, 0x79, 0x20, 0x65, 0x20, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x20, 0x62, 0x79, 0x20,
0x75, 0x70, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6c, 0x75, 0x70, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6c,
0x6c, 0x65, 0x72, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x72, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72,
0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x27, 0x6f, 0x77, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x72, 0x72, 0x6f, 0x72,
0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x28, 0x27, 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61,
0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72,
0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70,
0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x5b, 0x24, 0x7b, 0x72,
0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e,
0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x64, 0x65, 0x7d, 0x20, 0x2d, 0x20, 0x24, 0x7b, 0x72, 0x65,
0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x74,
0x79, 0x70, 0x65, 0x7d, 0x5d, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73,
0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x6d, 0x65,
0x73, 0x73, 0x61, 0x67, 0x65, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x28, 0x65, 0x29,
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65,
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x61, 0x6d,
0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20,
0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72,
0x6f, 0x72, 0x7d, 0x60, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65,
0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20,
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72,
0x6f, 0x72, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61,
0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65,
0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c,
0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f,
0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e,
0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20,
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a,
0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61,
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x6d, 0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72,
0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x45, 0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20,
0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65,
0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d,
0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x20, 0x61, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20,
0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x65, 0x29, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20,
0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79,
0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x7b, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28,
0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29, 0x3b, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65,
0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x6c, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x72, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20,
0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x61, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20,
0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x79, 0x6f, 0x79, 0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73,
0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73, 0x63, 0x72, 0x63, 0x72, 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a,
0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a,
0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70,
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x6f, 0x72, 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45,
0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d,
0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20, 0x66, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70,
0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f,
0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61,
0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x74, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f,
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64,
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45, 0x76, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65,
0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x28, 0x72, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c,
0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20,
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f,
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65,
0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69,
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x2e, 0x6c, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f,
0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65,
0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d,
0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d,
0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b, 0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20,
0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x63, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74,
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77,
0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x20, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x29, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63,
0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66,
0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f,
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66,
0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63,
0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e,
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64,
0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b,
0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65,
0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73,
0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e,
0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65,
0x6e, 0x74, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22,
0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20,
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d,
0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68,
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e,
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72,
0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68,
0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75,
0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67,
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65,
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64,
0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b,
0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e,
0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74,
0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61,
0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77,
0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74,
0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28, 0x22, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20,
0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68,
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29, 0x29, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d,
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72,
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68,
0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75,
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64,
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x6f, 0x6e, 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61,
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x69, 0x6c, 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65, 0x6e, 0x74, 0x20, 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d,
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x29, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x74, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c,
0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6c, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74,
0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x75, 0x72, 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73,
0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x65, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x76, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63,
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78,
0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x74, 0x2e, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73,
0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x20, 0x6e, 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74,
0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x20, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f,
0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65,
0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x3a, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20,
0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65,
0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65,
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x6e, 0x28, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20,
0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77,
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a,
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72,
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61,
0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72,
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20,
0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72,
0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63,
0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72,
0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f,
0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20,
0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e, 0x3d, 0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67,
0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e, 0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77,
0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a, 0x20, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79,
0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x6e, 0x63, 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c,
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20,
0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f,
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a,
0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69,
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75,
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28,
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61,
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29,
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68,
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e,
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f,
0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x6c, 0x76, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63,
0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x68, 0x20, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a,
0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74,
0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a,
0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x0a, 0x2f, 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70,
0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f,
0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73,
0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c,
0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20,
0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e,
0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c,
0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x6c, 0x62, 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20,
0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b,
0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61,
0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x72, 0x61, 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c,
0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29,
0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c,
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x62, 0x61, 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b,
0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a, 0x20, 0x69, 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74,
0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54,
0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75,
0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65, 0x6c, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e,
0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x67, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78,
0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f, 0x74, 0x20, 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64,
0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65, 0x20, 0x73, 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f,
0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61,
0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d, 0x61, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20,
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d,
0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67,
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62,
0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63,
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f,
0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61,
0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70,
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e,
0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20,
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61,
0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20,
0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74,
0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69,
0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65,
0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69,
0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20,
0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20,
0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20,
0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73,
0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74,
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20,
0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20,
0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e,
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x20,
0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63,
0x68, 0x28, 0x22, 0x2f, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x22, 0x29, 0x2e,
0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72, 0x2e,
0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x70,
0x72, 0x6f, 0x70, 0x73, 0x2e, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74,
0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20,
0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67,
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65,
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70,
0x73, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65,
0x74, 0x63, 0x68, 0x28, 0x22, 0x2f, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x22,
0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20,
0x72, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d,
0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x2e, 0x64, 0x65, 0x66, 0x61, 0x75,
0x6c, 0x74, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a,
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
}; };
unsigned int completion_js_len = 5782; unsigned int completion_js_len = 5796;

View file

@ -96,17 +96,17 @@ export async function* llama(prompt, params = {}, config = {}) {
} }
} }
if (result.error) { if (result.error) {
try {
result.error = JSON.parse(result.error); result.error = JSON.parse(result.error);
if (result.error.content.includes('slot unavailable')) { if (result.error.message.includes('slot unavailable')) {
// Throw an error to be caught by upstream callers // Throw an error to be caught by upstream callers
throw new Error('slot unavailable'); throw new Error('slot unavailable');
} else { } else {
console.error(`llama.cpp error: ${result.error.content}`); console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
} }
} catch(e) {
console.error(`llama.cpp error ${result.error}`)
} }
if (result.error) {
result.error = JSON.parse(result.error);
console.error(`llama.cpp error: ${result.error.content}`);
} }
} }
} }

View file

@ -148,7 +148,7 @@ struct server_slot {
int32_t n_decoded = 0; int32_t n_decoded = 0;
int32_t n_remaining = -1; int32_t n_remaining = -1;
int32_t i_batch = -1; int32_t i_batch = -1;
int32_t n_predict = -1; int32_t n_predict = -1; // TODO: disambiguate from params.n_predict
int32_t n_prompt_tokens = 0; int32_t n_prompt_tokens = 0;
int32_t n_prompt_tokens_processed = 0; int32_t n_prompt_tokens_processed = 0;
@ -398,7 +398,7 @@ struct server_queue {
// callback functions // callback functions
std::function<void(server_task &)> callback_new_task; std::function<void(server_task &)> callback_new_task;
std::function<void(server_task_multi &)> callback_finish_multitask; std::function<void(server_task_multi &)> callback_finish_multitask;
std::function<void(void)> callback_run_slots; std::function<void(void)> callback_update_slots;
// Add a new task to the end of the queue // Add a new task to the end of the queue
int post(server_task task) { int post(server_task task) {
@ -437,8 +437,8 @@ struct server_queue {
} }
// Register the function to be called when all slots data is ready to be processed // Register the function to be called when all slots data is ready to be processed
void on_run_slots(std::function<void(void)> callback) { void on_update_slots(std::function<void(void)> callback) {
callback_run_slots = std::move(callback); callback_update_slots = std::move(callback);
} }
// Call when the state of one slot is changed // Call when the state of one slot is changed
@ -463,7 +463,7 @@ struct server_queue {
* - Wait until a new task arrives * - Wait until a new task arrives
* - Process the task (i.e. maybe copy data into slot) * - Process the task (i.e. maybe copy data into slot)
* - Check if multitask is finished * - Check if multitask is finished
* - Run all slots * - Update all slots
*/ */
void start_loop() { void start_loop() {
running = true; running = true;
@ -501,9 +501,9 @@ struct server_queue {
} }
// all tasks in the current loop is processed, slots data is now ready // all tasks in the current loop is processed, slots data is now ready
LOG_VERBOSE("callback_run_slots", {}); LOG_VERBOSE("callback_update_slots", {});
callback_run_slots(); callback_update_slots();
LOG_VERBOSE("wait for new task", {}); LOG_VERBOSE("wait for new task", {});
{ {
@ -741,7 +741,13 @@ struct server_context {
default_generation_settings_for_props = get_formated_generation(slots.front()); default_generation_settings_for_props = get_formated_generation(slots.front());
default_generation_settings_for_props["seed"] = -1; default_generation_settings_for_props["seed"] = -1;
batch = llama_batch_init(n_ctx, 0, params.n_parallel); // the update_slots() logic will always submit a maximum of n_batch tokens
// note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
{
const int32_t n_batch = llama_n_batch(ctx);
batch = llama_batch_init(n_batch, 0, params.n_parallel);
}
metrics.init(); metrics.init();
} }
@ -807,9 +813,10 @@ struct server_context {
return last_used; return last_used;
} }
bool launch_slot_with_data(server_slot & slot, json data) const { bool launch_slot_with_task(server_slot & slot, const server_task & task) {
slot_params default_params; slot_params default_params;
llama_sampling_params default_sparams; llama_sampling_params default_sparams;
auto & data = task.data;
if (data.count("__oaicompat") != 0) { if (data.count("__oaicompat") != 0) {
slot.oaicompat = true; slot.oaicompat = true;
@ -871,10 +878,15 @@ struct server_context {
{ {
const auto & prompt = data.find("prompt"); const auto & prompt = data.find("prompt");
if (prompt == data.end()) { if (prompt == data.end()) {
slot.prompt = ""; send_error(task, "Either \"prompt\" or \"messages\" must be provided", ERROR_TYPE_INVALID_REQUEST);
return false;
} else { } else {
slot.prompt = *prompt; slot.prompt = *prompt;
} }
if (slot.prompt.is_array() && slot.prompt.size() == 0) {
send_error(task, "\"prompt\" cannot be an empty array", ERROR_TYPE_INVALID_REQUEST);
return false;
}
} }
// penalize user-provided tokens // penalize user-provided tokens
@ -933,6 +945,7 @@ struct server_context {
if (logit_bias != data.end() && logit_bias->is_array()) { if (logit_bias != data.end() && logit_bias->is_array()) {
const int n_vocab = llama_n_vocab(model); const int n_vocab = llama_n_vocab(model);
for (const auto & el : *logit_bias) { for (const auto & el : *logit_bias) {
// TODO: we may want to throw errors here, in case "el" is incorrect
if (el.is_array() && el.size() == 2) { if (el.is_array() && el.size() == 2) {
float bias; float bias;
if (el[1].is_number()) { if (el[1].is_number()) {
@ -992,6 +1005,11 @@ struct server_context {
llama_sampling_free(slot.ctx_sampling); llama_sampling_free(slot.ctx_sampling);
} }
slot.ctx_sampling = llama_sampling_init(slot.sparams); slot.ctx_sampling = llama_sampling_init(slot.sparams);
if (slot.ctx_sampling == nullptr) {
// for now, the only error that may happen here is invalid grammar
send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST);
return false;
}
llama_set_rng_seed(ctx, slot.params.seed); llama_set_rng_seed(ctx, slot.params.seed);
} }
@ -1031,8 +1049,10 @@ struct server_context {
llama_batch_add(batch, system_tokens[i], i, { 0 }, false); llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
} }
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) { const int32_t n_batch = llama_n_batch(ctx);
const int32_t n_tokens = std::min(params.n_batch, (int32_t) (batch.n_tokens - i));
for (int32_t i = 0; i < batch.n_tokens; i += n_batch) {
const int32_t n_tokens = std::min(params.n_batch, batch.n_tokens - i);
llama_batch batch_view = { llama_batch batch_view = {
n_tokens, n_tokens,
batch.token + i, batch.token + i,
@ -1221,7 +1241,7 @@ struct server_context {
{"mirostat_eta", slot.sparams.mirostat_eta}, {"mirostat_eta", slot.sparams.mirostat_eta},
{"penalize_nl", slot.sparams.penalize_nl}, {"penalize_nl", slot.sparams.penalize_nl},
{"stop", slot.params.antiprompt}, {"stop", slot.params.antiprompt},
{"n_predict", slot.params.n_predict}, {"n_predict", slot.params.n_predict}, // TODO: fix duplicate key n_predict
{"n_keep", params.n_keep}, {"n_keep", params.n_keep},
{"ignore_eos", ignore_eos}, {"ignore_eos", ignore_eos},
{"stream", slot.params.stream}, {"stream", slot.params.stream},
@ -1233,15 +1253,23 @@ struct server_context {
}; };
} }
void send_error(const server_task & task, const std::string & error) { void send_error(const server_task & task, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
LOG_TEE("task %i - error: %s\n", task.id, error.c_str()); send_error(task.id, task.id_multi, error, type);
}
void send_error(const server_slot & slot, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
send_error(slot.id_task, slot.id_multi, error, type);
}
void send_error(const int id_task, const int id_multi, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
LOG_TEE("task %i - error: %s\n", id_task, error.c_str());
server_task_result res; server_task_result res;
res.id = task.id; res.id = id_task;
res.id_multi = task.id_multi; res.id_multi = id_multi;
res.stop = false; res.stop = false;
res.error = true; res.error = true;
res.data = { { "content", error } }; res.data = format_error_response(error, type);
queue_results.send(res); queue_results.send(res);
} }
@ -1475,9 +1503,8 @@ struct server_context {
slot->infill = task.infill; slot->infill = task.infill;
slot->embedding = task.embedding; slot->embedding = task.embedding;
if (!launch_slot_with_data(*slot, task.data)) { if (!launch_slot_with_task(*slot, task)) {
// send error result LOG_ERROR("error while launching slot", task.data);
send_error(task, "internal_error");
break; break;
} }
} break; } break;
@ -1594,7 +1621,7 @@ struct server_context {
queue_results.send(result); queue_results.send(result);
} }
bool update_slots() { void update_slots() {
if (system_need_update) { if (system_need_update) {
system_prompt_update(); system_prompt_update();
} }
@ -1637,7 +1664,7 @@ struct server_context {
kv_cache_clear(); kv_cache_clear();
} }
return true; return;
} }
} }
@ -1726,7 +1753,8 @@ struct server_context {
} }
// process in chunks of params.n_batch // process in chunks of params.n_batch
int32_t n_batch = params.n_batch; int32_t n_batch = llama_n_batch(ctx);
int32_t n_ubatch = llama_n_ubatch(ctx);
// next, batch any pending prompts without exceeding n_batch // next, batch any pending prompts without exceeding n_batch
if (params.cont_batching || batch.n_tokens == 0) { if (params.cont_batching || batch.n_tokens == 0) {
@ -1799,7 +1827,7 @@ struct server_context {
if (slot.embedding) { if (slot.embedding) {
// this prompt is too large to process - discard it // this prompt is too large to process - discard it
if (slot.n_prompt_tokens > n_batch) { if (slot.n_prompt_tokens > n_ubatch) {
slot.state = SLOT_STATE_PROCESSING; slot.state = SLOT_STATE_PROCESSING;
slot.command = SLOT_COMMAND_NONE; slot.command = SLOT_COMMAND_NONE;
slot.release(); slot.release();
@ -1982,8 +2010,7 @@ struct server_context {
if (batch.n_tokens == 0) { if (batch.n_tokens == 0) {
LOG_VERBOSE("no tokens to decode", {}); LOG_VERBOSE("no tokens to decode", {});
return;
return true;
} }
LOG_VERBOSE("decoding batch", { LOG_VERBOSE("decoding batch", {
@ -2040,7 +2067,13 @@ struct server_context {
if (n_batch == 1 || ret < 0) { if (n_batch == 1 || ret < 0) {
// if you get here, it means the KV cache is full - try increasing it via the context size // if you get here, it means the KV cache is full - try increasing it via the context size
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret); LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
return false; for (auto & slot : slots) {
slot.state = SLOT_STATE_PROCESSING;
slot.command = SLOT_COMMAND_NONE;
slot.release();
send_error(slot, "Input prompt is too big compared to KV size. Please try increasing KV size.");
}
break; // break loop of n_batch
} }
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2); LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
@ -2049,12 +2082,12 @@ struct server_context {
n_batch /= 2; n_batch /= 2;
i -= n_batch; i -= n_batch;
continue; continue; // continue loop of n_batch
} }
for (auto & slot : slots) { for (auto & slot : slots) {
if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) { if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) {
continue; continue; // continue loop of slots
} }
// prompt evaluated for embedding // prompt evaluated for embedding
@ -2062,7 +2095,7 @@ struct server_context {
send_embedding(slot, batch_view); send_embedding(slot, batch_view);
slot.release(); slot.release();
slot.i_batch = -1; slot.i_batch = -1;
continue; continue; // continue loop of slots
} }
completion_token_output result; completion_token_output result;
@ -2104,9 +2137,7 @@ struct server_context {
} }
} }
LOG_VERBOSE("slots updated", {}); LOG_VERBOSE("run slots completed", {});
return true;
} }
json model_meta() const { json model_meta() const {
@ -2142,7 +2173,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
printf(" --pooling {none,mean,cls} pooling type for embeddings, use model default if unspecified\n"); printf(" --pooling {none,mean,cls} pooling type for embeddings, use model default if unspecified\n");
printf(" -dt N, --defrag-thold N\n"); printf(" -dt N, --defrag-thold N\n");
printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold); printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold);
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch); printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch);
printf(" -ub N, --ubatch-size N physical maximum batch size (default: %d)\n", params.n_ubatch);
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n"); printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
printf(" not recommended: doubles context memory required and no measurable increase in quality\n"); printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
if (llama_supports_mlock()) { if (llama_supports_mlock()) {
@ -2409,6 +2441,12 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
break; break;
} }
params.n_batch = std::stoi(argv[i]); params.n_batch = std::stoi(argv[i]);
} else if (arg == "-ub" || arg == "--ubatch-size") {
if (++i >= argc) {
invalid_param = true;
break;
}
params.n_ubatch = std::stoi(argv[i]);
} else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") { } else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") {
if (++i >= argc) { if (++i >= argc) {
invalid_param = true; invalid_param = true;
@ -2748,36 +2786,37 @@ int main(int argc, char ** argv) {
res.set_header("Access-Control-Allow-Credentials", "true"); res.set_header("Access-Control-Allow-Credentials", "true");
res.set_header("Access-Control-Allow-Methods", "POST"); res.set_header("Access-Control-Allow-Methods", "POST");
res.set_header("Access-Control-Allow-Headers", "*"); res.set_header("Access-Control-Allow-Headers", "*");
return res.set_content("", "application/json; charset=utf-8");
}); });
svr->set_logger(log_server_request); svr->set_logger(log_server_request);
svr->set_exception_handler([](const httplib::Request &, httplib::Response & res, std::exception_ptr ep) { auto res_error = [](httplib::Response & res, json error_data) {
const char fmt[] = "500 Internal Server Error\n%s"; json final_response {{"error", error_data}};
res.set_content(final_response.dump(), "application/json; charset=utf-8");
res.status = json_value(error_data, "code", 500);
};
char buf[BUFSIZ]; svr->set_exception_handler([&res_error](const httplib::Request &, httplib::Response & res, std::exception_ptr ep) {
std::string message;
try { try {
std::rethrow_exception(std::move(ep)); std::rethrow_exception(std::move(ep));
} catch (std::exception &e) { } catch (std::exception & e) {
snprintf(buf, sizeof(buf), fmt, e.what()); message = e.what();
} catch (...) { } catch (...) {
snprintf(buf, sizeof(buf), fmt, "Unknown Exception"); message = "Unknown Exception";
} }
res.set_content(buf, "text/plain; charset=utf-8"); json formatted_error = format_error_response(message, ERROR_TYPE_SERVER);
res.status = 500; LOG_VERBOSE("Got exception", formatted_error);
res_error(res, formatted_error);
}); });
svr->set_error_handler([](const httplib::Request &, httplib::Response & res) { svr->set_error_handler([&res_error](const httplib::Request &, httplib::Response & res) {
if (res.status == 401) {
res.set_content("Unauthorized", "text/plain; charset=utf-8");
}
if (res.status == 400) {
res.set_content("Invalid request", "text/plain; charset=utf-8");
}
if (res.status == 404) { if (res.status == 404) {
res.set_content("File Not Found", "text/plain; charset=utf-8"); res_error(res, format_error_response("File Not Found", ERROR_TYPE_NOT_FOUND));
} }
// for other error codes, we skip processing here because it's already done by res_error()
}); });
// set timeouts and change hostname and port // set timeouts and change hostname and port
@ -2842,7 +2881,7 @@ int main(int argc, char ** argv) {
// Middlewares // Middlewares
// //
auto middleware_validate_api_key = [&sparams](const httplib::Request & req, httplib::Response & res) { auto middleware_validate_api_key = [&sparams, &res_error](const httplib::Request & req, httplib::Response & res) {
// TODO: should we apply API key to all endpoints, including "/health" and "/models"? // TODO: should we apply API key to all endpoints, including "/health" and "/models"?
static const std::set<std::string> protected_endpoints = { static const std::set<std::string> protected_endpoints = {
"/props", "/props",
@ -2883,8 +2922,7 @@ int main(int argc, char ** argv) {
// API key is invalid or not provided // API key is invalid or not provided
// TODO: make another middleware for CORS related logic // TODO: make another middleware for CORS related logic
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
res.set_content("Unauthorized: Invalid API Key", "text/plain; charset=utf-8"); res_error(res, format_error_response("Invalid API Key", ERROR_TYPE_AUTHENTICATION));
res.status = 401; // Unauthorized
LOG_WARNING("Unauthorized: Invalid API Key", {}); LOG_WARNING("Unauthorized: Invalid API Key", {});
@ -2947,21 +2985,18 @@ int main(int argc, char ** argv) {
} }
case SERVER_STATE_LOADING_MODEL: case SERVER_STATE_LOADING_MODEL:
{ {
res.set_content(R"({"status": "loading model"})", "application/json"); res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
res.status = 503; // HTTP Service Unavailable
} break; } break;
case SERVER_STATE_ERROR: case SERVER_STATE_ERROR:
{ {
res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json"); res_error(res, format_error_response("Model failed to load", ERROR_TYPE_SERVER));
res.status = 500; // HTTP Internal Server Error
} break; } break;
} }
}; };
const auto handle_slots = [&](const httplib::Request &, httplib::Response & res) { const auto handle_slots = [&](const httplib::Request &, httplib::Response & res) {
if (!sparams.slots_endpoint) { if (!sparams.slots_endpoint) {
res.status = 501; res_error(res, format_error_response("This server does not support slots endpoint.", ERROR_TYPE_NOT_SUPPORTED));
res.set_content("This server does not support slots endpoint.", "text/plain; charset=utf-8");
return; return;
} }
@ -2985,8 +3020,7 @@ int main(int argc, char ** argv) {
const auto handle_metrics = [&](const httplib::Request &, httplib::Response & res) { const auto handle_metrics = [&](const httplib::Request &, httplib::Response & res) {
if (!sparams.metrics_endpoint) { if (!sparams.metrics_endpoint) {
res.status = 501; res_error(res, format_error_response("This server does not support metrics endpoint.", ERROR_TYPE_NOT_SUPPORTED));
res.set_content("This server does not support metrics endpoint.", "text/plain; charset=utf-8");
return; return;
} }
@ -3097,7 +3131,7 @@ int main(int argc, char ** argv) {
res.set_content(data.dump(), "application/json; charset=utf-8"); res.set_content(data.dump(), "application/json; charset=utf-8");
}; };
const auto handle_completions = [&ctx_server](const httplib::Request & req, httplib::Response & res) { const auto handle_completions = [&ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) {
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
json data = json::parse(req.body); json data = json::parse(req.body);
@ -3112,8 +3146,7 @@ int main(int argc, char ** argv) {
if (!result.error && result.stop) { if (!result.error && result.stop) {
res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8"); res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
} else { } else {
res.status = 500; res_error(res, result.data);
res.set_content(result.data["content"], "text/plain; charset=utf-8");
} }
ctx_server.queue_results.remove_waiting_task_id(id_task); ctx_server.queue_results.remove_waiting_task_id(id_task);
@ -3193,7 +3226,7 @@ int main(int argc, char ** argv) {
res.set_content(models.dump(), "application/json; charset=utf-8"); res.set_content(models.dump(), "application/json; charset=utf-8");
}; };
const auto handle_chat_completions = [&ctx_server, &sparams](const httplib::Request & req, httplib::Response & res) { const auto handle_chat_completions = [&ctx_server, &sparams, &res_error](const httplib::Request & req, httplib::Response & res) {
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
json data = oaicompat_completion_params_parse(ctx_server.model, json::parse(req.body), sparams.chat_template); json data = oaicompat_completion_params_parse(ctx_server.model, json::parse(req.body), sparams.chat_template);
@ -3202,24 +3235,24 @@ int main(int argc, char ** argv) {
ctx_server.queue_results.add_waiting_task_id(id_task); ctx_server.queue_results.add_waiting_task_id(id_task);
ctx_server.request_completion(id_task, -1, data, false, false); ctx_server.request_completion(id_task, -1, data, false, false);
const auto completion_id = gen_chatcmplid();
if (!json_value(data, "stream", false)) { if (!json_value(data, "stream", false)) {
server_task_result result = ctx_server.queue_results.recv(id_task); server_task_result result = ctx_server.queue_results.recv(id_task);
if (!result.error && result.stop) { if (!result.error && result.stop) {
json result_oai = format_final_response_oaicompat(data, result.data); json result_oai = format_final_response_oaicompat(data, result.data, completion_id);
res.set_content(result_oai.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8"); res.set_content(result_oai.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
} else { } else {
res.status = 500; res_error(res, result.data);
res.set_content(result.data["content"], "text/plain; charset=utf-8");
} }
ctx_server.queue_results.remove_waiting_task_id(id_task); ctx_server.queue_results.remove_waiting_task_id(id_task);
} else { } else {
const auto chunked_content_provider = [id_task, &ctx_server](size_t, httplib::DataSink & sink) { const auto chunked_content_provider = [id_task, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
while (true) { while (true) {
server_task_result result = ctx_server.queue_results.recv(id_task); server_task_result result = ctx_server.queue_results.recv(id_task);
if (!result.error) { if (!result.error) {
std::vector<json> result_array = format_partial_response_oaicompat(result.data); std::vector<json> result_array = format_partial_response_oaicompat(result.data, completion_id);
for (auto it = result_array.begin(); it != result_array.end(); ++it) { for (auto it = result_array.begin(); it != result_array.end(); ++it) {
if (!it->empty()) { if (!it->empty()) {
@ -3265,7 +3298,7 @@ int main(int argc, char ** argv) {
} }
}; };
const auto handle_infill = [&ctx_server](const httplib::Request & req, httplib::Response & res) { const auto handle_infill = [&ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) {
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
json data = json::parse(req.body); json data = json::parse(req.body);
@ -3280,8 +3313,7 @@ int main(int argc, char ** argv) {
if (!result.error && result.stop) { if (!result.error && result.stop) {
res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8"); res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
} else { } else {
res.status = 404; res_error(res, result.data);
res.set_content(result.data["content"], "text/plain; charset=utf-8");
} }
ctx_server.queue_results.remove_waiting_task_id(id_task); ctx_server.queue_results.remove_waiting_task_id(id_task);
@ -3352,7 +3384,7 @@ int main(int argc, char ** argv) {
return res.set_content(data.dump(), "application/json; charset=utf-8"); return res.set_content(data.dump(), "application/json; charset=utf-8");
}; };
const auto handle_embeddings = [&params, &ctx_server](const httplib::Request & req, httplib::Response & res) { const auto handle_embeddings = [&params, &ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) {
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin")); res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
if (!params.embedding) { if (!params.embedding) {
res.status = 501; res.status = 501;
@ -3363,65 +3395,58 @@ int main(int argc, char ** argv) {
const json body = json::parse(req.body); const json body = json::parse(req.body);
bool is_openai = false; bool is_openai = false;
// an input prompt can string or a list of tokens (integer) // an input prompt can be a string or a list of tokens (integer)
std::vector<json> prompts; json prompt;
if (body.count("input") != 0) { if (body.count("input") != 0) {
is_openai = true; is_openai = true;
if (body["input"].is_array()) { prompt = body["input"];
// support multiple prompts
for (const json & elem : body["input"]) {
prompts.push_back(elem);
}
} else {
// single input prompt
prompts.push_back(body["input"]);
}
} else if (body.count("content") != 0) { } else if (body.count("content") != 0) {
// only support single prompt here // with "content", we only support single prompt
std::string content = body["content"]; prompt = std::vector<std::string>{body["content"]};
prompts.push_back(content);
} else { } else {
// TODO @ngxson : should return an error here res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
prompts.push_back(""); return;
} }
// process all prompts
json responses = json::array();
for (auto & prompt : prompts) {
// TODO @ngxson : maybe support multitask for this endpoint?
// create and queue the task // create and queue the task
json responses;
{
const int id_task = ctx_server.queue_tasks.get_new_id(); const int id_task = ctx_server.queue_tasks.get_new_id();
ctx_server.queue_results.add_waiting_task_id(id_task); ctx_server.queue_results.add_waiting_task_id(id_task);
ctx_server.request_completion(id_task, -1, { {"prompt", prompt}, { "n_predict", 0}}, false, true); ctx_server.request_completion(id_task, -1, {{"prompt", prompt}}, false, true);
// get the result // get the result
server_task_result result = ctx_server.queue_results.recv(id_task); server_task_result result = ctx_server.queue_results.recv(id_task);
ctx_server.queue_results.remove_waiting_task_id(id_task); ctx_server.queue_results.remove_waiting_task_id(id_task);
if (!result.error) {
// append to the responses if (result.data.count("results")) {
responses.push_back(result.data); // result for multi-task
responses = result.data["results"];
} else {
// result for single task
responses = std::vector<json>{result.data};
}
} else {
// error received, ignore everything else
res_error(res, result.data);
return;
}
} }
// write JSON response // write JSON response
json root; json root = is_openai
if (is_openai) { ? format_embeddings_response_oaicompat(body, responses)
json res_oai = json::array(); : responses[0];
int i = 0;
for (auto & elem : responses) {
res_oai.push_back(json{
{"embedding", json_value(elem, "embedding", json::array())},
{"index", i++},
{"object", "embedding"}
});
}
root = format_embeddings_response_oaicompat(body, res_oai);
} else {
root = responses[0];
}
return res.set_content(root.dump(), "application/json; charset=utf-8"); return res.set_content(root.dump(), "application/json; charset=utf-8");
}; };
auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(content), len, mime_type);
return false;
};
};
// //
// Router // Router
// //
@ -3433,17 +3458,6 @@ int main(int argc, char ** argv) {
} }
// using embedded static files // using embedded static files
auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
res.set_content(reinterpret_cast<const char*>(content), len, mime_type);
return false;
};
};
svr->Options(R"(/.*)", [](const httplib::Request &, httplib::Response & res) {
// TODO @ngxson : I have no idea what it is... maybe this is redundant?
return res.set_content("", "application/json; charset=utf-8");
});
svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8")); svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8")); svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8")); svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
@ -3494,7 +3508,7 @@ int main(int argc, char ** argv) {
&server_context::process_single_task, &ctx_server, std::placeholders::_1)); &server_context::process_single_task, &ctx_server, std::placeholders::_1));
ctx_server.queue_tasks.on_finish_multitask(std::bind( ctx_server.queue_tasks.on_finish_multitask(std::bind(
&server_context::on_finish_multitask, &ctx_server, std::placeholders::_1)); &server_context::on_finish_multitask, &ctx_server, std::placeholders::_1));
ctx_server.queue_tasks.on_run_slots(std::bind( ctx_server.queue_tasks.on_update_slots(std::bind(
&server_context::update_slots, &ctx_server)); &server_context::update_slots, &ctx_server));
ctx_server.queue_results.on_multitask_update(std::bind( ctx_server.queue_results.on_multitask_update(std::bind(
&server_queue::update_multitask, &server_queue::update_multitask,

View file

@ -9,6 +9,7 @@ Feature: llama.cpp server
And 42 as server seed And 42 as server seed
And 2 slots And 2 slots
And 1024 as batch size And 1024 as batch size
And 1024 as ubatch size
And 2048 KV cache size And 2048 KV cache size
And embeddings extraction And embeddings extraction
Then the server is starting Then the server is starting

View file

@ -1,9 +1,10 @@
import errno
import os import os
import socket import socket
import subprocess import subprocess
import time import time
from contextlib import closing from contextlib import closing
from signal import SIGKILL import signal
def before_scenario(context, scenario): def before_scenario(context, scenario):
@ -29,41 +30,68 @@ def after_scenario(context, scenario):
for line in f: for line in f:
print(line) print(line)
if not is_server_listening(context.server_fqdn, context.server_port): if not is_server_listening(context.server_fqdn, context.server_port):
print("\x1b[33;101mERROR: Server stopped listening\x1b[0m") print("\x1b[33;101mERROR: Server stopped listening\x1b[0m\n")
if not pid_exists(context.server_process.pid): if not pid_exists(context.server_process.pid):
assert False, f"Server not running pid={context.server_process.pid} ..." assert False, f"Server not running pid={context.server_process.pid} ..."
print(f"stopping server pid={context.server_process.pid} ...") server_graceful_shutdown(context)
context.server_process.kill()
# Wait few for socket to free up # Wait few for socket to free up
time.sleep(0.05) time.sleep(0.05)
attempts = 0 attempts = 0
while is_server_listening(context.server_fqdn, context.server_port): while pid_exists(context.server_process.pid) or is_server_listening(context.server_fqdn, context.server_port):
print(f"stopping server pid={context.server_process.pid} ...") server_kill(context)
os.kill(context.server_process.pid, SIGKILL)
time.sleep(0.1) time.sleep(0.1)
attempts += 1 attempts += 1
if attempts > 5: if attempts > 5:
print(f"Server dangling exits, killing all {context.server_path} ...") server_kill_hard(context)
process = subprocess.run(['killall', '-9', context.server_path],
stderr=subprocess.PIPE,
universal_newlines=True) def server_graceful_shutdown(context):
print(f"shutting down server pid={context.server_process.pid} ...\n")
if os.name == 'nt':
os.kill(context.server_process.pid, signal.CTRL_C_EVENT)
else:
os.kill(context.server_process.pid, signal.SIGINT)
def server_kill(context):
print(f"killing server pid={context.server_process.pid} ...\n")
context.server_process.kill()
def server_kill_hard(context):
pid = context.server_process.pid
path = context.server_path
print(f"Server dangling exits, hard killing force {pid}={path}...\n")
if os.name == 'nt':
process = subprocess.check_output(['taskkill', '/F', '/pid', str(pid)]).decode()
print(process) print(process)
else:
os.kill(-pid, signal.SIGKILL)
def is_server_listening(server_fqdn, server_port): def is_server_listening(server_fqdn, server_port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
result = sock.connect_ex((server_fqdn, server_port)) result = sock.connect_ex((server_fqdn, server_port))
return result == 0 _is_server_listening = result == 0
if _is_server_listening:
print(f"server is listening on {server_fqdn}:{server_port}...\n")
return _is_server_listening
def pid_exists(pid): def pid_exists(pid):
"""Check whether pid exists in the current process table.""" """Check whether pid exists in the current process table."""
import errno
if pid < 0: if pid < 0:
return False return False
if os.name == 'nt':
output = subprocess.check_output(['TASKLIST', '/FI', f'pid eq {pid}']).decode()
print(output)
return "No tasks are running" not in output
else:
try: try:
os.kill(pid, 0) os.kill(pid, 0)
except OSError as e: except OSError as e:

View file

@ -47,7 +47,7 @@ Feature: llama.cpp server
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
""" """
And a completion request with no api error And a completion request with no api error
Then 64 tokens are predicted matching fun|Annaks|popcorns Then 64 tokens are predicted matching fun|Annaks|popcorns|pictry
And the completion is truncated And the completion is truncated
And 109 prompt tokens are processed And 109 prompt tokens are processed

View file

@ -18,7 +18,7 @@ from huggingface_hub import hf_hub_download
from prometheus_client import parser from prometheus_client import parser
@step(u"a server listening on {server_fqdn}:{server_port}") @step("a server listening on {server_fqdn}:{server_port}")
def step_server_config(context, server_fqdn, server_port): def step_server_config(context, server_fqdn, server_port):
context.server_fqdn = server_fqdn context.server_fqdn = server_fqdn
context.server_port = int(server_port) context.server_port = int(server_port)
@ -33,6 +33,7 @@ def step_server_config(context, server_fqdn, server_port):
context.model_alias = None context.model_alias = None
context.n_batch = None context.n_batch = None
context.n_ubatch = None
context.n_ctx = None context.n_ctx = None
context.n_ga = None context.n_ga = None
context.n_ga_w = None context.n_ga_w = None
@ -57,24 +58,24 @@ def step_server_config(context, server_fqdn, server_port):
context.prompts = [] context.prompts = []
@step(u'a model file {hf_file} from HF repo {hf_repo}') @step('a model file {hf_file} from HF repo {hf_repo}')
def step_download_hf_model(context, hf_file, hf_repo): def step_download_hf_model(context, hf_file, hf_repo):
context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file) context.model_file = hf_hub_download(repo_id=hf_repo, filename=hf_file)
if context.debug: if context.debug:
print(f"model file: {context.model_file}\n") print(f"model file: {context.model_file}\n")
@step(u'a model alias {model_alias}') @step('a model alias {model_alias}')
def step_model_alias(context, model_alias): def step_model_alias(context, model_alias):
context.model_alias = model_alias context.model_alias = model_alias
@step(u'{seed:d} as server seed') @step('{seed:d} as server seed')
def step_seed(context, seed): def step_seed(context, seed):
context.server_seed = seed context.server_seed = seed
@step(u'{ngl:d} GPU offloaded layers') @step('{ngl:d} GPU offloaded layers')
def step_n_gpu_layer(context, ngl): def step_n_gpu_layer(context, ngl):
if 'N_GPU_LAYERS' in os.environ: if 'N_GPU_LAYERS' in os.environ:
new_ngl = int(os.environ['N_GPU_LAYERS']) new_ngl = int(os.environ['N_GPU_LAYERS'])
@ -84,40 +85,44 @@ def step_n_gpu_layer(context, ngl):
context.n_gpu_layer = ngl context.n_gpu_layer = ngl
@step(u'{n_ctx:d} KV cache size') @step('{n_ctx:d} KV cache size')
def step_n_ctx(context, n_ctx): def step_n_ctx(context, n_ctx):
context.n_ctx = n_ctx context.n_ctx = n_ctx
@step(u'{n_slots:d} slots') @step('{n_slots:d} slots')
def step_n_slots(context, n_slots): def step_n_slots(context, n_slots):
context.n_slots = n_slots context.n_slots = n_slots
@step(u'{n_predict:d} server max tokens to predict') @step('{n_predict:d} server max tokens to predict')
def step_server_n_predict(context, n_predict): def step_server_n_predict(context, n_predict):
context.n_server_predict = n_predict context.n_server_predict = n_predict
@step(u'continuous batching') @step('continuous batching')
def step_server_continuous_batching(context): def step_server_continuous_batching(context):
context.server_continuous_batching = True context.server_continuous_batching = True
@step(u'embeddings extraction') @step('embeddings extraction')
def step_server_embeddings(context): def step_server_embeddings(context):
context.server_embeddings = True context.server_embeddings = True
@step(u'prometheus compatible metrics exposed') @step('prometheus compatible metrics exposed')
def step_server_metrics(context): def step_server_metrics(context):
context.server_metrics = True context.server_metrics = True
@step(u"the server is starting") @step("the server is starting")
def step_start_server(context): def step_start_server(context):
start_server_background(context) start_server_background(context)
attempts = 0 attempts = 0
max_attempts = 20
if 'GITHUB_ACTIONS' in os.environ:
max_attempts *= 2
while True: while True:
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
result = sock.connect_ex((context.server_fqdn, context.server_port)) result = sock.connect_ex((context.server_fqdn, context.server_port))
@ -125,13 +130,13 @@ def step_start_server(context):
print("\x1b[33;46mserver started!\x1b[0m") print("\x1b[33;46mserver started!\x1b[0m")
return return
attempts += 1 attempts += 1
if attempts > 20: if attempts > max_attempts:
assert False, "server not started" assert False, "server not started"
print(f"waiting for server to start, connect error code = {result}...") print(f"waiting for server to start, connect error code = {result}...")
time.sleep(0.1) time.sleep(0.1)
@step(u"the server is {expecting_status}") @step("the server is {expecting_status}")
@async_run_until_complete @async_run_until_complete
async def step_wait_for_the_server_to_be_started(context, expecting_status): async def step_wait_for_the_server_to_be_started(context, expecting_status):
match expecting_status: match expecting_status:
@ -160,7 +165,7 @@ async def step_wait_for_the_server_to_be_started(context, expecting_status):
assert False, "unknown status" assert False, "unknown status"
@step(u'all slots are {expected_slot_status_string}') @step('all slots are {expected_slot_status_string}')
@async_run_until_complete @async_run_until_complete
async def step_all_slots_status(context, expected_slot_status_string): async def step_all_slots_status(context, expected_slot_status_string):
match expected_slot_status_string: match expected_slot_status_string:
@ -176,7 +181,7 @@ async def step_all_slots_status(context, expected_slot_status_string):
await request_slots_status(context, expected_slots) await request_slots_status(context, expected_slots)
@step(u'a completion request with {api_error} api error') @step('a completion request with {api_error} api error')
@async_run_until_complete @async_run_until_complete
async def step_request_completion(context, api_error): async def step_request_completion(context, api_error):
expect_api_error = api_error == 'raised' expect_api_error = api_error == 'raised'
@ -194,133 +199,138 @@ async def step_request_completion(context, api_error):
assert completion == 401, f"completion must be an 401 status code: {completion}" assert completion == 401, f"completion must be an 401 status code: {completion}"
@step(u'{predicted_n:d} tokens are predicted matching {re_content}') @step('{predicted_n:d} tokens are predicted matching {re_content}')
def step_n_tokens_predicted_with_content(context, predicted_n, re_content): def step_n_tokens_predicted_with_content(context, predicted_n, re_content):
context.completion = context.tasks_result.pop() context.completion = context.tasks_result.pop()
assert_n_tokens_predicted(context.completion, predicted_n, re_content) assert_n_tokens_predicted(context.completion, predicted_n, re_content)
@step(u'{predicted_n:d} tokens are predicted') @step('{predicted_n:d} tokens are predicted')
def step_n_tokens_predicted(context, predicted_n): def step_n_tokens_predicted(context, predicted_n):
context.completion = context.tasks_result.pop() context.completion = context.tasks_result.pop()
assert_n_tokens_predicted(context.completion, predicted_n) assert_n_tokens_predicted(context.completion, predicted_n)
@step(u'the completion is truncated') @step('the completion is truncated')
def step_assert_completion_truncated(context): def step_assert_completion_truncated(context):
step_assert_completion_truncated(context, '') step_assert_completion_truncated(context, '')
@step(u'the completion is {truncated} truncated') @step('the completion is {truncated} truncated')
def step_assert_completion_truncated(context, truncated): def step_assert_completion_truncated(context, truncated):
truncated = truncated != "not" truncated = truncated != "not"
assert context.completion['truncated'] == truncated, f'{context.completion}' assert context.completion['truncated'] == truncated, f'{context.completion}'
@step(u'{n_prompt:d} prompt tokens are processed') @step('{n_prompt:d} prompt tokens are processed')
def step_impl(context, n_prompt): def step_impl(context, n_prompt):
assert n_prompt < 0 or n_prompt == context.completion['timings']['prompt_n'], f"n_prompt={context.completion['timings']['prompt_n']}" assert n_prompt < 0 or n_prompt == context.completion['timings']['prompt_n'], f"n_prompt={context.completion['timings']['prompt_n']}"
@step(u'a user prompt {user_prompt}') @step('a user prompt {user_prompt}')
def step_user_prompt(context, user_prompt): def step_user_prompt(context, user_prompt):
context.prompts.append(user_prompt) context.prompts.append(user_prompt)
context.n_prompts = len(context.prompts) context.n_prompts = len(context.prompts)
@step(u'a system prompt {system_prompt}') @step('a system prompt {system_prompt}')
def step_system_prompt(context, system_prompt): def step_system_prompt(context, system_prompt):
context.system_prompt = system_prompt context.system_prompt = system_prompt
@step(u'a model {model}') @step('a model {model}')
def step_model(context, model): def step_model(context, model):
context.model = model context.model = model
@step(u'{max_tokens:d} max tokens to predict') @step('{max_tokens:d} max tokens to predict')
def step_max_tokens(context, max_tokens): def step_max_tokens(context, max_tokens):
context.n_predict = max_tokens context.n_predict = max_tokens
@step(u'streaming is {enable_streaming}') @step('streaming is {enable_streaming}')
def step_streaming(context, enable_streaming): def step_streaming(context, enable_streaming):
context.enable_streaming = enable_streaming == 'enabled' context.enable_streaming = enable_streaming == 'enabled'
@step(u'a user api key {user_api_key}') @step('a user api key {user_api_key}')
def step_user_api_key(context, user_api_key): def step_user_api_key(context, user_api_key):
context.user_api_key = user_api_key context.user_api_key = user_api_key
@step(u'no user api key') @step('no user api key')
def step_no_user_api_key(context): def step_no_user_api_key(context):
context.user_api_key = None context.user_api_key = None
@step(u'a user api key ') @step('a user api key ')
def step_no_user_api_key_space(context): def step_no_user_api_key_space(context):
context.user_api_key = None context.user_api_key = None
@step(u'a server api key {server_api_key}') @step('a server api key {server_api_key}')
def step_server_api_key(context, server_api_key): def step_server_api_key(context, server_api_key):
context.server_api_key = server_api_key context.server_api_key = server_api_key
@step(u'{n_junk:d} as number of junk') @step('{n_junk:d} as number of junk')
def step_n_junk(context, n_junk): def step_n_junk(context, n_junk):
context.n_junk = n_junk context.n_junk = n_junk
@step(u'{n_batch:d} as batch size') @step('{n_batch:d} as batch size')
def step_n_batch(context, n_batch): def step_n_batch(context, n_batch):
context.n_batch = n_batch context.n_batch = n_batch
@step(u'{seed:d} as seed') @step('{n_ubatch:d} as ubatch size')
def step_n_ubatch(context, n_ubatch):
context.n_ubatch = n_ubatch
@step('{seed:d} as seed')
def step_seed(context, seed): def step_seed(context, seed):
context.seed = seed context.seed = seed
@step(u'a prefix prompt') @step('a prefix prompt')
def step_prompt_prefix(context): def step_prompt_prefix(context):
context.prompt_prefix = context.text context.prompt_prefix = context_text(context)
@step(u'a junk suffix prompt') @step('a junk suffix prompt')
def step_prompt_junk_suffix(context): def step_prompt_junk_suffix(context):
context.prompt_junk_suffix = context.text context.prompt_junk_suffix = context_text(context)
@step(u'a suffix prompt') @step('a suffix prompt')
def step_prompt_suffix(context): def step_prompt_suffix(context):
context.prompt_suffix = context.text context.prompt_suffix = context_text(context)
@step(u'{n_ga:d} group attention factor' @step('{n_ga:d} group attention factor'
u' to extend context size through self-extend') ' to extend context size through self-extend')
def step_impl(context, n_ga): def step_impl(context, n_ga):
context.n_ga = n_ga context.n_ga = n_ga
@step(u'{n_ga_w:d} group attention width to extend context size through self-extend') @step('{n_ga_w:d} group attention width to extend context size through self-extend')
def step_impl(context, n_ga_w): def step_impl(context, n_ga_w):
context.n_ga_w = n_ga_w context.n_ga_w = n_ga_w
@step(u'a passkey prompt template') @step('a passkey prompt template')
def step_prompt_passkey(context): def step_prompt_passkey(context):
context.prompt_passkey = context.text context.prompt_passkey = context_text(context)
@step(u'{n_prompts:d} fixed prompts') @step('{n_prompts:d} fixed prompts')
def step_fixed_prompts(context, n_prompts): def step_fixed_prompts(context, n_prompts):
context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)]) context.prompts.extend([str(0)*(context.n_batch if context.n_batch is not None else 512) for i in range(n_prompts)])
context.n_prompts = n_prompts context.n_prompts = n_prompts
@step(u'a "{passkey}" passkey challenge prompt with the passkey inserted every {i_pos:d} junk') @step('a "{passkey}" passkey challenge prompt with the passkey inserted every {i_pos:d} junk')
def step_prompt_passkey(context, passkey, i_pos): def step_prompt_passkey(context, passkey, i_pos):
prompt = "" prompt = ""
for i in range(context.n_junk): for i in range(context.n_junk):
@ -334,7 +344,7 @@ def step_prompt_passkey(context, passkey, i_pos):
context.n_prompts = len(context.prompts) context.n_prompts = len(context.prompts)
@step(u'an OAI compatible chat completions request with {api_error} api error') @step('an OAI compatible chat completions request with {api_error} api error')
@async_run_until_complete @async_run_until_complete
async def step_oai_chat_completions(context, api_error): async def step_oai_chat_completions(context, api_error):
if context.debug: if context.debug:
@ -369,19 +379,19 @@ async def step_oai_chat_completions(context, api_error):
print(f"Completion response: {completion}") print(f"Completion response: {completion}")
@step(u'a prompt') @step('a prompt')
def step_a_prompt(context): def step_a_prompt(context):
context.prompts.append(context.text) context.prompts.append(context_text(context))
context.n_prompts = len(context.prompts) context.n_prompts = len(context.prompts)
@step(u'a prompt {prompt}') @step('a prompt {prompt}')
def step_a_prompt_prompt(context, prompt): def step_a_prompt_prompt(context, prompt):
context.prompts.append(prompt) context.prompts.append(prompt)
context.n_prompts = len(context.prompts) context.n_prompts = len(context.prompts)
@step(u'concurrent completion requests') @step('concurrent completion requests')
@async_run_until_complete() @async_run_until_complete()
async def step_concurrent_completion_requests(context): async def step_concurrent_completion_requests(context):
await concurrent_requests(context, await concurrent_requests(context,
@ -397,7 +407,7 @@ async def step_concurrent_completion_requests(context):
'user_api_key') else None) 'user_api_key') else None)
@step(u'concurrent OAI completions requests') @step('concurrent OAI completions requests')
@async_run_until_complete @async_run_until_complete
async def step_oai_chat_completions(context): async def step_oai_chat_completions(context):
await concurrent_requests(context, oai_chat_completions, await concurrent_requests(context, oai_chat_completions,
@ -417,7 +427,7 @@ async def step_oai_chat_completions(context):
if hasattr(context, 'user_api_key') else None) if hasattr(context, 'user_api_key') else None)
@step(u'concurrent OAI completions requests no v1') @step('concurrent OAI completions requests no v1')
@async_run_until_complete @async_run_until_complete
async def step_oai_chat_completions(context): async def step_oai_chat_completions(context):
await concurrent_requests(context, oai_chat_completions, await concurrent_requests(context, oai_chat_completions,
@ -440,13 +450,13 @@ async def step_oai_chat_completions(context):
if hasattr(context, 'user_api_key') else None) if hasattr(context, 'user_api_key') else None)
@step(u'all prompts are predicted') @step('all prompts are predicted')
@async_run_until_complete @async_run_until_complete
async def step_all_prompts_are_predicted(context): async def step_all_prompts_are_predicted(context):
await all_prompts_are_predicted(context) await all_prompts_are_predicted(context)
@step(u'all prompts are predicted with {n_expected_predicted:d} tokens') @step('all prompts are predicted with {n_expected_predicted:d} tokens')
@async_run_until_complete @async_run_until_complete
async def step_all_prompts_are_predicted_with_n_tokens(context, n_expected_predicted): async def step_all_prompts_are_predicted_with_n_tokens(context, n_expected_predicted):
await all_prompts_are_predicted(context, n_expected_predicted) await all_prompts_are_predicted(context, n_expected_predicted)
@ -460,14 +470,14 @@ async def all_prompts_are_predicted(context, expected_predicted_n=None):
assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests" assert len(context.concurrent_tasks) == 0, f"{len(context.concurrent_tasks)} pending requests"
@step(u'embeddings are computed for') @step('embeddings are computed for')
@async_run_until_complete @async_run_until_complete
async def step_compute_embedding(context): async def step_compute_embedding(context):
context.n_prompts = 1 context.n_prompts = 1
context.embeddings = await request_embedding(context.text, base_url=context.base_url) context.embeddings = await request_embedding(context_text(context), base_url=context.base_url)
@step(u'all embeddings are the same') @step('all embeddings are the same')
@async_run_until_complete @async_run_until_complete
async def step_all_embeddings_are_the_same(context): async def step_all_embeddings_are_the_same(context):
n_embedding_requests = await gather_tasks_results(context) n_embedding_requests = await gather_tasks_results(context)
@ -491,7 +501,8 @@ async def step_all_embeddings_are_the_same(context):
print(f"{msg}\n") print(f"{msg}\n")
assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg assert np.isclose(similarity, 1.0, rtol=1e-05, atol=1e-08, equal_nan=False), msg
@step(u'embeddings are generated')
@step('embeddings are generated')
def step_assert_embeddings(context): def step_assert_embeddings(context):
assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n" assert context.n_prompts == len(context.embeddings), (f"unexpected response:\n"
f"context.n_prompts={context.n_prompts}\n" f"context.n_prompts={context.n_prompts}\n"
@ -500,17 +511,17 @@ def step_assert_embeddings(context):
assert_embeddings(embedding) assert_embeddings(embedding)
@step(u'an OAI compatible embeddings computation request for') @step('an OAI compatible embeddings computation request for')
@async_run_until_complete @async_run_until_complete
async def step_oai_compute_embeddings(context): async def step_oai_compute_embeddings(context):
context.n_prompts = 1 context.n_prompts = 1
context.embeddings = await request_oai_embeddings(context.text, context.embeddings = await request_oai_embeddings(context_text(context),
base_url=context.base_url, base_url=context.base_url,
user_api_key=context.user_api_key, user_api_key=context.user_api_key,
model=context.model) model=context.model)
@step(u'an OAI compatible embeddings computation request for multiple inputs') @step('an OAI compatible embeddings computation request for multiple inputs')
@async_run_until_complete @async_run_until_complete
async def step_oai_compute_embeddings_multiple_inputs(context): async def step_oai_compute_embeddings_multiple_inputs(context):
context.embeddings = await request_oai_embeddings(context.prompts, context.embeddings = await request_oai_embeddings(context.prompts,
@ -520,7 +531,7 @@ async def step_oai_compute_embeddings_multiple_inputs(context):
context.prompts.clear() context.prompts.clear()
@step(u'concurrent embedding requests') @step('concurrent embedding requests')
@async_run_until_complete() @async_run_until_complete()
async def step_concurrent_embedding_requests(context): async def step_concurrent_embedding_requests(context):
await concurrent_requests(context, await concurrent_requests(context,
@ -529,7 +540,7 @@ async def step_concurrent_embedding_requests(context):
base_url=context.base_url) base_url=context.base_url)
@step(u'concurrent OAI embedding requests') @step('concurrent OAI embedding requests')
@async_run_until_complete() @async_run_until_complete()
async def step_concurrent_oai_embedding_requests(context): async def step_concurrent_oai_embedding_requests(context):
await concurrent_requests(context, await concurrent_requests(context,
@ -540,7 +551,7 @@ async def step_concurrent_oai_embedding_requests(context):
model=context.model) model=context.model)
@step(u'all embeddings are generated') @step('all embeddings are generated')
@async_run_until_complete() @async_run_until_complete()
async def all_embeddings_are_generated(context): async def all_embeddings_are_generated(context):
n_embedding_requests = await gather_tasks_results(context) n_embedding_requests = await gather_tasks_results(context)
@ -549,10 +560,10 @@ async def all_embeddings_are_generated(context):
assert_embeddings(context.tasks_result.pop().pop()) assert_embeddings(context.tasks_result.pop().pop())
@step(u'tokenizing') @step('tokenizing')
@async_run_until_complete @async_run_until_complete
async def step_tokenize(context): async def step_tokenize(context):
context.tokenized_text = context.text context.tokenized_text = context_text(context)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
async with session.post(f'{context.base_url}/tokenize', async with session.post(f'{context.base_url}/tokenize',
json={ json={
@ -563,7 +574,7 @@ async def step_tokenize(context):
context.tokens = tokenize_json['tokens'] context.tokens = tokenize_json['tokens']
@step(u'tokens can be detokenize') @step('tokens can be detokenize')
@async_run_until_complete @async_run_until_complete
async def step_detokenize(context): async def step_detokenize(context):
assert len(context.tokens) > 0 assert len(context.tokens) > 0
@ -578,7 +589,7 @@ async def step_detokenize(context):
assert context.tokenized_text == detokenize_json['content'].strip() assert context.tokenized_text == detokenize_json['content'].strip()
@step(u'an OPTIONS request is sent from {origin}') @step('an OPTIONS request is sent from {origin}')
@async_run_until_complete @async_run_until_complete
async def step_options_request(context, origin): async def step_options_request(context, origin):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
@ -589,12 +600,12 @@ async def step_options_request(context, origin):
context.options_response = response context.options_response = response
@step(u'CORS header {cors_header} is set to {cors_header_value}') @step('CORS header {cors_header} is set to {cors_header_value}')
def step_check_options_header_value(context, cors_header, cors_header_value): def step_check_options_header_value(context, cors_header, cors_header_value):
assert context.options_response.headers[cors_header] == cors_header_value assert context.options_response.headers[cors_header] == cors_header_value
@step(u'prometheus metrics are exposed') @step('prometheus metrics are exposed')
@async_run_until_complete @async_run_until_complete
async def step_prometheus_metrics_exported(context): async def step_prometheus_metrics_exported(context):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
@ -616,14 +627,14 @@ async def step_prometheus_metrics_exported(context):
assert metric_exported, "No metrics exported" assert metric_exported, "No metrics exported"
@step(u'metric {metric_name} is {metric_value:d}') @step('metric {metric_name} is {metric_value:d}')
def step_assert_metric_value(context, metric_name, metric_value): def step_assert_metric_value(context, metric_name, metric_value):
if metric_name not in context.metrics: if metric_name not in context.metrics:
assert False, f"no metric {metric_name} in {context.metrics.keys()}" assert False, f"no metric {metric_name} in {context.metrics.keys()}"
assert context.metrics[metric_name].samples[0].value == metric_value, f"metric: {context.metrics[metric_name]}" assert context.metrics[metric_name].samples[0].value == metric_value, f"metric: {context.metrics[metric_name]}"
@step(u'available models') @step('available models')
def step_available_models(context): def step_available_models(context):
# openai client always expects an api_key # openai client always expects an api_key
openai.api_key = context.user_api_key if context.user_api_key is not None else 'nope' openai.api_key = context.user_api_key if context.user_api_key is not None else 'nope'
@ -631,14 +642,14 @@ def step_available_models(context):
context.models = openai.Model.list().data context.models = openai.Model.list().data
@step(u'{n_model:d} models are supported') @step('{n_model:d} models are supported')
def step_supported_models(context, n_model): def step_supported_models(context, n_model):
if context.debug: if context.debug:
print("server models available:", context.models) print("server models available:", context.models)
assert len(context.models) == n_model assert len(context.models) == n_model
@step(u'model {i_model:d} is {param} {preposition} {param_value}') @step('model {i_model:d} is {param} {preposition} {param_value}')
def step_supported_models(context, i_model, param, preposition, param_value): def step_supported_models(context, i_model, param, preposition, param_value):
assert i_model < len(context.models) assert i_model < len(context.models)
model = context.models[i_model] model = context.models[i_model]
@ -800,7 +811,7 @@ async def oai_chat_completions(user_prompt,
stream=enable_streaming, stream=enable_streaming,
seed=seed seed=seed
) )
except openai.error.APIError as e: except openai.error.AuthenticationError as e:
if expect_api_error is not None and expect_api_error: if expect_api_error is not None and expect_api_error:
return 401 return 401
else: else:
@ -936,6 +947,9 @@ async def wait_for_health_status(context,
print(f"Starting checking for health for expected_health_status={expected_health_status}\n") print(f"Starting checking for health for expected_health_status={expected_health_status}\n")
interval = 0.5 interval = 0.5
counter = 0 counter = 0
if 'GITHUB_ACTIONS' in os.environ:
timeout *= 2
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
while True: while True:
async with await session.get(f'{base_url}/health', params=params) as health_response: async with await session.get(f'{base_url}/health', params=params) as health_response:
@ -1007,17 +1021,29 @@ async def completions_seed(context):
else context.server_seed if hasattr(context, 'server_seed') else None else context.server_seed if hasattr(context, 'server_seed') else None
def context_text(context):
return context.text.replace('\r', '')
def start_server_background(context): def start_server_background(context):
if os.name == 'nt':
context.server_path = '../../../build/bin/Release/server.exe'
else:
context.server_path = '../../../build/bin/server' context.server_path = '../../../build/bin/server'
if 'LLAMA_SERVER_BIN_PATH' in os.environ: if 'LLAMA_SERVER_BIN_PATH' in os.environ:
context.server_path = os.environ['LLAMA_SERVER_BIN_PATH'] context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
server_listen_addr = context.server_fqdn
if os.name == 'nt':
server_listen_addr = '0.0.0.0'
server_args = [ server_args = [
'--host', context.server_fqdn, '--host', server_listen_addr,
'--port', context.server_port, '--port', context.server_port,
'--model', context.model_file '--model', context.model_file
] ]
if context.n_batch: if context.n_batch:
server_args.extend(['--batch-size', context.n_batch]) server_args.extend(['--batch-size', context.n_batch])
if context.n_ubatch:
server_args.extend(['--ubatch-size', context.n_ubatch])
if context.n_gpu_layer: if context.n_gpu_layer:
server_args.extend(['--n-gpu-layers', context.n_gpu_layer]) server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
if context.server_continuous_batching: if context.server_continuous_batching:
@ -1045,7 +1071,16 @@ def start_server_background(context):
if 'SERVER_LOG_FORMAT_JSON' not in os.environ: if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
server_args.extend(['--log-format', "text"]) server_args.extend(['--log-format', "text"])
print(f"starting server with: {context.server_path} {server_args}\n") print(f"starting server with: {context.server_path} {server_args}\n")
flags = 0
if 'nt' == os.name:
flags |= subprocess.DETACHED_PROCESS
flags |= subprocess.CREATE_NEW_PROCESS_GROUP
flags |= subprocess.CREATE_NO_WINDOW
pkwargs = {
'creationflags': flags,
}
context.server_process = subprocess.Popen( context.server_process = subprocess.Popen(
[str(arg) for arg in [context.server_path, *server_args]], [str(arg) for arg in [context.server_path, *server_args]],
close_fds=True) **pkwargs)
print(f"server pid={context.server_process.pid}") print(f"server pid={context.server_process.pid}, behave pid={os.getpid()}")

View file

@ -14,6 +14,17 @@
using json = nlohmann::json; using json = nlohmann::json;
// https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
enum error_type {
ERROR_TYPE_INVALID_REQUEST,
ERROR_TYPE_AUTHENTICATION,
ERROR_TYPE_SERVER,
ERROR_TYPE_NOT_FOUND,
ERROR_TYPE_PERMISSION,
ERROR_TYPE_UNAVAILABLE, // custom error
ERROR_TYPE_NOT_SUPPORTED, // custom error
};
extern bool server_verbose; extern bool server_verbose;
extern bool server_log_json; extern bool server_log_json;
@ -386,7 +397,7 @@ static json oaicompat_completion_params_parse(
return llama_params; return llama_params;
} }
static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) { static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
bool stopped_word = result.count("stopped_word") != 0; bool stopped_word = result.count("stopped_word") != 0;
bool stopped_eos = json_value(result, "stopped_eos", false); bool stopped_eos = json_value(result, "stopped_eos", false);
int num_tokens_predicted = json_value(result, "tokens_predicted", 0); int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
@ -420,7 +431,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
{"prompt_tokens", num_prompt_tokens}, {"prompt_tokens", num_prompt_tokens},
{"total_tokens", num_tokens_predicted + num_prompt_tokens} {"total_tokens", num_tokens_predicted + num_prompt_tokens}
}}, }},
{"id", gen_chatcmplid()} {"id", completion_id}
}; };
if (server_verbose) { if (server_verbose) {
@ -435,7 +446,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
} }
// return value is vector as there is one case where we might need to generate two responses // return value is vector as there is one case where we might need to generate two responses
static std::vector<json> format_partial_response_oaicompat(json result) { static std::vector<json> format_partial_response_oaicompat(json result, const std::string & completion_id) {
if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) { if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
return std::vector<json>({result}); return std::vector<json>({result});
} }
@ -479,7 +490,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
{"role", "assistant"} {"role", "assistant"}
}}}})}, }}}})},
{"created", t}, {"created", t},
{"id", gen_chatcmplid()}, {"id", completion_id},
{"model", modelname}, {"model", modelname},
{"object", "chat.completion.chunk"}}; {"object", "chat.completion.chunk"}};
@ -490,7 +501,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
{"content", content}}} {"content", content}}}
}})}, }})},
{"created", t}, {"created", t},
{"id", gen_chatcmplid()}, {"id", completion_id},
{"model", modelname}, {"model", modelname},
{"object", "chat.completion.chunk"}}; {"object", "chat.completion.chunk"}};
@ -517,7 +528,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
json ret = json { json ret = json {
{"choices", choices}, {"choices", choices},
{"created", t}, {"created", t},
{"id", gen_chatcmplid()}, {"id", completion_id},
{"model", modelname}, {"model", modelname},
{"object", "chat.completion.chunk"} {"object", "chat.completion.chunk"}
}; };
@ -526,6 +537,16 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
} }
static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) { static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
json data = json::array();
int i = 0;
for (auto & elem : embeddings) {
data.push_back(json{
{"embedding", json_value(elem, "embedding", json::array())},
{"index", i++},
{"object", "embedding"}
});
}
json res = json { json res = json {
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))}, {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
{"object", "list"}, {"object", "list"},
@ -533,7 +554,7 @@ static json format_embeddings_response_oaicompat(const json & request, const jso
{"prompt_tokens", 0}, {"prompt_tokens", 0},
{"total_tokens", 0} {"total_tokens", 0}
}}, }},
{"data", embeddings} {"data", data}
}; };
return res; return res;
@ -550,3 +571,43 @@ static json format_detokenized_response(const std::string & content) {
{"content", content} {"content", content}
}; };
} }
static json format_error_response(const std::string & message, const enum error_type type) {
std::string type_str;
int code = 500;
switch (type) {
case ERROR_TYPE_INVALID_REQUEST:
type_str = "invalid_request_error";
code = 400;
break;
case ERROR_TYPE_AUTHENTICATION:
type_str = "authentication_error";
code = 401;
break;
case ERROR_TYPE_NOT_FOUND:
type_str = "not_found_error";
code = 404;
break;
case ERROR_TYPE_SERVER:
type_str = "server_error";
code = 500;
break;
case ERROR_TYPE_PERMISSION:
type_str = "permission_error";
code = 403;
break;
case ERROR_TYPE_NOT_SUPPORTED:
type_str = "not_supported_error";
code = 501;
break;
case ERROR_TYPE_UNAVAILABLE:
type_str = "unavailable_error";
code = 503;
break;
}
return json {
{"code", code},
{"message", message},
{"type", type_str},
};
}

View file

@ -711,6 +711,7 @@ static bool load_checkpoint_file(const char * filename, struct my_llama_model *
load_checkpoint_gguf(fctx, f_ggml_ctx, model, train); load_checkpoint_gguf(fctx, f_ggml_ctx, model, train);
gguf_free(fctx);
return true; return true;
} }

View file

@ -61,7 +61,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
} }
} }
// TODO: GGML_PAD ?
static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) { static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) {
assert(alignment && !(alignment & (alignment - 1))); // power of 2 assert(alignment && !(alignment & (alignment - 1))); // power of 2
size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment; size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment;
@ -69,25 +68,14 @@ static size_t aligned_offset(const void * buffer, size_t offset, size_t alignmen
} }
// tallocr // tallocr
struct ggml_tallocr {
ggml_backend_buffer_t buffer;
void * base;
size_t alignment;
size_t offset;
};
ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
ggml_tallocr_t talloc = malloc(sizeof(struct ggml_tallocr));
if (talloc == NULL) {
return NULL;
}
struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
void * base = ggml_backend_buffer_get_base(buffer); void * base = ggml_backend_buffer_get_base(buffer);
size_t align = ggml_backend_buffer_get_alignment(buffer); size_t align = ggml_backend_buffer_get_alignment(buffer);
assert(align && !(align & (align - 1))); // power of 2 assert(align && !(align & (align - 1))); // power of 2
*talloc = (struct ggml_tallocr) { struct ggml_tallocr talloc = (struct ggml_tallocr) {
/*.buffer = */ buffer, /*.buffer = */ buffer,
/*.base = */ base, /*.base = */ base,
/*.alignment = */ align, /*.alignment = */ align,
@ -96,11 +84,7 @@ ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
return talloc; return talloc;
} }
void ggml_tallocr_free(ggml_tallocr_t talloc) { void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
free(talloc);
}
void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor) {
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor); size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
size = GGML_PAD(size, talloc->alignment); size = GGML_PAD(size, talloc->alignment);
@ -354,12 +338,16 @@ struct hash_node {
bool allocated; bool allocated;
}; };
//
struct tensor_alloc { struct tensor_alloc {
size_t offset; size_t offset;
size_t size_max; // 0 = pre-allocated, unused, or view size_t size_max; // 0 = pre-allocated, unused, or view
}; };
struct leaf_alloc {
int buffer_id;
struct tensor_alloc leaf;
};
struct node_alloc { struct node_alloc {
int buffer_id; int buffer_id;
struct tensor_alloc dst; struct tensor_alloc dst;
@ -378,7 +366,7 @@ struct ggml_gallocr {
struct node_alloc * node_allocs; // [n_nodes] struct node_alloc * node_allocs; // [n_nodes]
int n_nodes; int n_nodes;
struct tensor_alloc * leaf_allocs; // [n_leafs] struct leaf_alloc * leaf_allocs; // [n_leafs]
int n_leafs; int n_leafs;
}; };
@ -543,13 +531,20 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
return node_buffer_ids ? node_buffer_ids[i] : 0; return node_buffer_ids ? node_buffer_ids[i] : 0;
} }
static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) { static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
// clear hash tables // clear hash tables
memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *)); memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node)); memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
// allocate leafs
// these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
ggml_gallocr_allocate_node(galloc, leaf, get_node_buffer_id(leaf_buffer_ids, i));
}
// count number of children and views // count number of children and views
// allocate all graph inputs and leafs first to avoid overwriting them // allocate other graph inputs and leafs first to avoid overwriting them
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
@ -577,19 +572,6 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
} }
} }
// allocate the remaining leafs that are unused on the graph
// these are effectively static tensors that the application is not using in the graph, but may still want to allocate for other purposes
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
if (hn->n_children == 0) {
assert(!hn->allocated);
// since buffer ids are only given for nodes, these leafs are always allocated in the first buffer
ggml_gallocr_allocate_node(galloc, leaf, 0);
}
}
// allocate tensors // allocate tensors
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
@ -652,7 +634,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
} }
} }
bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) { bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
size_t hash_size = graph->visited_hash_table.size; size_t hash_size = graph->visited_hash_table.size;
// initialize hash table // initialize hash table
@ -676,7 +658,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
} }
// allocate in hash table // allocate in hash table
ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids); ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids);
// set the node_allocs from the hash table // set the node_allocs from the hash table
if (galloc->n_nodes < graph->n_nodes) { if (galloc->n_nodes < graph->n_nodes) {
@ -711,15 +693,16 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
} }
if (galloc->n_leafs < graph->n_leafs) { if (galloc->n_leafs < graph->n_leafs) {
free(galloc->leaf_allocs); free(galloc->leaf_allocs);
galloc->leaf_allocs = calloc(sizeof(struct tensor_alloc), graph->n_leafs); galloc->leaf_allocs = calloc(sizeof(galloc->leaf_allocs[0]), graph->n_leafs);
GGML_ASSERT(galloc->leaf_allocs != NULL); GGML_ASSERT(galloc->leaf_allocs != NULL);
} }
galloc->n_leafs = graph->n_leafs; galloc->n_leafs = graph->n_leafs;
for (int i = 0; i < graph->n_leafs; i++) { for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i]; struct ggml_tensor * leaf = graph->leafs[i];
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf); struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
galloc->leaf_allocs[i].offset = hn->offset; galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
galloc->leaf_allocs[i].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf); galloc->leaf_allocs[i].leaf.offset = hn->offset;
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
} }
// reallocate buffers if needed // reallocate buffers if needed
@ -727,7 +710,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0; size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0;
size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]); size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
if (new_size > cur_size) { // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
if (new_size > cur_size || galloc->buffers[i] == NULL) {
#ifndef NDEBUG #ifndef NDEBUG
fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0); fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
#endif #endif
@ -744,30 +728,30 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
} }
bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) { bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
return ggml_gallocr_reserve_n(galloc, graph, NULL); return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
} }
static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id, struct tensor_alloc * tensor_alloc) { static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, int buffer_id, struct tensor_alloc * tensor_alloc) {
assert(node->data || node->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max); assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
if (node->view_src != NULL) { if (tensor->view_src != NULL) {
if (node->buffer == NULL) { if (tensor->buffer == NULL) {
assert(tensor_alloc->offset == SIZE_MAX); assert(tensor_alloc->offset == SIZE_MAX);
if (node->view_src->buffer == NULL) { if (tensor->view_src->buffer == NULL) {
// this tensor was allocated without ggml-backend // this tensor was allocated without ggml-backend
return; return;
} }
ggml_backend_view_init(galloc->buffers[buffer_id], node); ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
} }
} else { } else {
if (node->data == NULL) { if (tensor->data == NULL) {
assert(tensor_alloc->offset != SIZE_MAX); assert(tensor_alloc->offset != SIZE_MAX);
assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max); assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]); void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]);
void * addr = (char *)base + tensor_alloc->offset; void * addr = (char *)base + tensor_alloc->offset;
ggml_backend_tensor_alloc(galloc->buffers[buffer_id], node, addr); ggml_backend_tensor_alloc(galloc->buffers[buffer_id], tensor, addr);
} else { } else {
if (node->buffer == NULL) { if (tensor->buffer == NULL) {
// this tensor was allocated without ggml-backend // this tensor was allocated without ggml-backend
return; return;
} }
@ -843,13 +827,18 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
// reset buffers // reset buffers
for (int i = 0; i < galloc->n_buffers; i++) { for (int i = 0; i < galloc->n_buffers; i++) {
// zero size buffers are not allocated
if (galloc->buffers[i] != NULL) { if (galloc->buffers[i] != NULL) {
ggml_backend_buffer_reset(galloc->buffers[i]); ggml_backend_buffer_reset(galloc->buffers[i]);
} }
} }
// allocate the graph tensors from the previous assignments // allocate the graph tensors from the previous assignments
// leafs
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
struct leaf_alloc * leaf_alloc = &galloc->leaf_allocs[i];
ggml_gallocr_init_tensor(galloc, leaf, leaf_alloc->buffer_id, &leaf_alloc->leaf);
}
// nodes // nodes
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
@ -863,12 +852,6 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
} }
ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst); ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst);
} }
// leafs
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
struct tensor_alloc * leaf_alloc = &galloc->leaf_allocs[i];
ggml_gallocr_init_tensor(galloc, leaf, 0, leaf_alloc);
}
return true; return true;
} }
@ -900,12 +883,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
return false; return false;
} }
struct ggml_tallocr * tallocr = ggml_tallocr_new(buffer); struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) { for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
if (t->data == NULL) { if (t->data == NULL) {
if (t->view_src == NULL) { if (t->view_src == NULL) {
ggml_tallocr_alloc(tallocr, t); ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) { } else if (t->buffer == NULL) {
ggml_backend_view_init(buffer, t); ggml_backend_view_init(buffer, t);
} }
@ -917,8 +900,6 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
} }
} }
ggml_tallocr_free(tallocr);
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1)); *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
(*buffers)[(*n_buffers)++] = buffer; (*buffers)[(*n_buffers)++] = buffer;

View file

@ -11,11 +11,15 @@ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend * ggml_backend_t; typedef struct ggml_backend * ggml_backend_t;
// Tensor allocator // Tensor allocator
typedef struct ggml_tallocr * ggml_tallocr_t; struct ggml_tallocr {
ggml_backend_buffer_t buffer;
void * base;
size_t alignment;
size_t offset;
};
GGML_API ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer); GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
GGML_API void ggml_tallocr_free(ggml_tallocr_t talloc); GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
GGML_API void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor);
// Graph allocator // Graph allocator
/* /*
@ -50,7 +54,11 @@ GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed // not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
// returns false if the buffer allocation failed // returns false if the buffer allocation failed
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph); GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
GGML_API bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids); GGML_API bool ggml_gallocr_reserve_n(
ggml_gallocr_t galloc,
struct ggml_cgraph * graph,
const int * node_buffer_ids,
const int * leaf_buffer_ids);
// automatic reallocation if the topology changes when using a single buffer // automatic reallocation if the topology changes when using a single buffer
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers) // returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)

View file

@ -86,12 +86,12 @@ extern "C" {
// (optional) asynchronous tensor data access // (optional) asynchronous tensor data access
void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * src, struct ggml_tensor * dst); bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
// (optional) complete all pending operations // (optional) complete all pending operations
void (*GGML_CALL synchronize)(ggml_backend_t backend); void (*GGML_CALL synchronize)(ggml_backend_t backend);
// create a plan for ggml_cgraph and free it // compute graph with a plan (not used currently)
ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph); ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
@ -102,16 +102,27 @@ extern "C" {
// check if the backend supports an operation // check if the backend supports an operation
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
// (optional) event synchronization
ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
void (*GGML_CALL event_free) (ggml_backend_event_t event);
void (*GGML_CALL event_record) (ggml_backend_event_t event);
void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
}; };
struct ggml_backend { struct ggml_backend {
ggml_guid_t guid; ggml_guid_t guid;
struct ggml_backend_i iface; struct ggml_backend_i iface;
ggml_backend_context_t context; ggml_backend_context_t context;
}; };
struct ggml_backend_event {
ggml_backend_t backend;
void * context;
};
// //
// Backend registry // Backend registry
// //

View file

@ -221,29 +221,29 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
GGML_CALL void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { GGML_CALL void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(buf != NULL && "tensor buffer not set"); GGML_ASSERT(buf != NULL && "tensor buffer not set");
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
if (!size) { if (!size) {
return; return;
} }
tensor->buffer->iface.set_tensor(buf, tensor, data, offset, size); buf->iface.set_tensor(buf, tensor, data, offset, size);
} }
GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer; ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
GGML_ASSERT(buf != NULL && "tensor buffer not set");
GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(tensor->buffer != NULL && "tensor buffer not set");
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
if (!size) { if (!size) {
return; return;
} }
tensor->buffer->iface.get_tensor(buf, tensor, data, offset, size); buf->iface.get_tensor(buf, tensor, data, offset, size);
} }
void ggml_backend_synchronize(ggml_backend_t backend) { void ggml_backend_synchronize(ggml_backend_t backend) {
@ -255,18 +255,30 @@ void ggml_backend_synchronize(ggml_backend_t backend) {
} }
ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) { ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
GGML_ASSERT(backend->iface.graph_plan_create != NULL);
return backend->iface.graph_plan_create(backend, cgraph); return backend->iface.graph_plan_create(backend, cgraph);
} }
void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
GGML_ASSERT(backend->iface.graph_plan_free != NULL);
backend->iface.graph_plan_free(backend, plan); backend->iface.graph_plan_free(backend, plan);
} }
enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) { enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
GGML_ASSERT(backend->iface.graph_plan_compute != NULL);
return backend->iface.graph_plan_compute(backend, plan); return backend->iface.graph_plan_compute(backend, plan);
} }
enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
enum ggml_status err = ggml_backend_graph_compute_async(backend, cgraph);
ggml_backend_synchronize(backend);
return err;
}
bool ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
return backend->iface.graph_compute(backend, cgraph); return backend->iface.graph_compute(backend, cgraph);
} }
@ -314,34 +326,68 @@ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst
} }
} }
void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) { void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst) {
GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts"); GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
if (src == dst) { if (src == dst) {
return; return;
} }
if (ggml_backend_buft_supports_backend(src->buffer->buft, backend) && ggml_backend_buft_supports_backend(dst->buffer->buft, backend)) { if (backend_dst->iface.cpy_tensor_async != NULL) {
if (backend->iface.cpy_tensor_async != NULL) { if (backend_dst->iface.cpy_tensor_async(backend_src, backend_dst, src, dst)) {
if (backend->iface.cpy_tensor_async(backend, src, dst)) {
return; return;
} }
} }
}
size_t nbytes = ggml_nbytes(src); // an async copy would normally happen after all the queued operations on both backends are completed
// sync src, set_async dst
if (ggml_backend_buffer_is_host(src->buffer)) { if (ggml_backend_buffer_is_host(src->buffer)) {
ggml_backend_tensor_set_async(backend, dst, src->data, 0, nbytes); ggml_backend_synchronize(backend_src);
} ggml_backend_tensor_set_async(backend_dst, dst, src->data, 0, ggml_nbytes(src));
else { } else {
ggml_backend_synchronize(backend_src);
ggml_backend_tensor_copy(src, dst); ggml_backend_tensor_copy(src, dst);
ggml_backend_synchronize(backend_dst);
} }
} }
// events
ggml_backend_event_t ggml_backend_event_new(ggml_backend_t backend) {
if (backend->iface.event_new == NULL) {
return NULL;
}
return backend->iface.event_new(backend);
}
void ggml_backend_event_free(ggml_backend_event_t event) {
if (event == NULL) {
return;
}
event->backend->iface.event_free(event);
}
void ggml_backend_event_record(ggml_backend_event_t event) {
GGML_ASSERT(event->backend->iface.event_record != NULL);
event->backend->iface.event_record(event);
}
void ggml_backend_event_synchronize(ggml_backend_event_t event) {
GGML_ASSERT(event->backend->iface.event_synchronize != NULL);
event->backend->iface.event_synchronize(event);
}
void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
GGML_ASSERT(backend->iface.event_wait != NULL);
backend->iface.event_wait(backend, event);
}
// backend registry // backend registry
#define GGML_MAX_BACKENDS_REG 16 #define GGML_REG_MAX_BACKENDS 16
struct ggml_backend_reg { struct ggml_backend_reg {
char name[128]; char name[128];
@ -350,7 +396,7 @@ struct ggml_backend_reg {
void * user_data; void * user_data;
}; };
static struct ggml_backend_reg ggml_backend_registry[GGML_MAX_BACKENDS_REG]; static struct ggml_backend_reg ggml_backend_registry[GGML_REG_MAX_BACKENDS];
static size_t ggml_backend_registry_count = 0; static size_t ggml_backend_registry_count = 0;
GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data); GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data);
@ -395,7 +441,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
} }
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) { GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG); GGML_ASSERT(ggml_backend_registry_count < GGML_REG_MAX_BACKENDS);
size_t id = ggml_backend_registry_count; size_t id = ggml_backend_registry_count;
@ -746,8 +792,12 @@ GGML_CALL static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t
struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads); struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
if (cpu_ctx->work_size < cplan.work_size) { if (cpu_ctx->work_size < cplan.work_size) {
// TODO: may be faster to free and use malloc to avoid the copy free(cpu_ctx->work_data);
cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size); cpu_ctx->work_data = malloc(cplan.work_size);
if (cpu_ctx->work_data == NULL) {
cpu_ctx->work_size = 0;
return GGML_STATUS_ALLOC_FAILED;
}
cpu_ctx->work_size = cplan.work_size; cpu_ctx->work_size = cplan.work_size;
} }
cplan.work_data = cpu_ctx->work_data; cplan.work_data = cpu_ctx->work_data;
@ -784,6 +834,11 @@ static struct ggml_backend_i cpu_backend_i = {
/* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute, /* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
/* .graph_compute = */ ggml_backend_cpu_graph_compute, /* .graph_compute = */ ggml_backend_cpu_graph_compute,
/* .supports_op = */ ggml_backend_cpu_supports_op, /* .supports_op = */ ggml_backend_cpu_supports_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .event_synchronize = */ NULL,
}; };
static ggml_guid_t ggml_backend_cpu_guid(void) { static ggml_guid_t ggml_backend_cpu_guid(void) {
@ -939,15 +994,27 @@ static bool ggml_is_view_op(enum ggml_op op) {
// scheduler // scheduler
#define GGML_MAX_BACKENDS 16 #ifndef GGML_SCHED_MAX_BACKENDS
#define GGML_MAX_SPLITS 256 #define GGML_SCHED_MAX_BACKENDS 16
#define GGML_MAX_SPLIT_INPUTS 16 #endif
#ifndef GGML_SCHED_MAX_SPLITS
#define GGML_SCHED_MAX_SPLITS 256
#endif
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
#define GGML_SCHED_MAX_SPLIT_INPUTS 16
#endif
#ifndef GGML_SCHED_MAX_COPIES
#define GGML_SCHED_MAX_COPIES 4
#endif
struct ggml_backend_sched_split { struct ggml_backend_sched_split {
int backend_id; int backend_id;
int i_start; int i_start;
int i_end; int i_end;
struct ggml_tensor * inputs[GGML_MAX_SPLIT_INPUTS]; struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
int n_inputs; int n_inputs;
// graph view of this split // graph view of this split
struct ggml_cgraph graph; struct ggml_cgraph graph;
@ -955,45 +1022,53 @@ struct ggml_backend_sched_split {
struct ggml_backend_sched { struct ggml_backend_sched {
bool is_reset; // true if the scheduler has been reset since the last graph split bool is_reset; // true if the scheduler has been reset since the last graph split
bool is_alloc;
int n_backends; int n_backends;
ggml_backend_t backends[GGML_MAX_BACKENDS];
ggml_backend_buffer_type_t bufts[GGML_MAX_BACKENDS];
ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS];
ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
ggml_gallocr_t galloc; ggml_gallocr_t galloc;
// hash keys of the nodes in the graph // hash keys of the nodes in the graph
struct ggml_hash_set hash_set; struct ggml_hash_set hash_set;
// hash values // hash values
int * tensor_backend_id; int * tensor_backend_id;
struct ggml_tensor * (* tensor_copies)[GGML_MAX_BACKENDS]; struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
int * node_backend_ids; // [n_nodes] int * node_backend_ids; // [graph_size]
int n_nodes; int * leaf_backend_ids; // [graph_size]
// copy of the graph with modified inputs // copy of the graph with modified inputs
struct ggml_cgraph * graph; struct ggml_cgraph * graph;
struct ggml_backend_sched_split splits[GGML_MAX_SPLITS]; // graph splits
struct ggml_backend_sched_split splits[GGML_SCHED_MAX_SPLITS];
int n_splits; int n_splits;
// pipeline parallelism support
int n_copies;
int cur_copy;
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
int n_graph_inputs;
struct ggml_context * ctx; struct ggml_context * ctx;
ggml_backend_sched_eval_callback callback_eval; ggml_backend_sched_eval_callback callback_eval;
void * callback_eval_user_data; void * callback_eval_user_data;
// align context_buffer to GGML_MEM_ALIGN // align context_buffer to GGML_MEM_ALIGN
#ifdef _MSC_VER #ifdef _MSC_VER
__declspec(align(GGML_MEM_ALIGN)) __declspec(align(GGML_MEM_ALIGN))
#else #else
__attribute__((aligned(GGML_MEM_ALIGN))) __attribute__((aligned(GGML_MEM_ALIGN)))
#endif #endif
char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)]; char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
}; };
#define hash_id(node) ggml_hash_find_or_insert(sched->hash_set, node) #define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor)
#define tensor_backend_id(node) sched->tensor_backend_id[hash_id(node)] #define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)]
#define tensor_backend(node) (tensor_backend_id(node) == -1 ? NULL : sched->backends[tensor_backend_id(node)])
// returns the priority of the backend, lower id is higher priority // returns the priority of the backend, lower id is higher priority
static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) { static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
@ -1005,7 +1080,8 @@ static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backen
return -1; return -1;
} }
static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, ggml_backend_buffer_t buffer) { static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor) {
ggml_backend_buffer_t buffer = tensor->buffer;
if (buffer == NULL) { if (buffer == NULL) {
return -1; return -1;
} }
@ -1016,12 +1092,16 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, gg
return i; return i;
} }
} }
GGML_ASSERT(false && "tensor buffer type not supported by any backend");
return -1; // silence warning fprintf(stderr, "%s: error: no backend supports buffer type %s used in tensor %s\n",
__func__, ggml_backend_buffer_name(buffer), tensor->name);
GGML_ASSERT(false);
return -1;
} }
#if 0 #if 0
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS][128]; // debug only static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__) #define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
#define GET_CAUSE(node) causes[hash_id(node)] #define GET_CAUSE(node) causes[hash_id(node)]
#else #else
@ -1035,19 +1115,28 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
// assign pre-allocated nodes to their backend // assign pre-allocated nodes to their backend
// dst // dst
int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->buffer); int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor);
if (cur_backend != -1) { if (cur_backend != -1) {
SET_CAUSE(node, "1.dst"); SET_CAUSE(tensor, "1.dst");
return cur_backend; return cur_backend;
} }
// view_src // view_src
if (tensor->view_src != NULL) { if (tensor->view_src != NULL) {
cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src->buffer); cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src);
if (cur_backend != -1) { if (cur_backend != -1) {
SET_CAUSE(node, "1.vsrc"); SET_CAUSE(tensor, "1.vsrc");
return cur_backend; return cur_backend;
} }
} }
// input
if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
cur_backend = sched->n_backends - 1; // last backend (assumed CPU)
SET_CAUSE(tensor, "1.inp");
return cur_backend;
}
// assign nodes that use weights to the backend of the weights // assign nodes that use weights to the backend of the weights
for (int i = 0; i < GGML_MAX_SRC; i++) { for (int i = 0; i < GGML_MAX_SRC; i++) {
const struct ggml_tensor * src = tensor->src[i]; const struct ggml_tensor * src = tensor->src[i];
@ -1055,9 +1144,9 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
continue; continue;
} }
if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) { if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
int src_backend = ggml_backend_sched_backend_from_buffer(sched, src->buffer); int src_backend = ggml_backend_sched_backend_from_buffer(sched, src);
// operations with weights are always run on the same backend as the weights // operations with weights are always run on the same backend as the weights
SET_CAUSE(node, "1.wgt%d", i); SET_CAUSE(tensor, "1.wgt%d", i);
return src_backend; return src_backend;
} }
} }
@ -1093,7 +1182,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
if (ggml_is_view_op(node->op)) { if (ggml_is_view_op(node->op)) {
continue; continue;
} }
ggml_backend_t tensor_backend = tensor_backend(node); ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name, fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)); fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
for (int j = 0; j < GGML_MAX_SRC; j++) { for (int j = 0; j < GGML_MAX_SRC; j++) {
@ -1101,7 +1190,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
if (src == NULL) { if (src == NULL) {
continue; continue;
} }
ggml_backend_t src_backend = tensor_backend(src); ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name, fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name,
fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src)); fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src));
} }
@ -1118,6 +1207,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
// reset splits // reset splits
sched->n_splits = 0; sched->n_splits = 0;
sched->n_graph_inputs = 0;
sched->is_reset = false; sched->is_reset = false;
struct ggml_init_params params = { struct ggml_init_params params = {
@ -1163,7 +1253,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
} }
#ifdef DEBUG_PASS1 #ifdef DEBUG_PASS1
fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif #endif
// pass 2: expand current backend assignments // pass 2: expand current backend assignments
@ -1171,28 +1261,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
// expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend) // expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend)
// thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops // thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops
// pass 2.1 expand gpu up
{
int cur_backend_id = -1;
for (int i = graph->n_nodes - 1; i >= 0; i--) {
struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) {
continue;
}
int tensor_backend_id = tensor_backend_id(node);
if (tensor_backend_id != -1) {
if (tensor_backend_id == sched->n_backends - 1) {
// skip cpu (lowest prio backend)
cur_backend_id = -1;
} else {
cur_backend_id = tensor_backend_id;
}
} else {
tensor_backend_id(node) = cur_backend_id;
SET_CAUSE(node, "2.1");
}
}
}
// pass 2.2 expand gpu down // pass 2.2 expand gpu down
{ {
@ -1217,7 +1285,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
} }
// pass 2.3 expand rest up // pass 2.1 expand gpu up
{ {
int cur_backend_id = -1; int cur_backend_id = -1;
for (int i = graph->n_nodes - 1; i >= 0; i--) { for (int i = graph->n_nodes - 1; i >= 0; i--) {
@ -1227,14 +1295,20 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
int tensor_backend_id = tensor_backend_id(node); int tensor_backend_id = tensor_backend_id(node);
if (tensor_backend_id != -1) { if (tensor_backend_id != -1) {
if (tensor_backend_id == sched->n_backends - 1) {
// skip cpu (lowest prio backend)
cur_backend_id = -1;
} else {
cur_backend_id = tensor_backend_id; cur_backend_id = tensor_backend_id;
}
} else { } else {
tensor_backend_id(node) = cur_backend_id; tensor_backend_id(node) = cur_backend_id;
SET_CAUSE(node, "2.3"); SET_CAUSE(node, "2.1");
} }
} }
} }
// pass 2.4 expand rest down // pass 2.4 expand rest down
{ {
int cur_backend_id = -1; int cur_backend_id = -1;
@ -1252,8 +1326,26 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
} }
} }
// pass 2.3 expand rest up
{
int cur_backend_id = -1;
for (int i = graph->n_nodes - 1; i >= 0; i--) {
struct ggml_tensor * node = graph->nodes[i];
if (ggml_is_view_op(node->op)) {
continue;
}
int tensor_backend_id = tensor_backend_id(node);
if (tensor_backend_id != -1) {
cur_backend_id = tensor_backend_id;
} else {
tensor_backend_id(node) = cur_backend_id;
SET_CAUSE(node, "2.3");
}
}
}
#ifdef DEBUG_PASS2 #ifdef DEBUG_PASS2
fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif #endif
// pass 3: assign backends to remaining src from dst and view_src // pass 3: assign backends to remaining src from dst and view_src
@ -1283,7 +1375,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
} }
} }
#ifdef DEBUG_PASS3 #ifdef DEBUG_PASS3
fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif #endif
// pass 4: split graph, find tensors that need to be copied // pass 4: split graph, find tensors that need to be copied
@ -1315,7 +1407,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
if (tensor_backend_id != cur_backend_id) { if (tensor_backend_id != cur_backend_id) {
sched->splits[cur_split].i_end = i; sched->splits[cur_split].i_end = i;
cur_split++; cur_split++;
GGML_ASSERT(cur_split < GGML_MAX_SPLITS); GGML_ASSERT(cur_split < GGML_SCHED_MAX_SPLITS);
sched->splits[cur_split].backend_id = tensor_backend_id; sched->splits[cur_split].backend_id = tensor_backend_id;
sched->splits[cur_split].i_start = i; sched->splits[cur_split].i_start = i;
sched->splits[cur_split].n_inputs = 0; sched->splits[cur_split].n_inputs = 0;
@ -1328,25 +1420,57 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
if (src == NULL) { if (src == NULL) {
continue; continue;
} }
int src_backend_id = tensor_backend_id(src); int src_backend_id = tensor_backend_id(src);
assert(src_backend_id != -1); // all inputs should be assigned by now assert(src_backend_id != -1); // all inputs should be assigned by now
if (src->flags & GGML_TENSOR_FLAG_INPUT) {
size_t id = hash_id(src);
if (sched->tensor_copies[id][src_backend_id][0] == NULL) {
ggml_backend_t backend = sched->backends[src_backend_id];
for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * tensor_copy;
if (c == sched->cur_copy) {
tensor_copy = src; // use the original tensor as the current copy
} else {
tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
}
if (sched->n_copies > 1) {
ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
}
sched->tensor_copies[id][src_backend_id][c] = tensor_copy;
tensor_backend_id(tensor_copy) = src_backend_id;
SET_CAUSE(tensor_copy, "4.cpy");
}
int n_graph_inputs = sched->n_graph_inputs++;
GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
sched->graph_inputs[n_graph_inputs] = src;
}
}
if (src_backend_id != tensor_backend_id) { if (src_backend_id != tensor_backend_id) {
// create a copy of the input in the split's backend // create a copy of the input in the split's backend
size_t id = hash_id(src); size_t id = hash_id(src);
if (sched->tensor_copies[id][cur_backend_id] == NULL) { if (sched->tensor_copies[id][cur_backend_id][0] == NULL) {
ggml_backend_t backend = sched->backends[cur_backend_id]; ggml_backend_t backend = sched->backends[cur_backend_id];
for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src); struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
ggml_format_name(tensor_copy, "%s#%s", ggml_backend_name(backend), src->name); ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
if (sched->n_copies > 1) {
sched->tensor_copies[id][cur_backend_id] = tensor_copy; ggml_set_input(tensor_copy);
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
}
sched->tensor_copies[id][cur_backend_id][c] = tensor_copy;
tensor_backend_id(tensor_copy) = cur_backend_id; tensor_backend_id(tensor_copy) = cur_backend_id;
SET_CAUSE(tensor_copy, "4.cpy"); SET_CAUSE(tensor_copy, "4.cpy");
}
int n_inputs = sched->splits[cur_split].n_inputs++; int n_inputs = sched->splits[cur_split].n_inputs++;
GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS); GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
sched->splits[cur_split].inputs[n_inputs] = src; sched->splits[cur_split].inputs[n_inputs] = src;
} }
node->src[j] = sched->tensor_copies[id][cur_backend_id]; node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy];
} }
} }
} }
@ -1354,37 +1478,39 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
sched->n_splits = cur_split + 1; sched->n_splits = cur_split + 1;
} }
#ifdef DEBUG_PASS4 #ifdef DEBUG_PASS4
fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
#endif #endif
#ifndef NDEBUG #ifndef NDEBUG
// sanity check: all sources should have the same backend as the node // sanity check: all sources should have the same backend as the node
for (int i = 0; i < graph->n_nodes; i++) { for (int i = 0; i < graph->n_nodes; i++) {
struct ggml_tensor * node = graph->nodes[i]; struct ggml_tensor * node = graph->nodes[i];
ggml_backend_t tensor_backend = tensor_backend(node); ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
if (tensor_backend == NULL) { if (tensor_backend == NULL) {
fprintf(stderr, "!!!!!!! %s has no backend\n", node->name); fprintf(stderr, "!!!!!!! %s has no backend\n", node->name);
} }
if (node->view_src != NULL && tensor_backend != tensor_backend(node->view_src)) { if (node->view_src != NULL && tensor_backend != ggml_backend_sched_get_tensor_backend(sched, node->view_src)) {
fprintf(stderr, "!!!!!!! %s has backend %s, view_src %s has backend %s\n", fprintf(stderr, "!!!!!!! %s has backend %s, view_src %s has backend %s\n",
node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL",
node->view_src->name, tensor_backend(node->view_src) ? ggml_backend_name(tensor_backend(node->view_src)) : "NULL"); node->view_src->name, ggml_backend_sched_get_tensor_backend(sched, node->view_src) ?
ggml_backend_name(ggml_backend_sched_get_tensor_backend(sched, node->view_src)) : "NULL");
} }
for (int j = 0; j < GGML_MAX_SRC; j++) { for (int j = 0; j < GGML_MAX_SRC; j++) {
struct ggml_tensor * src = node->src[j]; struct ggml_tensor * src = node->src[j];
if (src == NULL) { if (src == NULL) {
continue; continue;
} }
ggml_backend_t src_backend = tensor_backend(src); ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
if (src_backend != tensor_backend /* && src_backend != NULL */) { if (src_backend != tensor_backend /* && src_backend != NULL */) {
fprintf(stderr, "!!!! %s has backend %s, src %d (%s) has backend %s\n", fprintf(stderr, "!!!! %s has backend %s, src %d (%s) has backend %s\n",
node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL",
j, src->name, src_backend ? ggml_backend_name(src_backend) : "NULL"); j, src->name, src_backend ? ggml_backend_name(src_backend) : "NULL");
} }
if (src->view_src != NULL && src_backend != tensor_backend(src->view_src)) { if (src->view_src != NULL && src_backend != ggml_backend_sched_get_tensor_backend(sched, src->view_src)) {
fprintf(stderr, "!!!!!!! [src] %s has backend %s, view_src %s has backend %s\n", fprintf(stderr, "!!!!!!! [src] %s has backend %s, view_src %s has backend %s\n",
src->name, src_backend ? ggml_backend_name(src_backend) : "NULL", src->name, src_backend ? ggml_backend_name(src_backend) : "NULL",
src->view_src->name, tensor_backend(src->view_src) ? ggml_backend_name(tensor_backend(src->view_src)) : "NULL"); src->view_src->name, ggml_backend_sched_get_tensor_backend(sched, src->view_src) ?
ggml_backend_name(ggml_backend_sched_get_tensor_backend(sched, src->view_src)) : "NULL");
} }
} }
} }
@ -1392,18 +1518,20 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
#endif #endif
// create copies of the graph for each split // create copies of the graph for each split
// FIXME: avoid this copy, pass split inputs to ggml_gallocr_alloc_graph_n in some other way // TODO: avoid this copy
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_MAX_SPLIT_INPUTS, false); struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS, false);
for (int i = 0; i < sched->n_splits; i++) { for (int i = 0; i < sched->n_splits; i++) {
struct ggml_backend_sched_split * split = &sched->splits[i]; struct ggml_backend_sched_split * split = &sched->splits[i];
split->graph = ggml_graph_view(graph, split->i_start, split->i_end); split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
for (int j = 0; j < split->n_inputs; j++) { for (int j = 0; j < split->n_inputs; j++) {
struct ggml_tensor * input = split->inputs[j]; struct ggml_tensor * input = split->inputs[j];
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id]; struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id][sched->cur_copy];
// add a dependency to the input source so that it is not freed before the copy is done // add a dependency to the input source so that it is not freed before the copy is done
struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input); struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
input_dep->src[0] = input;
sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(input); sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(input);
graph_copy->nodes[graph_copy->n_nodes++] = input_dep; graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
@ -1417,18 +1545,56 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j]; graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j];
} }
} }
if (sched->n_copies > 1) {
// add input copies as leafs so that they are allocated first
for (int i = 0; i < sched->n_graph_inputs; i++) {
struct ggml_tensor * input = sched->graph_inputs[i];
size_t id = hash_id(input);
int backend_id = tensor_backend_id(input);
for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
}
}
for (int i = 0; i < sched->n_splits; i++) {
struct ggml_backend_sched_split * split = &sched->splits[i];
int backend_id = split->backend_id;
for (int j = 0; j < split->n_inputs; j++) {
struct ggml_tensor * input = split->inputs[j];
size_t id = hash_id(input);
for (int c = 0; c < sched->n_copies; c++) {
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
}
}
}
}
// add leafs from the original graph
for (int i = 0; i < graph->n_leafs; i++) {
struct ggml_tensor * leaf = graph->leafs[i];
sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
graph_copy->leafs[graph_copy->n_leafs++] = leaf;
}
sched->graph = graph_copy; sched->graph = graph_copy;
} }
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
// ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids); // allocate graph
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
// the re-allocation may cause the split inputs to be moved to a different address
ggml_backend_sched_synchronize(sched);
#ifndef NDEBUG #ifndef NDEBUG
fprintf(stderr, "ggml_backend_sched: failed to allocate graph, reserving\n"); fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__);
#endif #endif
ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids); ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) { if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
fprintf(stderr, "ggml_backend_sched: failed to allocate graph\n"); fprintf(stderr, "%s: failed to allocate graph\n", __func__);
return false; return false;
} }
} }
@ -1437,9 +1603,6 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
} }
static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) { static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
uint64_t copy_us[GGML_MAX_BACKENDS] = {0};
uint64_t compute_us[GGML_MAX_BACKENDS] = {0};
struct ggml_backend_sched_split * splits = sched->splits; struct ggml_backend_sched_split * splits = sched->splits;
for (int i = 0; i < sched->n_splits; i++) { for (int i = 0; i < sched->n_splits; i++) {
@ -1448,34 +1611,36 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
ggml_backend_t split_backend = sched->backends[split_backend_id]; ggml_backend_t split_backend = sched->backends[split_backend_id];
// copy the input tensors to the split backend // copy the input tensors to the split backend
uint64_t copy_start_us = ggml_time_us();
for (int j = 0; j < split->n_inputs; j++) { for (int j = 0; j < split->n_inputs; j++) {
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
struct ggml_tensor * input = split->inputs[j]; struct ggml_tensor * input = split->inputs[j];
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id]; struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy];
GGML_ASSERT(input->buffer != NULL); if (input->flags & GGML_TENSOR_FLAG_INPUT) {
GGML_ASSERT(input_cpy->buffer != NULL); // inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
ggml_backend_tensor_copy_async(split_backend, input, input_cpy); ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
} else {
ggml_backend_synchronize(split_backend);
}
ggml_backend_tensor_copy(input, input_cpy);
} else {
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]);
} else {
ggml_backend_synchronize(split_backend);
ggml_backend_synchronize(input_backend);
} }
//ggml_backend_synchronize(split_backend); // necessary to measure copy time
int64_t copy_end_us = ggml_time_us();
copy_us[split_backend_id] += copy_end_us - copy_start_us;
#if 0 ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy);
char split_filename[GGML_MAX_NAME]; }
snprintf(split_filename, GGML_MAX_NAME, "split_%i_%s.dot", i, ggml_backend_name(split_backend)); }
ggml_graph_dump_dot(split->graph, NULL, split_filename);
#endif
uint64_t compute_start_us = ggml_time_us();
if (!sched->callback_eval) { if (!sched->callback_eval) {
enum ggml_status ec = ggml_backend_graph_compute(split_backend, &split->graph); enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph);
if (ec != GGML_STATUS_SUCCESS) { if (ec != GGML_STATUS_SUCCESS) {
return ec; return ec;
} }
//ggml_backend_synchronize(split_backend); // necessary to measure compute time
} else { } else {
// similar to ggml_backend_compare_graph_backend // similar to ggml_backend_compare_graph_backend
for (int j0 = 0; j0 < split->graph.n_nodes; j0++) { for (int j0 = 0; j0 < split->graph.n_nodes; j0++) {
@ -1494,11 +1659,14 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1); struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
enum ggml_status ec = ggml_backend_graph_compute(split_backend, &gv); enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv);
if (ec != GGML_STATUS_SUCCESS) { if (ec != GGML_STATUS_SUCCESS) {
return ec; return ec;
} }
// TODO: pass backend to the callback, then the user can decide if they want to synchronize
ggml_backend_synchronize(split_backend);
if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) { if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
break; break;
} }
@ -1506,39 +1674,54 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
j0 = j1; j0 = j1;
} }
} }
uint64_t compute_end_us = ggml_time_us();
compute_us[split_backend_id] += compute_end_us - compute_start_us; // record the event of this copy
if (split->n_inputs > 0) {
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy]);
}
}
} }
#if 0 sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
// per-backend timings
fprintf(stderr, "sched_compute_splits times (%d splits):\n", sched->n_splits);
for (int i = 0; i < sched->n_backends; i++) {
if (copy_us[i] > 0 || compute_us[i] > 0) {
fprintf(stderr, "\t%5.5s: %lu us copy, %lu us compute\n", ggml_backend_name(sched->backends[i]), copy_us[i], compute_us[i]);
}
}
#endif
return GGML_STATUS_SUCCESS; return GGML_STATUS_SUCCESS;
} }
ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) { ggml_backend_sched_t ggml_backend_sched_new(
ggml_backend_t * backends,
ggml_backend_buffer_type_t * bufts,
int n_backends,
size_t graph_size,
bool parallel) {
GGML_ASSERT(n_backends > 0); GGML_ASSERT(n_backends > 0);
GGML_ASSERT(n_backends <= GGML_MAX_BACKENDS); GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1); struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
// initialize hash table // initialize hash table
sched->hash_set = ggml_hash_set_new(graph_size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS); sched->hash_set = ggml_hash_set_new(graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS);
sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size); sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size);
sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size); sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size);
sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), graph_size); sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), graph_size);
sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), graph_size);
sched->n_backends = n_backends; sched->n_backends = n_backends;
for (int i = 0; i < n_backends; i++) {
sched->backends[i] = backends[i]; sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
sched->bufts[i] = bufts ? bufts[i] : ggml_backend_get_default_buffer_type(backends[i]);
GGML_ASSERT(sched->n_copies <= GGML_SCHED_MAX_COPIES);
for (int b = 0; b < n_backends; b++) {
sched->backends[b] = backends[b];
sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]);
GGML_ASSERT(ggml_backend_buft_supports_backend(sched->bufts[b], backends[b]));
if (sched->n_copies > 1) {
for (int c = 0; c < sched->n_copies; c++) {
sched->events[b][c] = ggml_backend_event_new(backends[b]);
}
}
} }
sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends); sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
@ -1552,12 +1735,18 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
if (sched == NULL) { if (sched == NULL) {
return; return;
} }
for (int b = 0; b < sched->n_backends; b++) {
for (int c = 0; c < sched->n_copies; c++) {
ggml_backend_event_free(sched->events[b][c]);
}
}
ggml_gallocr_free(sched->galloc); ggml_gallocr_free(sched->galloc);
ggml_free(sched->ctx); ggml_free(sched->ctx);
free(sched->hash_set.keys); free(sched->hash_set.keys);
free(sched->tensor_backend_id); free(sched->tensor_backend_id);
free(sched->tensor_copies); free(sched->tensor_copies);
free(sched->node_backend_ids); free(sched->node_backend_ids);
free(sched->leaf_backend_ids);
free(sched); free(sched);
} }
@ -1569,34 +1758,63 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size); memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
sched->is_reset = true; sched->is_reset = true;
sched->is_alloc = false;
} }
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) { bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
ggml_backend_sched_split_graph(sched, measure_graph); ggml_backend_sched_split_graph(sched, measure_graph);
if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids)) { // TODO: extract this to a separate function
if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
return false; return false;
} }
ggml_backend_sched_reset(sched); ggml_backend_sched_reset(sched);
ggml_backend_sched_synchronize(sched);
return true;
}
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS);
ggml_backend_sched_split_graph(sched, graph);
if (!ggml_backend_sched_alloc_splits(sched)) {
return false;
}
sched->is_alloc = true;
return true; return true;
} }
enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) { enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS); enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph);
ggml_backend_sched_synchronize(sched);
return err;
}
if (!sched->is_reset) { enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
if (!sched->is_reset && !sched->is_alloc) {
ggml_backend_sched_reset(sched); ggml_backend_sched_reset(sched);
} }
ggml_backend_sched_split_graph(sched, graph); if (!sched->is_alloc) {
if (!ggml_backend_sched_alloc_splits(sched)) { if (!ggml_backend_sched_alloc_graph(sched, graph)) {
return GGML_STATUS_ALLOC_FAILED; return GGML_STATUS_ALLOC_FAILED;
} }
}
return ggml_backend_sched_compute_splits(sched); return ggml_backend_sched_compute_splits(sched);
} }
void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
for (int i = 0; i < sched->n_backends; i++) {
ggml_backend_synchronize(sched->backends[i]);
}
}
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) { void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
sched->callback_eval = callback; sched->callback_eval = callback;
sched->callback_eval_user_data = user_data; sched->callback_eval_user_data = user_data;
@ -1606,19 +1824,24 @@ int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
return sched->n_splits; return sched->n_splits;
} }
int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
return sched->n_copies;
}
size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) { size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
int backend_index = ggml_backend_sched_backend_id(sched, backend); int backend_index = ggml_backend_sched_backend_id(sched, backend);
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
return ggml_gallocr_get_buffer_size(sched->galloc, backend_index); return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
} }
void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) { void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
int backend_index = ggml_backend_sched_backend_id(sched, backend); int backend_index = ggml_backend_sched_backend_id(sched, backend);
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends); GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
tensor_backend_id(node) = backend_index; tensor_backend_id(node) = backend_index;
} }
ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) { ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
int backend_index = tensor_backend_id(node); int backend_index = tensor_backend_id(node);
if (backend_index == -1) { if (backend_index == -1) {
return NULL; return NULL;

View file

@ -9,6 +9,7 @@ extern "C" {
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t; typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
typedef struct ggml_backend_buffer * ggml_backend_buffer_t; typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
typedef struct ggml_backend_event * ggml_backend_event_t;
typedef struct ggml_backend * ggml_backend_t; typedef struct ggml_backend * ggml_backend_t;
typedef void * ggml_backend_graph_plan_t; typedef void * ggml_backend_graph_plan_t;
@ -72,11 +73,24 @@ extern "C" {
GGML_API enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan); GGML_API enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph); GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API bool ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op); GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
// tensor copy between different backends // tensor copy between different backends
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst); GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
// asynchronous copy
// the copy is performed after all the currently queued operations in backend_src
// backend_dst will wait for the copy to complete before performing other operations
// automatic fallback to sync copy if async is not supported
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
// events
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event
// //
// CPU backend // CPU backend
@ -123,27 +137,31 @@ extern "C" {
/* /*
Example usage: Example usage:
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends); // operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be asigned
// sched is initialized with measure allocators and cannot be used until allocated with a measure graph // preferrably to run on the same backend as the buffer
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
// initialize buffers from a measure graph sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed
// initialize buffers from a max size graph (optional)
reserve_graph = build_graph(sched, max_batch_size);
// in build_graph:
build_graph(...) {
// manually assign nodes to a backend (optional, should not be needed in most cases) // manually assign nodes to a backend (optional, should not be needed in most cases)
struct ggml_tensor * node = ggml_mul_mat(ctx, ...); struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
ggml_backend_sched_set_node_backend(sched, node, backend_gpu); ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
}
// allocate backend buffers from measure graph ggml_backend_sched_reserve(sched, reserve_graph);
ggml_backend_sched_init_measure(sched, measure_graph);
// the scheduler is now ready to compute graphs
// compute // compute
graph = build_graph(sched); graph = build_graph(sched);
ggml_backend_sched_graph_compute(sched, graph); ggml_backend_sched_graph_compute(sched, graph);
// if there are graph inputs:
ggml_backend_sched_reset(sched);
ggml_backend_sched_alloc_graph(sched, graph);
ggml_backend_tensor_set(input_tensor, ...);
ggml_backend_sched_graph_compute(sched, graph);
}
*/ */
struct ggml_backend_sched; struct ggml_backend_sched;
@ -158,20 +176,26 @@ extern "C" {
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data); typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
// Initialize a backend scheduler // Initialize a backend scheduler
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size); GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched); GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
// Initialize backend buffers from a measure graph // Initialize backend buffers from a measure graph
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph); GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
// Get the number of splits of the last graph // Get the number of splits of the last graph
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched); GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend); GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend); GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node); GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
// Allocate and compute graph on the backend scheduler // Allocate and compute graph on the backend scheduler
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph); GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
// Reset all assignments and allocators - must be called before changing the node backends // Reset all assignments and allocators - must be called before changing the node backends
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched); GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);

File diff suppressed because it is too large Load diff

View file

@ -2,7 +2,13 @@
#include "ggml.h" #include "ggml.h"
#include "ggml-backend-impl.h" #include "ggml-backend-impl.h"
#if defined(GGML_USE_HIPBLAS)
#define GGML_COMMON_DECL_HIP
#define GGML_COMMON_IMPL_HIP
#else
#define GGML_COMMON_DECL_CUDA
#define GGML_COMMON_IMPL_CUDA #define GGML_COMMON_IMPL_CUDA
#endif
#include "ggml-common.h" #include "ggml-common.h"
#include <algorithm> #include <algorithm>
@ -66,6 +72,7 @@
#define cudaEventCreateWithFlags hipEventCreateWithFlags #define cudaEventCreateWithFlags hipEventCreateWithFlags
#define cudaEventDisableTiming hipEventDisableTiming #define cudaEventDisableTiming hipEventDisableTiming
#define cudaEventRecord hipEventRecord #define cudaEventRecord hipEventRecord
#define cudaEventSynchronize hipEventSynchronize
#define cudaEvent_t hipEvent_t #define cudaEvent_t hipEvent_t
#define cudaEventDestroy hipEventDestroy #define cudaEventDestroy hipEventDestroy
#define cudaFree hipFree #define cudaFree hipFree
@ -75,6 +82,7 @@
#define cudaGetDeviceProperties hipGetDeviceProperties #define cudaGetDeviceProperties hipGetDeviceProperties
#define cudaGetErrorString hipGetErrorString #define cudaGetErrorString hipGetErrorString
#define cudaGetLastError hipGetLastError #define cudaGetLastError hipGetLastError
#define cudaLaunchHostFunc hipLaunchHostFunc
#ifdef GGML_HIP_UMA #ifdef GGML_HIP_UMA
#define cudaMalloc hipMallocManaged #define cudaMalloc hipMallocManaged
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size) #define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
@ -98,6 +106,7 @@
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags #define cudaStreamCreateWithFlags hipStreamCreateWithFlags
#define cudaStreamFireAndForget hipStreamFireAndForget #define cudaStreamFireAndForget hipStreamFireAndForget
#define cudaStreamNonBlocking hipStreamNonBlocking #define cudaStreamNonBlocking hipStreamNonBlocking
#define cudaStreamPerThread hipStreamPerThread
#define cudaStreamSynchronize hipStreamSynchronize #define cudaStreamSynchronize hipStreamSynchronize
#define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags) #define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
#define cudaStream_t hipStream_t #define cudaStream_t hipStream_t
@ -359,66 +368,6 @@ typedef void (*ggml_cuda_op_flatten_t)(
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream); const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream);
// QK = number of values after dequantization
// QR = QK / number of values before dequantization
// QI = number of 32 bit integers before dequantization
#define QK4_0 32
#define QR4_0 2
#define QI4_0 (QK4_0 / (4 * QR4_0))
typedef struct {
half d; // delta
uint8_t qs[QK4_0 / 2]; // nibbles / quants
} block_q4_0;
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
#define QK4_1 32
#define QR4_1 2
#define QI4_1 (QK4_1 / (4 * QR4_1))
typedef struct {
half2 dm; // dm.x = delta, dm.y = min
uint8_t qs[QK4_1 / 2]; // nibbles / quants
} block_q4_1;
static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding");
#define QK5_0 32
#define QR5_0 2
#define QI5_0 (QK5_0 / (4 * QR5_0))
typedef struct {
half d; // delta
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_0 / 2]; // nibbles / quants
} block_q5_0;
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
#define QK5_1 32
#define QR5_1 2
#define QI5_1 (QK5_1 / (4 * QR5_1))
typedef struct {
half2 dm; // dm.x = delta, dm.y = min
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_1 / 2]; // nibbles / quants
} block_q5_1;
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
#define QK8_0 32
#define QR8_0 1
#define QI8_0 (QK8_0 / (4 * QR8_0))
typedef struct {
half d; // delta
int8_t qs[QK8_0]; // quants
} block_q8_0;
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
#define QK8_1 32
#define QR8_1 1
#define QI8_1 (QK8_1 / (4 * QR8_1))
typedef struct {
half2 ds; // ds.x = delta, ds.y = sum
int8_t qs[QK8_0]; // quants
} block_q8_1;
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding");
typedef float (*vec_dot_q_cuda_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs); typedef float (*vec_dot_q_cuda_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs);
typedef void (*allocate_tiles_cuda_t)(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc); typedef void (*allocate_tiles_cuda_t)(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc);
typedef void (*load_tiles_cuda_t)( typedef void (*load_tiles_cuda_t)(
@ -428,174 +377,6 @@ typedef float (*vec_dot_q_mul_mat_cuda_t)(
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc, const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
const int * __restrict__ y_qs, const half2 * __restrict__ y_ms, const int & i, const int & j, const int & k); const int * __restrict__ y_qs, const half2 * __restrict__ y_ms, const int & i, const int & j, const int & k);
//================================= k-quants
#ifdef GGML_QKK_64
#define QK_K 64
#define K_SCALE_SIZE 4
#else
#define QK_K 256
#define K_SCALE_SIZE 12
#endif
#define QR2_K 4
#define QI2_K (QK_K / (4*QR2_K))
typedef struct {
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
uint8_t qs[QK_K/4]; // quants
half2 dm; // super-block scale for quantized scales/mins
} block_q2_K;
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
#define QR3_K 4
#define QI3_K (QK_K / (4*QR3_K))
typedef struct {
uint8_t hmask[QK_K/8]; // quants - high bit
uint8_t qs[QK_K/4]; // quants - low 2 bits
#ifdef GGML_QKK_64
uint8_t scales[2]; // scales, quantized with 8 bits
#else
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
#endif
half d; // super-block scale
} block_q3_K;
//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding");
#define QR4_K 2
#define QI4_K (QK_K / (4*QR4_K))
#ifdef GGML_QKK_64
typedef struct {
half dm[2]; // super-block scales/mins
uint8_t scales[2]; // 4-bit block scales/mins
uint8_t qs[QK_K/2]; // 4--bit quants
} block_q4_K;
static_assert(sizeof(block_q4_K) == sizeof(half2) + QK_K/2 + 2, "wrong q4_K block size/padding");
#else
typedef struct {
half2 dm; // super-block scale for quantized scales/mins
uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits
uint8_t qs[QK_K/2]; // 4--bit quants
} block_q4_K;
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding");
#endif
#define QR5_K 2
#define QI5_K (QK_K / (4*QR5_K))
#ifdef GGML_QKK_64
typedef struct {
half d; // super-block scale
int8_t scales[QK_K/16]; // block scales
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
#else
typedef struct {
half2 dm; // super-block scale for quantized scales/mins
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
#endif
#define QR6_K 2
#define QI6_K (QK_K / (4*QR6_K))
typedef struct {
uint8_t ql[QK_K/2]; // quants, lower 4 bits
uint8_t qh[QK_K/4]; // quants, upper 2 bits
int8_t scales[QK_K/16]; // scales
half d; // delta
} block_q6_K;
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding");
#define QR2_XXS 8
#define QI2_XXS (QK_K / (4*QR2_XXS))
typedef struct {
half d;
uint16_t qs[QK_K/8];
} block_iq2_xxs;
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
#define QR2_XS 8
#define QI2_XS (QK_K / (4*QR2_XS))
typedef struct {
half d;
uint16_t qs[QK_K/8];
uint8_t scales[QK_K/32];
} block_iq2_xs;
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
// 2.5625 bpw quants
#define QR2_S 8
#define QI2_S (QK_K / (4*QR2_S))
typedef struct {
half d;
uint8_t qs[QK_K/4];
uint8_t qh[QK_K/32];
uint8_t scales[QK_K/32];
} block_iq2_s;
static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
#define QR3_XXS 8
#define QI3_XXS (QK_K / (4*QR3_XXS))
typedef struct {
half d;
uint8_t qs[3*(QK_K/8)];
} block_iq3_xxs;
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
#define QR3_XS 8
#define QI3_XS (QK_K / (4*QR3_XS))
#if QK_K == 64
#define IQ3S_N_SCALE 2
#else
#define IQ3S_N_SCALE QK_K/64
#endif
typedef struct {
half d;
uint8_t qs[QK_K/4];
uint8_t qh[QK_K/32];
uint8_t signs[QK_K/8];
uint8_t scales[IQ3S_N_SCALE];
} block_iq3_s;
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
#define QR1_S 8
#define QI1_S (QK_K / (4*QR1_S))
typedef struct {
half d;
uint8_t qs[QK_K/8];
uint8_t scales[QK_K/16];
} block_iq1_s;
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
#define QK4_NL 32
#define QR4_NL 2
#define QI4_NL (QK4_NL / (4*QR4_NL))
typedef struct {
half d;
uint8_t qs[QK4_NL/2];
} block_iq4_nl;
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
#if QK_K == 64
#define block_iq4_xs block_iq4_nl
#define QR4_XS QR4_NL
#define QI4_XS QI4_NL
#else
// QR4_XS = 8 is very slightly faster than QR4_XS = 4
#define QR4_XS 8
#define QI4_XS (QK_K / (4*QR4_XS))
typedef struct {
half d;
uint16_t scales_h;
uint8_t scales_l[QK_K/64];
uint8_t qs[QK_K/2];
} block_iq4_xs;
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
#endif
#define WARP_SIZE 32 #define WARP_SIZE 32
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
@ -1722,11 +1503,15 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_
const int il = tid/8; // 0...3 const int il = tid/8; // 0...3
const int ib = tid%8; // 0...7 const int ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il; dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const int i8 = 4*ib+il; const float delta = x[i].qh[ib] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA;
uint8_t h = x[i].scales[i8/2] >> 4*(i8%2); const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 7) + 1);
const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[i8] | ((h & 8) << 5))); uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32;
const float d = (float)x[i].d * (2*(h & 7) + 1); grid32[0] = iq1s_grid_gpu[x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)];
for (int j = 0; j < 8; ++j) y[j] = d * grid[j]; grid32[1] = (grid32[0] >> 4) & 0x0f0f0f0f;
grid32[0] &= 0x0f0f0f0f;
for (int j = 0; j < 8; ++j) {
y[j] = d * (q[j] + delta);
}
#else #else
assert(false); assert(false);
#endif #endif
@ -3566,7 +3351,7 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
#pragma unroll #pragma unroll
for (int i = 0; i < QR2_K; ++ i) { for (int i = 0; i < QR2_K; ++ i) {
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1); u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
d8[i] = __low2half(bq8_1[bq8_offset + i].ds); d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
} }
return vec_dot_q2_K_q8_1_impl_mmvq(v, u, scales, bq2_K->dm, d8); return vec_dot_q2_K_q8_1_impl_mmvq(v, u, scales, bq2_K->dm, d8);
@ -3688,7 +3473,7 @@ static __device__ __forceinline__ float vec_dot_q3_K_q8_1(
#pragma unroll #pragma unroll
for (int i = 0; i < QR3_K; ++i) { for (int i = 0; i < QR3_K; ++i) {
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1); u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
d8[i] = __low2half(bq8_1[bq8_offset + i].ds); d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
} }
return vec_dot_q3_K_q8_1_impl_mmvq(vl, vh, u, bq3_K->scales, scale_offset, d, d8); return vec_dot_q3_K_q8_1_impl_mmvq(vl, vh, u, bq3_K->scales, scale_offset, d, d8);
@ -3857,7 +3642,7 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1(
for (int i = 0; i < QR4_K; ++i) { for (int i = 0; i < QR4_K; ++i) {
const block_q8_1 * bq8i = bq8_1 + bq8_offset + i; const block_q8_1 * bq8i = bq8_1 + bq8_offset + i;
d8[i] = __low2half(bq8i->ds); d8[i] = __low2float(bq8i->ds);
const int * q8 = (const int *)bq8i->qs + ((iqs/2)%4); const int * q8 = (const int *)bq8i->qs + ((iqs/2)%4);
u[2*i+0] = q8[0]; u[2*i+0] = q8[0];
@ -4222,7 +4007,7 @@ static __device__ __forceinline__ float vec_dot_q6_K_q8_1(
#pragma unroll #pragma unroll
for (int i = 0; i < QR6_K; ++i) { for (int i = 0; i < QR6_K; ++i) {
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + 2*i].qs, iqs % QI8_1); u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + 2*i].qs, iqs % QI8_1);
d8[i] = __low2half(bq8_1[bq8_offset + 2*i].ds); d8[i] = __low2float(bq8_1[bq8_offset + 2*i].ds);
} }
return vec_dot_q6_K_q8_1_impl_mmvq(vl, vh, u, scales, bq6_K->d, d8); return vec_dot_q6_K_q8_1_impl_mmvq(vl, vh, u, scales, bq6_K->d, d8);
@ -4538,44 +4323,36 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
#endif #endif
} }
static __device__ __forceinline__ float vec_dot_iq1_s_q8_1( static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) { const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
#if QK_K == 256 #if QK_K == 256
const block_iq1_s * bq1 = (const block_iq1_s *) vbq; const block_iq1_s * bq1 = (const block_iq1_s *) vbq;
const int ib32 = iqs; const int ib32 = iqs;
int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0; int sumi = 0;
const uint8_t h1 = bq1->scales[2*ib32+0];
const uint8_t h2 = bq1->scales[2*ib32+1];
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics #if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
const int * q8 = (const int *)bq8_1[ib32].qs; const int * q8 = (const int *)bq8_1[ib32].qs;
const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); for (int l = 0; l < 4; ++l) {
const int * grid2 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
const int * grid3 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); int grid0 = grid[0] & 0x0f0f0f0f;
const int * grid4 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); int grid1 = (grid[0] >> 4) & 0x0f0f0f0f;
for (int j = 0; j < 2; ++j) { sumi = __dp4a(q8[2*l+1], grid1, __dp4a(q8[2*l+0], grid0, sumi));
sumi1 = __dp4a(q8[j+0], grid1[j], sumi1);
sumi2 = __dp4a(q8[j+2], grid2[j], sumi2);
sumi3 = __dp4a(q8[j+4], grid3[j], sumi3);
sumi4 = __dp4a(q8[j+6], grid4[j], sumi4);
} }
#else #else
const int8_t * q8 = bq8_1[ib32].qs; const int8_t * q8 = bq8_1[ib32].qs;
const int8_t * grid1 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5))); for (int l = 0; l < 4; ++l) {
const int8_t * grid2 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1))); const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
const int8_t * grid3 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5))); for (int j = 0; j < 4; ++j) {
const int8_t * grid4 = (const int8_t *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1))); sumi += q8[j] * (grid[j] & 0xf) + q8[j+4] * (grid[j] >> 4);
for (int j = 0; j < 8; ++j) { }
sumi1 += q8[j+ 0] * grid1[j]; q8 += 8;
sumi2 += q8[j+ 8] * grid2[j];
sumi3 += q8[j+16] * grid3[j];
sumi4 += q8[j+24] * grid4[j];
} }
#endif #endif
const float d = (float)bq1->d * __low2float(bq8_1[ib32].ds); const float delta = bq1->qh[ib32] & 0x8000 ? -1-IQ1S_DELTA : -1+IQ1S_DELTA;
return d * (sumi1 * (2*(h1 & 7) + 1) + sumi2 * (2*((h1 >> 4) & 7) + 1) + const float d1q = (float)bq1->d * (2*((bq1->qh[ib32] >> 12) & 7) + 1);
sumi3 * (2*(h2 & 7) + 1) + sumi4 * (2*((h2 >> 4) & 7) + 1)); const float d = d1q * __low2float (bq8_1[ib32].ds);
const float m = d1q * __high2float(bq8_1[ib32].ds);
return d * sumi + m * delta;
#else #else
assert(false); assert(false);
return 0.f; return 0.f;
@ -4767,7 +4544,7 @@ static __device__ __forceinline__ void mul_mat_q(
*dsi_dst = *dsi_src; *dsi_dst = *dsi_src;
} else { } else {
float * dfi_dst = (float *) dsi_dst; float * dfi_dst = (float *) dsi_dst;
*dfi_dst = __low2half(*dsi_src); *dfi_dst = __low2float(*dsi_src);
} }
} }
@ -10867,8 +10644,20 @@ GGML_CALL void ggml_cuda_get_device_description(int device, char * description,
#define UNUSED GGML_UNUSED #define UNUSED GGML_UNUSED
struct ggml_backend_cuda_context { struct ggml_backend_cuda_context {
explicit ggml_backend_cuda_context(int device) :
device(device),
name(GGML_CUDA_NAME + std::to_string(device)) {
}
~ggml_backend_cuda_context() {
if (copy_event != nullptr) {
CUDA_CHECK(cudaEventDestroy(copy_event));
}
}
int device; int device;
std::string name; std::string name;
cudaEvent_t copy_event = nullptr;
}; };
// cuda buffer // cuda buffer
@ -10958,9 +10747,8 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
ggml_cuda_set_device(ctx->device); ggml_cuda_set_device(ctx->device);
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cudaStreamPerThread));
CUDA_CHECK(cudaMemcpy((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice)); CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
CUDA_CHECK(cudaDeviceSynchronize());
} }
GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
@ -10969,26 +10757,25 @@ GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context; ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
ggml_cuda_set_device(ctx->device); ggml_cuda_set_device(ctx->device);
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
CUDA_CHECK(cudaMemcpy(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost)); CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
CUDA_CHECK(cudaDeviceSynchronize());
} }
GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
if (ggml_backend_buffer_is_cuda(src->buffer)) { if (ggml_backend_buffer_is_cuda(src->buffer)) {
ggml_backend_cuda_buffer_context * src_ctx = (ggml_backend_cuda_buffer_context *)src->buffer->context; ggml_backend_cuda_buffer_context * src_ctx = (ggml_backend_cuda_buffer_context *)src->buffer->context;
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)buffer->context; ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)dst->buffer->context;
if (src_ctx->device == dst_ctx->device) {
ggml_cuda_set_device(src_ctx->device); CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice, cudaStreamPerThread));
CUDA_CHECK(cudaDeviceSynchronize()); } else {
ggml_cuda_set_device(dst_ctx->device); CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
CUDA_CHECK(cudaDeviceSynchronize()); }
CUDA_CHECK(cudaMemcpy((char *)dst->data, (const char *)src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice)); CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
CUDA_CHECK(cudaDeviceSynchronize());
return true; return true;
} }
return false; return false;
UNUSED(buffer);
} }
GGML_CALL static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { GGML_CALL static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
@ -11233,7 +11020,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buf
} }
const char * buf_host = (const char *)data + offset_split; const char * buf_host = (const char *)data + offset_split;
CUDA_CHECK(cudaMemcpy(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice)); CUDA_CHECK(cudaMemcpyAsync(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice, cudaStreamPerThread));
}
for (int id = 0; id < g_device_count; ++id) {
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
} }
} }
@ -11267,7 +11058,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_get_tensor(ggml_backend_buf
} }
char * buf_host = (char *)data + offset_split; char * buf_host = (char *)data + offset_split;
CUDA_CHECK(cudaMemcpy(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost)); CUDA_CHECK(cudaMemcpyAsync(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
}
for (int id = 0; id < g_device_count; ++id) {
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
} }
} }
@ -11446,6 +11241,10 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
return &ggml_backend_cuda_buffer_type_host; return &ggml_backend_cuda_buffer_type_host;
} }
//static bool ggml_backend_buffer_is_cuda_host(ggml_backend_buffer_t buffer) {
// return buffer->buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
//}
// backend // backend
GGML_CALL static const char * ggml_backend_cuda_name(ggml_backend_t backend) { GGML_CALL static const char * ggml_backend_cuda_name(ggml_backend_t backend) {
@ -11469,8 +11268,9 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cuda_get_default_buffer
GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type"); GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU); GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx->device][0])); CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx->device][0]));
@ -11478,22 +11278,61 @@ GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend,
GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type"); GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU); GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0])); CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0]));
} }
GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend, const ggml_tensor * src, ggml_tensor * dst) { GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context; GGML_ASSERT(ggml_backend_is_cuda(backend_src) || ggml_backend_is_cuda(backend_dst));
if (dst->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && ggml_backend_buffer_is_cuda(src->buffer)) { ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx->device][0])); ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;
return true;
if (!ggml_backend_buffer_is_cuda(src->buffer)) {
return false;
} }
if (!ggml_backend_buffer_is_cuda(dst->buffer)) {
return false; return false;
}
// device -> device
ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context;
ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context;
if (backend_src != backend_dst) {
ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context;
ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context;
GGML_ASSERT(cuda_ctx_src->device == buf_ctx_src->device);
GGML_ASSERT(cuda_ctx_dst->device == buf_ctx_dst->device);
if (!cuda_ctx_src->copy_event) {
ggml_cuda_set_device(cuda_ctx_src->device);
CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
}
// copy on src stream
if (cuda_ctx_src->device == cuda_ctx_dst->device) {
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
} else {
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), g_cudaStreams[cuda_ctx_src->device][0]));
}
// record event on src stream
CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, g_cudaStreams[cuda_ctx_src->device][0]));
// wait on dst stream for the copy to complete
CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx_dst->device][0], cuda_ctx_src->copy_event, 0));
} else {
// src and dst are on the same backend
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
}
return true;
} }
GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) { GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
@ -11670,6 +11509,52 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
UNUSED(backend); UNUSED(backend);
} }
static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
ggml_cuda_set_device(cuda_ctx->device);
cudaEvent_t event;
CUDA_CHECK(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
return new ggml_backend_event {
/* .backend = */ backend,
/* .context = */ event,
};
}
static void ggml_backend_cuda_event_free(ggml_backend_event_t event) {
CUDA_CHECK(cudaEventDestroy((cudaEvent_t)event->context));
delete event;
}
static void ggml_backend_cuda_event_record(ggml_backend_event_t event) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)event->backend->context;
CUDA_CHECK(cudaEventRecord((cudaEvent_t)event->context, g_cudaStreams[cuda_ctx->device][0]));
}
static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
if (ggml_backend_is_cuda(event->backend)) {
CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx->device][0], (cudaEvent_t)event->context, 0));
} else {
// untested
auto wait_fn = [](void * user_data) {
ggml_backend_event_t event = (ggml_backend_event_t)user_data;
ggml_backend_event_synchronize(event);
};
CUDA_CHECK(cudaLaunchHostFunc(g_cudaStreams[cuda_ctx->device][0], wait_fn, event));
}
}
static void ggml_backend_cuda_event_synchronize(ggml_backend_event_t event) {
CUDA_CHECK(cudaEventSynchronize((cudaEvent_t)event->context));
}
static ggml_backend_i ggml_backend_cuda_interface = { static ggml_backend_i ggml_backend_cuda_interface = {
/* .get_name = */ ggml_backend_cuda_name, /* .get_name = */ ggml_backend_cuda_name,
/* .free = */ ggml_backend_cuda_free, /* .free = */ ggml_backend_cuda_free,
@ -11683,6 +11568,11 @@ static ggml_backend_i ggml_backend_cuda_interface = {
/* .graph_plan_compute = */ NULL, /* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_cuda_graph_compute, /* .graph_compute = */ ggml_backend_cuda_graph_compute,
/* .supports_op = */ ggml_backend_cuda_supports_op, /* .supports_op = */ ggml_backend_cuda_supports_op,
/* .event_new = */ ggml_backend_cuda_event_new,
/* .event_free = */ ggml_backend_cuda_event_free,
/* .event_record = */ ggml_backend_cuda_event_record,
/* .event_wait = */ ggml_backend_cuda_event_wait,
/* .event_synchronize = */ ggml_backend_cuda_event_synchronize,
}; };
static ggml_guid_t ggml_backend_cuda_guid() { static ggml_guid_t ggml_backend_cuda_guid() {
@ -11701,10 +11591,11 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device) {
// not strictly necessary, but it may reduce the overhead of the first graph_compute // not strictly necessary, but it may reduce the overhead of the first graph_compute
ggml_cuda_set_main_device(device); ggml_cuda_set_main_device(device);
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context { ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context(device);
/* .device = */ device, if (ctx == nullptr) {
/* .name = */ GGML_CUDA_NAME + std::to_string(device), fprintf(stderr, "%s: error: failed to allocate context\n", __func__);
}; return nullptr;
}
ggml_backend_t cuda_backend = new ggml_backend { ggml_backend_t cuda_backend = new ggml_backend {
/* .guid = */ ggml_backend_cuda_guid(), /* .guid = */ ggml_backend_cuda_guid(),

View file

@ -53,26 +53,30 @@ extern "C" {
// //
#include <arm_neon.h> #include <arm_neon.h>
typedef __fp16 ggml_fp16_internal_t;
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
__fp16 tmp; ggml_fp16_internal_t tmp;
memcpy(&tmp, &h, sizeof(ggml_fp16_t)); memcpy(&tmp, &h, sizeof(ggml_fp16_t));
return (float)tmp; return (float)tmp;
} }
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
ggml_fp16_t res; ggml_fp16_t res;
__fp16 tmp = f; ggml_fp16_internal_t tmp = f;
memcpy(&res, &tmp, sizeof(ggml_fp16_t)); memcpy(&res, &tmp, sizeof(ggml_fp16_t));
return res; return res;
} }
#else #else
typedef uint16_t ggml_fp16_internal_t;
#ifdef __wasm_simd128__ #ifdef __wasm_simd128__
#include <wasm_simd128.h> #include <wasm_simd128.h>
#else #else

View file

@ -1951,6 +1951,11 @@ static struct ggml_backend_i kompute_backend_i = {
/* .graph_plan_compute = */ NULL, /* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_kompute_graph_compute, /* .graph_compute = */ ggml_backend_kompute_graph_compute,
/* .supports_op = */ ggml_backend_kompute_supports_op, /* .supports_op = */ ggml_backend_kompute_supports_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .event_synchronize = */ NULL,
}; };
static ggml_guid_t ggml_backend_kompute_guid() { static ggml_guid_t ggml_backend_kompute_guid() {

View file

@ -280,6 +280,11 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
id<MTLLibrary> metal_library; id<MTLLibrary> metal_library;
// load library // load library
//
// - first check if the library is embedded
// - then check if the library is in the bundle
// - if not found, load the source and compile it
// - if that fails, return NULL
{ {
NSBundle * bundle = nil; NSBundle * bundle = nil;
#ifdef SWIFT_PACKAGE #ifdef SWIFT_PACKAGE
@ -287,12 +292,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
#else #else
bundle = [NSBundle bundleForClass:[GGMLMetalClass class]]; bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
#endif #endif
NSError * error = nil; NSError * error = nil;
NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"];
if (libPath != nil) { #if GGML_METAL_EMBED_LIBRARY
const bool try_metallib = false;
#else
const bool try_metallib = true;
#endif
NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
if (try_metallib && path_lib != nil) {
// pre-compiled library found // pre-compiled library found
NSURL * libURL = [NSURL fileURLWithPath:libPath]; NSURL * libURL = [NSURL fileURLWithPath:path_lib];
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]); GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
metal_library = [ctx->device newLibraryWithURL:libURL error:&error]; metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
if (error) { if (error) {
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
@ -309,34 +323,37 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
#else #else
GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__); GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
NSString * sourcePath; NSString * path_source;
NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"]; NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, ggmlMetalPathResources ? [ggmlMetalPathResources UTF8String] : "nil"); GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
if (ggmlMetalPathResources) { if (path_resource) {
sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"]; path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
} else { } else {
sourcePath = [bundle pathForResource:@"ggml-metal" ofType:@"metal"]; path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
} }
if (sourcePath == nil) {
if (path_source == nil) {
GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__); GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
sourcePath = @"ggml-metal.metal"; path_source = @"ggml-metal.metal";
} }
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [sourcePath UTF8String]);
NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error]; GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
if (error) { if (error) {
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]); GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
return NULL; return NULL;
} }
#endif #endif // GGML_METAL_EMBED_LIBRARY
@autoreleasepool { @autoreleasepool {
// dictionary of preprocessor macros // dictionary of preprocessor macros
NSMutableDictionary * prep = [NSMutableDictionary dictionary]; NSMutableDictionary * prep = [NSMutableDictionary dictionary];
#ifdef GGML_QKK_64 #ifdef GGML_QKK_64
prep[@"QK_K"] = @(64); prep[@"GGML_QKK_64"] = @(1);
#endif #endif
MTLCompileOptions* options = [MTLCompileOptions new]; MTLCompileOptions* options = [MTLCompileOptions new];
@ -1642,8 +1659,8 @@ static enum ggml_status ggml_metal_graph_compute(
// TODO: make this more general // TODO: make this more general
GGML_ASSERT(n_as <= 8); GGML_ASSERT(n_as <= 8);
// max size of the src1ids array in the kernel stack // max size of the src1ids array in the kernel shared buffer
GGML_ASSERT(ne11 <= 512); GGML_ASSERT(ne11 <= 4096);
const int64_t ne20 = src2 ? src2->ne[0] : 0; const int64_t ne20 = src2 ? src2->ne[0] : 0;
const int64_t ne21 = src2 ? src2->ne[1] : 0; const int64_t ne21 = src2 ? src2->ne[1] : 0;
@ -1741,7 +1758,7 @@ static enum ggml_status ggml_metal_graph_compute(
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j]; [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
} }
[encoder setThreadgroupMemoryLength:8192 atIndex:0]; [encoder setThreadgroupMemoryLength:GGML_PAD(8192 + 2*ne11, 16) atIndex:0];
[encoder dispatchThreadgroups:MTLSizeMake((ne11 + 31)/32, (ne21 + 63)/64, n_as*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)]; [encoder dispatchThreadgroups:MTLSizeMake((ne11 + 31)/32, (ne21 + 63)/64, n_as*ne12*ne13) threadsPerThreadgroup:MTLSizeMake(128, 1, 1)];
} else { } else {
@ -2820,6 +2837,11 @@ static struct ggml_backend_i ggml_backend_metal_i = {
/* .graph_plan_compute = */ NULL, /* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_metal_graph_compute, /* .graph_compute = */ ggml_backend_metal_graph_compute,
/* .supports_op = */ ggml_backend_metal_supports_op, /* .supports_op = */ ggml_backend_metal_supports_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .event_synchronize = */ NULL,
}; };
void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) { void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {

View file

@ -1,49 +1,15 @@
#include <metal_stdlib> #define GGML_COMMON_DECL_METAL
#define GGML_COMMON_IMPL_METAL #define GGML_COMMON_IMPL_METAL
#include "ggml-common.h" #include "ggml-common.h"
#include <metal_stdlib>
using namespace metal; using namespace metal;
#define MAX(x, y) ((x) > (y) ? (x) : (y)) #define MAX(x, y) ((x) > (y) ? (x) : (y))
#define MIN(x, y) ((x) < (y) ? (x) : (y)) #define MIN(x, y) ((x) < (y) ? (x) : (y))
#define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; } #define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; }
#define QK4_0 32
#define QR4_0 2
typedef struct {
half d; // delta
uint8_t qs[QK4_0 / 2]; // nibbles / quants
} block_q4_0;
#define QK4_1 32
typedef struct {
half d; // delta
half m; // min
uint8_t qs[QK4_1 / 2]; // nibbles / quants
} block_q4_1;
#define QK5_0 32
typedef struct {
half d; // delta
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_0 / 2]; // nibbles / quants
} block_q5_0;
#define QK5_1 32
typedef struct {
half d; // delta
half m; // min
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_1 / 2]; // nibbles / quants
} block_q5_1;
#define QK8_0 32
typedef struct {
half d; // delta
int8_t qs[QK8_0]; // quants
} block_q8_0;
#define N_SIMDWIDTH 32 // assuming SIMD group size is 32 #define N_SIMDWIDTH 32 // assuming SIMD group size is 32
enum ggml_sort_order { enum ggml_sort_order {
@ -2478,147 +2444,6 @@ kernel void kernel_concat(
} }
} }
//============================================ k-quants ======================================================
#ifndef QK_K
#define QK_K 256
#else
static_assert(QK_K == 256 || QK_K == 64, "QK_K must be 256 or 64");
#endif
#if QK_K == 256
#define K_SCALE_SIZE 12
#else
#define K_SCALE_SIZE 4
#endif
typedef struct {
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
uint8_t qs[QK_K/4]; // quants
half d; // super-block scale for quantized scales
half dmin; // super-block scale for quantized mins
} block_q2_K;
// 84 bytes / block
typedef struct {
uint8_t hmask[QK_K/8]; // quants - high bit
uint8_t qs[QK_K/4]; // quants - low 2 bits
#if QK_K == 64
uint8_t scales[2];
#else
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
#endif
half d; // super-block scale
} block_q3_K;
#if QK_K == 64
typedef struct {
half d[2]; // super-block scales/mins
uint8_t scales[2];
uint8_t qs[QK_K/2]; // 4-bit quants
} block_q4_K;
#else
typedef struct {
half d; // super-block scale for quantized scales
half dmin; // super-block scale for quantized mins
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
uint8_t qs[QK_K/2]; // 4--bit quants
} block_q4_K;
#endif
#if QK_K == 64
typedef struct {
half d; // super-block scales/mins
int8_t scales[QK_K/16]; // 8-bit block scales
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
#else
typedef struct {
half d; // super-block scale for quantized scales
half dmin; // super-block scale for quantized mins
uint8_t scales[3*QK_K/64]; // scales and mins, quantized with 6 bits
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
// 176 bytes / block
#endif
typedef struct {
uint8_t ql[QK_K/2]; // quants, lower 4 bits
uint8_t qh[QK_K/4]; // quants, upper 2 bits
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
half d; // super-block scale
} block_q6_K;
// 210 bytes / block
typedef struct {
half d;
uint16_t qs[QK_K/8];
} block_iq2_xxs;
// 66 bytes / block for QK_K = 256, so 2.0625 bpw
typedef struct {
half d;
uint16_t qs[QK_K/8];
uint8_t scales[QK_K/32];
} block_iq2_xs;
// 74 bytes / block for QK_K = 256, so 2.3125 bpw
// 2.5625 bpw quants
typedef struct {
half d;
uint8_t qs[QK_K/4];
uint8_t qh[QK_K/32];
uint8_t scales[QK_K/32];
} block_iq2_s;
typedef struct {
half d;
uint8_t qs[3*QK_K/8];
} block_iq3_xxs;
// 98 bytes / block for QK_K = 256, so 3.0625 bpw
// 3.4375 bpw
#if QK_K == 64
#define IQ3S_N_SCALE 2
#else
#define IQ3S_N_SCALE QK_K/64
#endif
typedef struct {
half d;
uint8_t qs[QK_K/4];
uint8_t qh[QK_K/32];
uint8_t signs[QK_K/8];
uint8_t scales[IQ3S_N_SCALE];
} block_iq3_s;
typedef struct {
half d;
uint8_t qs[QK_K/8];
uint8_t scales[QK_K/16];
} block_iq1_s;
// Non-linear quants
#define QK4_NL 32
typedef struct {
half d;
uint8_t qs[QK4_NL/2];
} block_iq4_nl;
#if QK_K == 64
#define block_iq4_xs block_iq4_nl
#else
typedef struct {
half d;
uint16_t scales_h;
uint8_t scales_l[QK_K/64];
uint8_t qs[QK_K/2];
} block_iq4_xs;
#endif
//====================================== dot products =========================
void kernel_mul_mv_q2_K_f32_impl( void kernel_mul_mv_q2_K_f32_impl(
device const void * src0, device const void * src0,
device const float * src1, device const float * src1,
@ -4338,48 +4163,53 @@ void kernel_mul_mv_iq1_s_f32_impl(
device const block_iq1_s * x = (device const block_iq1_s *) src0 + ib_row + offset0; device const block_iq1_s * x = (device const block_iq1_s *) src0 + ib_row + offset0;
device const float * y = (device const float *) src1 + r1*ne10 + im*ne00*ne1; device const float * y = (device const float *) src1 + r1*ne10 + im*ne00*ne1;
float yl[16]; float yl[32];
float sumf[N_DST]={0.f}, all_sum; float sumf[N_DST]={0.f}, all_sum;
const int nb32 = nb * (QK_K / 32); const int nb32 = nb * (QK_K / 32);
const int ix = tiisg/2; const int ix = tiisg;
const int il = tiisg%2;
device const float * y4 = y + 32 * ix + 16 * il; device const float * y4 = y + 32 * ix;
for (int ib32 = ix; ib32 < nb32; ib32 += 16) { for (int ib32 = ix; ib32 < nb32; ib32 += 32) {
for (int i = 0; i < 16; ++i) { float sumy = 0;
for (int i = 0; i < 32; ++i) {
yl[i] = y4[i]; yl[i] = y4[i];
sumy += yl[i];
} }
const int ibl = ib32 / (QK_K / 32); const int ibl = ib32 / (QK_K / 32);
const int ib = ib32 % (QK_K / 32); const int ib = ib32 % (QK_K / 32);
device const block_iq1_s * xr = x + ibl; device const block_iq1_s * xr = x + ibl;
device const uint8_t * qs = xr->qs + 4 * ib + 2 * il; device const uint8_t * qs = xr->qs + 4 * ib;
device const uint8_t * sc = xr->scales + 2 * ib + il; device const uint16_t * qh = xr->qh + ib;
device const half * dh = &xr->d; device const half * dh = &xr->d;
for (int row = 0; row < N_DST; row++) { for (int row = 0; row < N_DST; row++) {
constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((qh[0] << 8) & 0x700)));
constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((qh[0] << 5) & 0x700)));
constant uint8_t * grid3 = (constant uint8_t *)(iq1s_grid_gpu + (qs[2] | ((qh[0] << 2) & 0x700)));
constant uint8_t * grid4 = (constant uint8_t *)(iq1s_grid_gpu + (qs[3] | ((qh[0] >> 1) & 0x700)));
float2 sum = {0}; float sum = 0;
for (int j = 0; j < 8; ++j) { for (int j = 0; j < 4; ++j) {
sum[0] += yl[j+ 0] * grid1[j]; sum += yl[j+ 0] * (grid1[j] & 0xf) + yl[j+ 4] * (grid1[j] >> 4)
sum[1] += yl[j+ 8] * grid2[j]; + yl[j+ 8] * (grid2[j] & 0xf) + yl[j+12] * (grid2[j] >> 4)
+ yl[j+16] * (grid3[j] & 0xf) + yl[j+20] * (grid3[j] >> 4)
+ yl[j+24] * (grid4[j] & 0xf) + yl[j+28] * (grid4[j] >> 4);
} }
sumf[row] += (float)dh[0] * (sum[0] * (2*(sc[0] & 7) + 1) + sum[1] * (2*((sc[0] >> 4) & 7) + 1)); sumf[row] += (float)dh[0] * (sum + sumy * (qh[0] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA)) * (2*((qh[0] >> 12) & 7) + 1);
dh += nb*sizeof(block_iq1_s)/2; dh += nb*sizeof(block_iq1_s)/2;
qs += nb*sizeof(block_iq1_s); qs += nb*sizeof(block_iq1_s);
sc += nb*sizeof(block_iq1_s); qh += nb*sizeof(block_iq1_s)/2;
} }
y4 += 16 * 32; y4 += 32 * 32;
} }
for (int row = 0; row < N_DST; ++row) { for (int row = 0; row < N_DST; ++row) {
@ -5066,16 +4896,21 @@ void dequantize_iq2_s(device const block_iq2_s * xb, short il, thread type4x4 &
template <typename type4x4> template <typename type4x4>
void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 & reg) { void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 & reg) {
// il is 0...15 for QK_K = 256 => index of block of 32 is il/2 // il is 0...15 for QK_K = 256 => index of block of 32 is il/2
const int ib32 = il/2;
il = il%2;
const float d = xb->d; const float d = xb->d;
device const uint8_t * qs = xb->qs + 2*il; device const uint8_t * qs = xb->qs + 4*ib32 + 2*il;
device const uint8_t * sc = xb->scales + il; device const uint16_t * qh = xb->qh;
const float dl1 = d * (2*(sc[0] & 7) + 1); const float dl = d * (2*((qh[ib32] >> 12) & 7) + 1);
const float dl2 = d * (2*((sc[0] >> 4) & 7) + 1); const float ml = dl * (qh[ib32] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA);
constant int8_t * grid1 = (constant int8_t *)(iq1s_grid + (qs[0] | ((sc[0] & 0x08) << 5))); const uint16_t h = qh[ib32] >> 6*il;
constant int8_t * grid2 = (constant int8_t *)(iq1s_grid + (qs[1] | ((sc[0] & 0x80) << 1))); constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((h << 8) & 0x700)));
for (int i = 0; i < 8; ++i) { constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((h << 5) & 0x700)));
reg[i/4+0][i%4] = dl1 * grid1[i]; for (int i = 0; i < 4; ++i) {
reg[i/4+2][i%4] = dl2 * grid2[i]; reg[0][i] = dl * (grid1[i] & 0xf) + ml;
reg[1][i] = dl * (grid1[i] >> 4) + ml;
reg[2][i] = dl * (grid2[i] & 0xf) + ml;
reg[3][i] = dl * (grid2[i] >> 4) + ml;
} }
} }
@ -5386,7 +5221,7 @@ template<typename block_q, short nl, void (*dequantize_func)(device const block_
void kernel_mul_mm_id_impl( void kernel_mul_mm_id_impl(
device const uchar * src0, device const uchar * src0,
device const uchar * src1, device const uchar * src1,
thread short * src1ids, threadgroup short * src1ids,
device float * dst, device float * dst,
constant int64_t & ne00, constant int64_t & ne00,
constant int64_t & ne02, constant int64_t & ne02,
@ -5589,9 +5424,9 @@ kernel void kernel_mul_mm_id(
tgpig.z = tgpig.z%(ne12*ne13); tgpig.z = tgpig.z%(ne12*ne13);
// row indices of src1 for expert id // row indices of src1 for expert id
int64_t _ne1 = 0; threadgroup short * src1ids = (threadgroup short *)(shared_memory + 8192);
short src1ids[512];
int64_t _ne1 = 0;
for (int64_t i1 = 0; i1 < ne1; i1++) { for (int64_t i1 = 0; i1 < ne1; i1++) {
if (((device int32_t *) (ids + i1*nbi1))[idx] == id) { if (((device int32_t *) (ids + i1*nbi1))[idx] == id) {
src1ids[_ne1++] = i1; src1ids[_ne1++] = i1;

File diff suppressed because it is too large Load diff

View file

@ -1,248 +1,12 @@
#pragma once #pragma once
#define GGML_COMMON_DECL_C
#include "ggml-common.h"
#include "ggml.h"
// GGML internal header // GGML internal header
#include "ggml-impl.h"
#include <stdint.h>
#include <stddef.h>
#define QK4_0 32
typedef struct {
ggml_fp16_t d; // delta
uint8_t qs[QK4_0 / 2]; // nibbles / quants
} block_q4_0;
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
#define QK4_1 32
typedef struct {
ggml_fp16_t d; // delta
ggml_fp16_t m; // min
uint8_t qs[QK4_1 / 2]; // nibbles / quants
} block_q4_1;
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_fp16_t) + QK4_1 / 2, "wrong q4_1 block size/padding");
#define QK5_0 32
typedef struct {
ggml_fp16_t d; // delta
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_0 / 2]; // nibbles / quants
} block_q5_0;
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
#define QK5_1 32
typedef struct {
ggml_fp16_t d; // delta
ggml_fp16_t m; // min
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_1 / 2]; // nibbles / quants
} block_q5_1;
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
#define QK8_0 32
typedef struct {
ggml_fp16_t d; // delta
int8_t qs[QK8_0]; // quants
} block_q8_0;
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
#define QK8_1 32
typedef struct {
float d; // delta
float s; // d * sum(qs[i])
int8_t qs[QK8_1]; // quants
} block_q8_1;
static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding");
//
// Super-block quantization structures
//
// Super-block size
#ifdef GGML_QKK_64
#define QK_K 64
#define K_SCALE_SIZE 4
#else
#define QK_K 256
#define K_SCALE_SIZE 12
#endif
// 2-bit quantization
// weight is represented as x = a * q + b
// 16 blocks of 16 elements each
// Effectively 2.625 bits per weight
typedef struct {
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
uint8_t qs[QK_K/4]; // quants
ggml_fp16_t d; // super-block scale for quantized scales
ggml_fp16_t dmin; // super-block scale for quantized mins
} block_q2_K;
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
// 3-bit quantization
// weight is represented as x = a * q
// 16 blocks of 16 elements each
// Effectively 3.4375 bits per weight
#ifdef GGML_QKK_64
typedef struct {
uint8_t hmask[QK_K/8]; // quants - high bit
uint8_t qs[QK_K/4]; // quants - low 2 bits
uint8_t scales[2];
ggml_fp16_t d; // super-block scale
} block_q3_K;
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
#else
typedef struct {
uint8_t hmask[QK_K/8]; // quants - high bit
uint8_t qs[QK_K/4]; // quants - low 2 bits
uint8_t scales[12]; // scales, quantized with 6 bits
ggml_fp16_t d; // super-block scale
} block_q3_K;
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
#endif
// 4-bit quantization
// 8 blocks of 32 elements each
// weight is represented as x = a * q + b
// Effectively 4.5 bits per weight
#ifdef GGML_QKK_64
typedef struct {
ggml_fp16_t d[2]; // super-block scales/mins
uint8_t scales[2]; // 4-bit block scales/mins
uint8_t qs[QK_K/2]; // 4--bit quants
} block_q4_K;
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + QK_K/2 + 2, "wrong q4_K block size/padding");
#else
typedef struct {
ggml_fp16_t d; // super-block scale for quantized scales
ggml_fp16_t dmin; // super-block scale for quantized mins
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
uint8_t qs[QK_K/2]; // 4--bit quants
} block_q4_K;
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
#endif
// 5-bit quantization
// 8 blocks of 32 elements each
// weight is represented as x = a * q + b
// Effectively 5.5 bits per weight
#ifdef GGML_QKK_64
typedef struct {
ggml_fp16_t d; // super-block scale
int8_t scales[QK_K/16]; // 8-bit block scales
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
#else
typedef struct {
ggml_fp16_t d; // super-block scale for quantized scales
ggml_fp16_t dmin; // super-block scale for quantized mins
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
#endif
// 6-bit quantization
// weight is represented as x = a * q
// 16 blocks of 16 elements each
// Effectively 6.5625 bits per weight
typedef struct {
uint8_t ql[QK_K/2]; // quants, lower 4 bits
uint8_t qh[QK_K/4]; // quants, upper 2 bits
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
ggml_fp16_t d; // super-block scale
} block_q6_K;
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding");
// This is only used for intermediate quantization and dot products
typedef struct {
float d; // delta
int8_t qs[QK_K]; // quants
int16_t bsums[QK_K/16]; // sum of quants in groups of 16
} block_q8_K;
static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding");
// (Almost) "true" 2-bit quantization.
// Due to the need to use blocks as per ggml design, it ends up using
// 2.0625 bpw because of the 16-bit scale for each block of 256.
typedef struct {
ggml_fp16_t d;
uint16_t qs[QK_K/8];
} block_iq2_xxs;
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
// 2.3125 bpw quants
typedef struct {
ggml_fp16_t d;
uint16_t qs[QK_K/8];
uint8_t scales[QK_K/32];
} block_iq2_xs;
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
// 2.5625 bpw quants
typedef struct {
ggml_fp16_t d;
uint8_t qs[QK_K/4];
uint8_t qh[QK_K/32];
uint8_t scales[QK_K/32];
} block_iq2_s;
static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
// (Almost) "true" 3-bit quantization.
// Due to the need to use blocks as per ggml design, it ends up using
// 3.0625 bpw because of the 16-bit scale for each block of 256.
typedef struct {
ggml_fp16_t d;
uint8_t qs[3*QK_K/8];
} block_iq3_xxs;
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
// 3.4375 bpw
#if QK_K == 64
#define IQ3S_N_SCALE 2
#else
#define IQ3S_N_SCALE QK_K/64
#endif
typedef struct {
ggml_fp16_t d;
uint8_t qs[QK_K/4];
uint8_t qh[QK_K/32];
uint8_t signs[QK_K/8];
uint8_t scales[IQ3S_N_SCALE];
} block_iq3_s;
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
typedef struct {
ggml_fp16_t d;
uint8_t qs[QK_K/8];
uint8_t scales[QK_K/16];
} block_iq1_s;
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
// Non-linear quants
#define QK4_NL 32
typedef struct {
ggml_fp16_t d;
uint8_t qs[QK4_NL/2];
} block_iq4_nl;
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
#if QK_K == 64
#define block_iq4_xs block_iq4_nl
//typedef struct block_iq4_nl block_iq4_xs;
#else
typedef struct {
ggml_fp16_t d;
uint16_t scales_h;
uint8_t scales_l[QK_K/64];
uint8_t qs[QK_K/2];
} block_iq4_xs;
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
#endif
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif

View file

@ -202,24 +202,29 @@ namespace dpct
// Version string has the following format: // Version string has the following format:
// a. OpenCL<space><major.minor><space><vendor-specific-information> // a. OpenCL<space><major.minor><space><vendor-specific-information>
// b. <major.minor> // b. <major.minor>
// c. <AmdGcnArchName> e.g gfx1030
std::string ver; std::string ver;
ver = dev.get_info<sycl::info::device::version>(); ver = dev.get_info<sycl::info::device::version>();
std::string::size_type i = 0; std::string::size_type i = 0;
while (i < ver.size()) while (i < ver.size()) {
{
if (isdigit(ver[i])) if (isdigit(ver[i]))
break; break;
i++; i++;
} }
major = std::stoi(&(ver[i])); major = std::stoi(&(ver[i]));
while (i < ver.size()) while (i < ver.size()) {
{
if (ver[i] == '.') if (ver[i] == '.')
break; break;
i++; i++;
} }
if (i < ver.size()) {
// a. and b.
i++; i++;
minor = std::stoi(&(ver[i])); minor = std::stoi(&(ver[i]));
} else {
// c.
minor = 0;
}
} }
template <typename tag, typename T> template <typename tag, typename T>
@ -3144,6 +3149,7 @@ namespace dpct
} // COPY from DPCT head files } // COPY from DPCT head files
#define GGML_COMMON_DECL_SYCL
#define GGML_COMMON_IMPL_SYCL #define GGML_COMMON_IMPL_SYCL
#include "ggml-common.h" #include "ggml-common.h"
@ -3312,66 +3318,6 @@ typedef void (*ggml_sycl_op_flatten_t)(const ggml_tensor *src0,
const float *src1_dd, float *dst_dd, const float *src1_dd, float *dst_dd,
const dpct::queue_ptr &main_stream); const dpct::queue_ptr &main_stream);
// QK = number of values after dequantization
// QR = QK / number of values before dequantization
// QI = number of 32 bit integers before dequantization
#define QK4_0 32
#define QR4_0 2
#define QI4_0 (QK4_0 / (4 * QR4_0))
typedef struct dpct_type_block_q4_0 {
sycl::half d; // delta
uint8_t qs[QK4_0 / 2]; // nibbles / quants
} block_q4_0;
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
#define QK4_1 32
#define QR4_1 2
#define QI4_1 (QK4_1 / (4 * QR4_1))
typedef struct dpct_type_block_q4_1 {
sycl::half2 dm; // dm.x = delta, dm.y = min
uint8_t qs[QK4_1 / 2]; // nibbles / quants
} block_q4_1;
static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding");
#define QK5_0 32
#define QR5_0 2
#define QI5_0 (QK5_0 / (4 * QR5_0))
typedef struct dpct_type_block_q5_0 {
sycl::half d; // delta
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_0 / 2]; // nibbles / quants
} block_q5_0;
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
#define QK5_1 32
#define QR5_1 2
#define QI5_1 (QK5_1 / (4 * QR5_1))
typedef struct dpct_type_block_q5_1 {
sycl::half2 dm; // dm.x = delta, dm.y = min
uint8_t qh[4]; // 5-th bit of quants
uint8_t qs[QK5_1 / 2]; // nibbles / quants
} block_q5_1;
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
#define QK8_0 32
#define QR8_0 1
#define QI8_0 (QK8_0 / (4 * QR8_0))
typedef struct dpct_type_block_q8_0 {
sycl::half d; // delta
int8_t qs[QK8_0]; // quants
} block_q8_0;
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
#define QK8_1 32
#define QR8_1 1
#define QI8_1 (QK8_1 / (4 * QR8_1))
typedef struct dpct_type_block_q8_1 {
sycl::half2 ds; // ds.x = delta, ds.y = sum
int8_t qs[QK8_0]; // quants
} block_q8_1;
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding");
typedef float (*vec_dot_q_sycl_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs); typedef float (*vec_dot_q_sycl_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs);
typedef void (*allocate_tiles_sycl_t)(int **x_ql, sycl::half2 **x_dm, typedef void (*allocate_tiles_sycl_t)(int **x_ql, sycl::half2 **x_dm,
int **x_qh, int **x_sc); int **x_qh, int **x_sc);
@ -3388,112 +3334,6 @@ typedef float (*vec_dot_q_mul_mat_sycl_t)(
const int *__restrict__ y_qs, const sycl::half2 *__restrict__ y_ms, const int *__restrict__ y_qs, const sycl::half2 *__restrict__ y_ms,
const int &i, const int &j, const int &k); const int &i, const int &j, const int &k);
//================================= k-quants
#ifdef GGML_QKK_64
#define QK_K 64
#define K_SCALE_SIZE 4
#else
#define QK_K 256
#define K_SCALE_SIZE 12
#endif
#define QR2_K 4
#define QI2_K (QK_K / (4*QR2_K))
typedef struct dpct_type_block_q2_K {
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
uint8_t qs[QK_K/4]; // quants
sycl::half2 dm; // super-block scale for quantized scales/mins
} block_q2_K;
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
#define QR3_K 4
#define QI3_K (QK_K / (4*QR3_K))
typedef struct dpct_type_block_q3_K {
uint8_t hmask[QK_K/8]; // quants - high bit
uint8_t qs[QK_K/4]; // quants - low 2 bits
#ifdef GGML_QKK_64
uint8_t scales[2]; // scales, quantized with 8 bits
#else
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
#endif
sycl::half d; // super-block scale
} block_q3_K;
//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding");
#define QR4_K 2
#define QI4_K (QK_K / (4*QR4_K))
#ifdef GGML_QKK_64
typedef struct {
sycl::half dm[2]; // super-block scales/mins
uint8_t scales[2]; // 4-bit block scales/mins
uint8_t qs[QK_K/2]; // 4--bit quants
} block_q4_K;
static_assert(sizeof(block_q4_K) == sizeof(sycl::half2) + QK_K/2 + 2, "wrong q4_K block size/padding");
#else
typedef struct dpct_type_block_q4_K {
sycl::half2 dm; // super-block scale for quantized scales/mins
uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits
uint8_t qs[QK_K/2]; // 4--bit quants
} block_q4_K;
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding");
#endif
#define QR5_K 2
#define QI5_K (QK_K / (4*QR5_K))
#ifdef GGML_QKK_64
typedef struct {
sycl::half d; // super-block scale
int8_t scales[QK_K/16]; // block scales
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
#else
typedef struct dpct_type_block_q5_K {
sycl::half2 dm; // super-block scale for quantized scales/mins
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
uint8_t qh[QK_K/8]; // quants, high bit
uint8_t qs[QK_K/2]; // quants, low 4 bits
} block_q5_K;
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
#endif
#define QR6_K 2
#define QI6_K (QK_K / (4*QR6_K))
typedef struct dpct_type_block_q6_K {
uint8_t ql[QK_K/2]; // quants, lower 4 bits
uint8_t qh[QK_K/4]; // quants, upper 2 bits
int8_t scales[QK_K/16]; // scales
sycl::half d; // delta
} block_q6_K;
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding");
#define QR2_XXS 8
#define QI2_XXS (QK_K / (4*QR2_XXS))
typedef struct dpct_type_block_iq2_xxs {
sycl::half d;
uint16_t qs[QK_K/8];
} block_iq2_xxs;
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
#define QR2_XS 8
#define QI2_XS (QK_K / (4*QR2_XS))
typedef struct dpct_type_block_iq2_xs {
sycl::half d;
uint16_t qs[QK_K/8];
uint8_t scales[QK_K/32];
} block_iq2_xs;
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
#define QR3_XXS 8
#define QI3_XXS (QK_K / (4*QR3_XXS))
typedef struct dpct_type_block_iq3_xxs {
sycl::half d;
uint8_t qs[3*(QK_K/8)];
} block_iq3_xxs;
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
#define WARP_SIZE 32 #define WARP_SIZE 32
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses #define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
@ -4833,6 +4673,65 @@ static void dequantize_block_iq3_xxs(const void * __restrict__ vx, dst_t * __res
} }
template<typename dst_t>
static void dequantize_block_iq3_s(const void * __restrict__ vx, dst_t * __restrict__ yy,
const sycl::nd_item<3> &item_ct1,
const uint32_t *iq3s_grid,
const uint8_t *ksigns_iq2xs,
const uint8_t *kmask_iq2xs) {
const int i = item_ct1.get_group(2);
const block_iq3_s * x = (const block_iq3_s *) vx;
const int tid = item_ct1.get_local_id(2);
#if QK_K == 256
const int il = tid/8; // 0...3
const int ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint8_t * qs = x[i].qs + 8*ib;
const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + qs[2*il+0]);
const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + qs[2*il+1]);
const float d = (float)x[i].d * (1 + 2*((x[i].scales[ib/2] >> 4*(ib%2)) & 0xf));
const uint8_t signs = x[i].signs[4*ib + il];
for (int j = 0; j < 4; ++j) {
y[j+0] = d * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
y[j+4] = d * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
}
#else
assert(false);
#endif
}
template<typename dst_t>
static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restrict__ yy,
const sycl::nd_item<3> &item_ct1,
const uint32_t *iq1s_grid,
const uint8_t *ksigns_iq2xs,
const uint8_t *kmask_iq2xs) {
const int i = item_ct1.get_group(2);
const block_iq1_s * x = (const block_iq1_s *) vx;
const int tid = item_ct1.get_local_id(2);
#if QK_K == 256
const int il = tid/8; // 0...3
const int ib = tid%8; // 0...7
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
const uint8_t * qs = x[i].qs + 8*ib;
const uint8_t * grid1 = (const uint8_t *)(iq1s_grid + qs[2*il+0]);
const uint8_t * grid2 = (const uint8_t *)(iq1s_grid + qs[2*il+1]);
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1);
const uint8_t signs = ksigns_iq2xs[(x[i].qh[ib] >> 3*il) & 7];
for (int j = 0; j < 4; ++j) {
y[j+0] = d * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
y[j+4] = d * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
}
#else
assert(false);
#endif
}
/* /*
DPCT1110:4: The total declared local variable size in device function DPCT1110:4: The total declared local variable size in device function
dequantize_mul_mat_vec_q2_k exceeds 128 bytes and may cause high register dequantize_mul_mat_vec_q2_k exceeds 128 bytes and may cause high register
@ -7679,6 +7578,75 @@ vec_dot_iq3_xxs_q8_1(const void *__restrict__ vbq,
#endif #endif
} }
static __dpct_inline__ float
vec_dot_iq3_s_q8_1(const void *__restrict__ vbq,
const block_q8_1 *__restrict__ bq8_1, const int &iqs,
const uint32_t *iq3s_grid, const uint64_t *ksigns64) {
#if DPCT_COMPATIBILITY_TEMP >= \
MIN_CC_DP4A // lowest compute capability for integer intrinsics
#if QK_K == 256
const block_iq3_s * bq2 = (const block_iq3_s *) vbq;
const int ib32 = iqs;
const uint8_t * qs = bq2->qs + 8*ib32;
const int8_t * q8 = bq8_1[ib32].qs;
int sumi = 0;
for (int l = 0; l < 4; ++l) {
const uint32_t * grid1 = iq3s_grid + (qs[2*l+0] | ((bq2->qh[ib32] << (8 - 2*l)) & 256));
const uint32_t * grid2 = iq3s_grid + (qs[2*l+1] | ((bq2->qh[ib32] << (7 - 2*l)) & 256));
uint32_t signs0 = dpct::vectorized_binary<sycl::uchar4>(
((bq2->signs[4*ib32+l] & 0xf) * 0x01010101) & 0x08040201, 0x08040201, std::equal_to<>());
uint32_t signs1 = dpct::vectorized_binary<sycl::uchar4>(
((bq2->signs[4*ib32+l] >> 4) * 0x01010101) & 0x08040201, 0x08040201, std::equal_to<>());
const int grid_l = dpct::vectorized_binary<sycl::uchar4>(
grid1[0] ^ signs0, signs0, std::minus<>());
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
grid2[0] ^ signs1, signs1, std::minus<>());
sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi);
sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi);
q8 += 8;
}
const float d = (float)bq2->d * (1 + 2*((bq2->scales[ib32/2] >> 4*(ib32%2)) & 0xf)) * bq8_1[ib32].ds[0];
return d * sumi;
#else
assert(false);
return 0.f;
#endif
#else
assert(false);
return 0.f;
#endif
}
static __dpct_inline__ float
vec_dot_iq1_s_q8_1(const void *__restrict__ vbq,
const block_q8_1 *__restrict__ bq8_1, const int &iqs,
const uint32_t *iq1s_grid, const uint64_t *ksigns64) {
#if QK_K == 256
const block_iq1_s * bq1 = (const block_iq1_s *) vbq;
const int ib32 = iqs;
const uint8_t * qs = bq1->qs + 4*ib32;
const int8_t * q8 = bq8_1[ib32].qs;
int sumi = 0;
for (int l = 0; l < 4; ++l) {
const uint32_t * grid = (const uint32_t *)(iq1s_grid + qs[l]);
const uint32_t * signs = (const uint32_t *)(ksigns64 + (qs[l] >> 8));
const int grid_l = dpct::vectorized_binary<sycl::uchar4>(
grid[0] ^ signs[0], signs[0], std::minus<>());
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
grid[1] ^ signs[1], signs[1], std::minus<>());
sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi);
sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi);
q8 += 8;
}
const float d = (float)bq1->d * bq8_1[ib32].ds[0] * 0.25f;
return d * sumi;
#else
assert(false);
return 0.f;
#endif
}
template <int qk, int qr, int qi, bool need_sum, typename block_q_t, int mmq_x, template <int qk, int qr, int qi, bool need_sum, typename block_q_t, int mmq_x,
int mmq_y, int nwarps, load_tiles_sycl_t load_tiles, int vdr, int mmq_y, int nwarps, load_tiles_sycl_t load_tiles, int vdr,
@ -8444,6 +8412,98 @@ static void mul_mat_vec_q_iq3_xxs_q8_1(const void * __restrict__ vx, const void
} }
} }
template <int qk, int qi, typename block_q_t, int vdr>
static void mul_mat_vec_q_iq3_s_q8_1(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
const sycl::nd_item<3> &item_ct1,
const uint32_t *iq3s_grid_ptr, const uint64_t *ksigns64_ptr ) {
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
item_ct1.get_local_id(1);
if (row >= nrows) {
return;
}
const int blocks_per_row = ncols / qk;
const int blocks_per_warp = vdr * WARP_SIZE / qi;
// partial sum for each thread
float tmp = 0.0f;
const block_q_t * x = (const block_q_t *) vx;
const block_q8_1 * y = (const block_q8_1 *) vy;
for (int i = item_ct1.get_local_id(2) / (qi / vdr); i < blocks_per_row;
i += blocks_per_warp) {
const int ibx = row*blocks_per_row + i; // x block index
const int iby = i * (qk/QK8_1); // y block index that aligns with ibx
const int iqs =
vdr *
(item_ct1.get_local_id(2) %
(qi / vdr)); // x block quant index when casting the quants to int
tmp += vec_dot_iq3_s_q8_1(&x[ibx], &y[iby], iqs, iq3s_grid_ptr, ksigns64_ptr);
}
// sum up partial sums and write back result
#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) {
tmp +=
dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), tmp, mask);
}
if (item_ct1.get_local_id(2) == 0) {
dst[row] = tmp;
}
}
template <int qk, int qi, typename block_q_t, int vdr>
static void mul_mat_vec_q_iq1_s_q8_1(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
const sycl::nd_item<3> &item_ct1,
const uint32_t *iq1s_grid_ptr, const uint64_t *ksigns64_ptr ) {
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
item_ct1.get_local_id(1);
if (row >= nrows) {
return;
}
const int blocks_per_row = ncols / qk;
const int blocks_per_warp = vdr * WARP_SIZE / qi;
// partial sum for each thread
float tmp = 0.0f;
const block_q_t * x = (const block_q_t *) vx;
const block_q8_1 * y = (const block_q8_1 *) vy;
for (int i = item_ct1.get_local_id(2) / (qi / vdr); i < blocks_per_row;
i += blocks_per_warp) {
const int ibx = row*blocks_per_row + i; // x block index
const int iby = i * (qk/QK8_1); // y block index that aligns with ibx
const int iqs =
vdr *
(item_ct1.get_local_id(2) %
(qi / vdr)); // x block quant index when casting the quants to int
tmp += vec_dot_iq1_s_q8_1(&x[ibx], &y[iby], iqs, iq1s_grid_ptr, ksigns64_ptr);
}
// sum up partial sums and write back result
#pragma unroll
for (int mask = 16; mask > 0; mask >>= 1) {
tmp +=
dpct::permute_sub_group_by_xor(item_ct1.get_sub_group(), tmp, mask);
}
if (item_ct1.get_local_id(2) == 0) {
dst[row] = tmp;
}
}
template <int qk, int qr, dequantize_kernel_t dequantize_kernel> template <int qk, int qr, dequantize_kernel_t dequantize_kernel>
static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat * __restrict__ y, float * __restrict__ dst, const int ncols, const int nrows, static void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat * __restrict__ y, float * __restrict__ dst, const int ncols, const int nrows,
const sycl::nd_item<3> &item_ct1) { const sycl::nd_item<3> &item_ct1) {
@ -10129,6 +10189,64 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int k,
} }
} }
template <typename dst_t>
static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
const int nb = k / QK_K;
{
iq3s_grid.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
dpct::has_capability_or_fail(stream->get_device(),
{sycl::aspect::fp16});
stream->submit([&](sycl::handler &cgh) {
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
sycl::range<3>(1, 1, 32),
sycl::range<3>(1, 1, 32)),
[=](sycl::nd_item<3> item_ct1) {
dequantize_block_iq3_s(
vx, y, item_ct1, iq3s_grid_ptr_ct1,
ksigns_iq2xs_ptr_ct1, kmask_iq2xs_ptr_ct1);
});
});
}
}
template <typename dst_t>
static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
const int nb = k / QK_K;
{
iq1s_grid_gpu.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
dpct::has_capability_or_fail(stream->get_device(),
{sycl::aspect::fp16});
stream->submit([&](sycl::handler &cgh) {
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
sycl::range<3>(1, 1, 32),
sycl::range<3>(1, 1, 32)),
[=](sycl::nd_item<3> item_ct1) {
dequantize_block_iq1_s(
vx, y, item_ct1, iq1s_grid_ptr_ct1,
ksigns_iq2xs_ptr_ct1, kmask_iq2xs_ptr_ct1);
});
});
}
}
template <typename src_t, typename dst_t> template <typename src_t, typename dst_t>
static void convert_unary_sycl(const void *__restrict__ vx, static void convert_unary_sycl(const void *__restrict__ vx,
dst_t *__restrict__ y, const int k, dst_t *__restrict__ y, const int k,
@ -10179,6 +10297,10 @@ static to_fp16_sycl_t ggml_get_to_fp16_sycl(ggml_type type) try {
return dequantize_row_iq2_xs_sycl; return dequantize_row_iq2_xs_sycl;
case GGML_TYPE_IQ3_XXS: case GGML_TYPE_IQ3_XXS:
return dequantize_row_iq3_xxs_sycl; return dequantize_row_iq3_xxs_sycl;
case GGML_TYPE_IQ3_S:
return dequantize_row_iq3_s_sycl;
case GGML_TYPE_IQ1_S:
return dequantize_row_iq1_s_sycl;
case GGML_TYPE_F32: case GGML_TYPE_F32:
return convert_unary_sycl<float>; return convert_unary_sycl<float>;
default: default:
@ -10219,6 +10341,10 @@ static to_fp32_sycl_t ggml_get_to_fp32_sycl(ggml_type type) {
return dequantize_row_iq2_xs_sycl; return dequantize_row_iq2_xs_sycl;
case GGML_TYPE_IQ3_XXS: case GGML_TYPE_IQ3_XXS:
return dequantize_row_iq3_xxs_sycl; return dequantize_row_iq3_xxs_sycl;
case GGML_TYPE_IQ3_S:
return dequantize_row_iq3_s_sycl;
case GGML_TYPE_IQ1_S:
return dequantize_row_iq1_s_sycl;
case GGML_TYPE_F16: case GGML_TYPE_F16:
return convert_unary_sycl<sycl::half>; return convert_unary_sycl<sycl::half>;
default: default:
@ -10808,6 +10934,61 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
} }
} }
static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols,
const int nrows,
dpct::queue_ptr stream) {
GGML_ASSERT(ncols % QK_K == 0);
const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y;
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3s_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1)
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q_iq3_s_q8_1<QK_K, QI3_XS, block_iq3_s, 1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3s_grid_ptr_ct1, ksigns64_ptr_ct1);
});
});
}
}
static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols,
const int nrows,
dpct::queue_ptr stream) {
GGML_ASSERT(ncols % QK_K == 0);
const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y;
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq1s_grid_gpu.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
[=](sycl::nd_item<3> item_ct1)
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q_iq1_s_q8_1<QK_K, QI1_S, block_iq1_s, 1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq1s_grid_ptr_ct1, ksigns64_ptr_ct1);
});
});
}
}
static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols_x, float *dst, const int ncols_x,
@ -13556,8 +13737,11 @@ static int64_t get_row_rounding(ggml_type type, const std::array<float, GGML_SYC
case GGML_TYPE_Q5_K: case GGML_TYPE_Q5_K:
case GGML_TYPE_IQ2_XXS: case GGML_TYPE_IQ2_XXS:
case GGML_TYPE_IQ2_XS: case GGML_TYPE_IQ2_XS:
case GGML_TYPE_IQ1_S:
case GGML_TYPE_IQ3_XXS: case GGML_TYPE_IQ3_XXS:
return max_compute_capability >= VER_GEN9 ? 128 : 64; return max_compute_capability >= VER_GEN9 ? 128 : 64;
case GGML_TYPE_IQ3_S:
return max_compute_capability >= VER_GEN9 ? 128 : 64;
case GGML_TYPE_Q6_K: case GGML_TYPE_Q6_K:
return 64; return 64;
default: default:
@ -13618,6 +13802,12 @@ inline void ggml_sycl_op_mul_mat_vec_q(
case GGML_TYPE_IQ3_XXS: case GGML_TYPE_IQ3_XXS:
mul_mat_vec_iq3_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream); mul_mat_vec_iq3_xxs_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break; break;
case GGML_TYPE_IQ3_S:
mul_mat_vec_iq3_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break;
case GGML_TYPE_IQ1_S:
mul_mat_vec_iq1_s_q8_1_sycl(src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stream);
break;
default: default:
GGML_ASSERT(false); GGML_ASSERT(false);
break; break;
@ -16963,9 +17153,8 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
return false; return false;
} }
ggml_type a_type = a->type; ggml_type a_type = a->type;
if (a_type == GGML_TYPE_IQ2_XXS || a_type == GGML_TYPE_IQ2_XS || a_type == GGML_TYPE_IQ3_XXS || if (a_type == GGML_TYPE_IQ4_NL || a_type == GGML_TYPE_IQ2_S ||
a_type == GGML_TYPE_IQ1_S || a_type == GGML_TYPE_IQ4_NL || a_type == GGML_TYPE_IQ3_S || a_type == GGML_TYPE_IQ4_XS) {
a_type == GGML_TYPE_IQ2_S || a_type == GGML_TYPE_IQ4_XS) {
return false; return false;
} }
return true; return true;
@ -17060,13 +17249,18 @@ static ggml_backend_i ggml_backend_sycl_interface = {
/* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type, /* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type,
/* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async, /* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async,
/* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async, /* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async,
/* .cpy_tensor_async = */ ggml_backend_sycl_cpy_tensor_async, /* .cpy_tensor_async = */ NULL, //ggml_backend_sycl_cpy_tensor_async, // TODO: update for the new interface
/* .synchronize = */ ggml_backend_sycl_synchronize, /* .synchronize = */ ggml_backend_sycl_synchronize,
/* .graph_plan_create = */ NULL, /* .graph_plan_create = */ NULL,
/* .graph_plan_free = */ NULL, /* .graph_plan_free = */ NULL,
/* .graph_plan_compute = */ NULL, /* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_sycl_graph_compute, /* .graph_compute = */ ggml_backend_sycl_graph_compute,
/* .supports_op = */ ggml_backend_sycl_supports_op, /* .supports_op = */ ggml_backend_sycl_supports_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .event_synchronize = */ NULL,
}; };
static ggml_guid_t ggml_backend_sycl_guid() { static ggml_guid_t ggml_backend_sycl_guid() {

View file

@ -5693,6 +5693,11 @@ static ggml_backend_i ggml_backend_vk_interface = {
/* .graph_plan_compute = */ NULL, /* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_vk_graph_compute, /* .graph_compute = */ ggml_backend_vk_graph_compute,
/* .supports_op = */ ggml_backend_vk_supports_op, /* .supports_op = */ ggml_backend_vk_supports_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,
/* .event_wait = */ NULL,
/* .event_synchronize = */ NULL,
}; };
static ggml_guid_t ggml_backend_vk_guid() { static ggml_guid_t ggml_backend_vk_guid() {

93
ggml.c
View file

@ -857,7 +857,7 @@ inline static float vaddvq_f32(float32x4_t v) {
#define GGML_F16x8 float16x8_t #define GGML_F16x8 float16x8_t
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f) #define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
#define GGML_F16x8_SET1(x) vdupq_n_f16(x) #define GGML_F16x8_SET1(x) vdupq_n_f16(x)
#define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x)) #define GGML_F16x8_LOAD(x) vld1q_f16((const ggml_fp16_internal_t *)(x))
#define GGML_F16x8_STORE vst1q_f16 #define GGML_F16x8_STORE vst1q_f16
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c) #define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
#define GGML_F16x8_ADD vaddq_f16 #define GGML_F16x8_ADD vaddq_f16
@ -900,7 +900,7 @@ inline static float vaddvq_f32(float32x4_t v) {
#define GGML_F32Cx4 float32x4_t #define GGML_F32Cx4 float32x4_t
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f) #define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x) #define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x))) #define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const ggml_fp16_internal_t *)(x)))
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y)) #define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c) #define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
#define GGML_F32Cx4_ADD vaddq_f32 #define GGML_F32Cx4_ADD vaddq_f32
@ -11560,8 +11560,6 @@ static void ggml_compute_forward_get_rows_q(
const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1]; const struct ggml_tensor * src1 = dst->src[1];
assert(params->ith == 0);
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return; return;
} }
@ -11569,7 +11567,7 @@ static void ggml_compute_forward_get_rows_q(
GGML_TENSOR_BINARY_OP_LOCALS GGML_TENSOR_BINARY_OP_LOCALS
const int64_t nc = ne00; const int64_t nc = ne00;
const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr); const int64_t nr = ggml_nelements(src1);
const enum ggml_type type = src0->type; const enum ggml_type type = src0->type;
ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
@ -11579,18 +11577,26 @@ static void ggml_compute_forward_get_rows_q(
assert(nb00 == ggml_type_size(type)); assert(nb00 == ggml_type_size(type));
assert(ggml_nrows(dst) == nr); assert(ggml_nrows(dst) == nr);
// TODO: multi-thread const int ith = params->ith;
for (int64_t i12 = 0; i12 < ne12; ++i12) { const int nth = params->nth;
for (int64_t i11 = 0; i11 < ne11; ++i11) {
for (int64_t i10 = 0; i10 < ne10; ++i10) { // rows per thread
const int dr = (nr + nth - 1)/nth;
// row range for this thread
const int ir0 = dr*ith;
const int ir1 = MIN(ir0 + dr, nr);
for (int64_t i = ir0; i < ir1; ++i) {
const int64_t i12 = i/(ne11*ne10);
const int64_t i11 = (i - i12*ne11*ne10)/ne10;
const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
dequantize_row_q( dequantize_row_q(
(const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03), (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc); (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
} }
}
}
} }
static void ggml_compute_forward_get_rows_f16( static void ggml_compute_forward_get_rows_f16(
@ -11600,8 +11606,6 @@ static void ggml_compute_forward_get_rows_f16(
const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1]; const struct ggml_tensor * src1 = dst->src[1];
assert(params->ith == 0);
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return; return;
} }
@ -11609,25 +11613,33 @@ static void ggml_compute_forward_get_rows_f16(
GGML_TENSOR_BINARY_OP_LOCALS GGML_TENSOR_BINARY_OP_LOCALS
const int64_t nc = ne00; const int64_t nc = ne00;
const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr); const int64_t nr = ggml_nelements(src1);
assert(ne0 == nc); assert(ne0 == nc);
assert(ne02 == ne11); assert(ne02 == ne11);
assert(nb00 == sizeof(ggml_fp16_t)); assert(nb00 == sizeof(ggml_fp16_t));
assert(ggml_nrows(dst) == nr); assert(ggml_nrows(dst) == nr);
// TODO: multi-thread const int ith = params->ith;
for (int64_t i12 = 0; i12 < ne12; ++i12) { const int nth = params->nth;
for (int64_t i11 = 0; i11 < ne11; ++i11) {
for (int64_t i10 = 0; i10 < ne10; ++i10) { // rows per thread
const int dr = (nr + nth - 1)/nth;
// row range for this thread
const int ir0 = dr*ith;
const int ir1 = MIN(ir0 + dr, nr);
for (int64_t i = ir0; i < ir1; ++i) {
const int64_t i12 = i/(ne11*ne10);
const int64_t i11 = (i - i12*ne11*ne10)/ne10;
const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
ggml_fp16_to_fp32_row( ggml_fp16_to_fp32_row(
(const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03), (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc); (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
} }
}
}
} }
static void ggml_compute_forward_get_rows_f32( static void ggml_compute_forward_get_rows_f32(
@ -11637,8 +11649,6 @@ static void ggml_compute_forward_get_rows_f32(
const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * src0 = dst->src[0];
const struct ggml_tensor * src1 = dst->src[1]; const struct ggml_tensor * src1 = dst->src[1];
assert(params->ith == 0);
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
return; return;
} }
@ -11646,25 +11656,33 @@ static void ggml_compute_forward_get_rows_f32(
GGML_TENSOR_BINARY_OP_LOCALS GGML_TENSOR_BINARY_OP_LOCALS
const int64_t nc = ne00; const int64_t nc = ne00;
const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr); const int64_t nr = ggml_nelements(src1);
assert(ne0 == nc); assert(ne0 == nc);
assert(ne02 == ne11); assert(ne02 == ne11);
assert(nb00 == sizeof(float)); assert(nb00 == sizeof(float));
assert(ggml_nrows(dst) == nr); assert(ggml_nrows(dst) == nr);
// TODO: multi-thread const int ith = params->ith;
for (int64_t i12 = 0; i12 < ne12; ++i12) { const int nth = params->nth;
for (int64_t i11 = 0; i11 < ne11; ++i11) {
for (int64_t i10 = 0; i10 < ne10; ++i10) { // rows per thread
const int dr = (nr + nth - 1)/nth;
// row range for this thread
const int ir0 = dr*ith;
const int ir1 = MIN(ir0 + dr, nr);
for (int64_t i = ir0; i < ir1; ++i) {
const int64_t i12 = i/(ne11*ne10);
const int64_t i11 = (i - i12*ne11*ne10)/ne10;
const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
ggml_vec_cpy_f32(nc, ggml_vec_cpy_f32(nc,
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3),
(float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03)); (float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03));
} }
}
}
} }
static void ggml_compute_forward_get_rows( static void ggml_compute_forward_get_rows(
@ -17796,7 +17814,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
node->perf_time_us += time_us_cur; node->perf_time_us += time_us_cur;
} }
static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_threads) {
int n_tasks = 0; int n_tasks = 0;
switch (node->op) { switch (node->op) {
@ -17877,6 +17895,12 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
{ {
n_tasks = n_threads; n_tasks = n_threads;
} break; } break;
case GGML_OP_GET_ROWS:
{
// FIXME: the cost of launching additional threads decreases performance with GPU offloading
//n_tasks = MIN(n_threads, ggml_nelements(node->src[1]));
n_tasks = MIN(n_cur_threads, ggml_nelements(node->src[1]));
} break;
case GGML_OP_SCALE: case GGML_OP_SCALE:
case GGML_OP_SET: case GGML_OP_SET:
case GGML_OP_CONT: case GGML_OP_CONT:
@ -17884,7 +17908,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
case GGML_OP_VIEW: case GGML_OP_VIEW:
case GGML_OP_PERMUTE: case GGML_OP_PERMUTE:
case GGML_OP_TRANSPOSE: case GGML_OP_TRANSPOSE:
case GGML_OP_GET_ROWS:
case GGML_OP_GET_ROWS_BACK: case GGML_OP_GET_ROWS_BACK:
case GGML_OP_DIAG: case GGML_OP_DIAG:
{ {
@ -18102,7 +18125,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
/* FINALIZE */ /* FINALIZE */
struct ggml_tensor * node = cgraph->nodes[node_n]; struct ggml_tensor * node = cgraph->nodes[node_n];
if (GGML_OP_HAS_FINALIZE[node->op]) { if (GGML_OP_HAS_FINALIZE[node->op]) {
params.nth = ggml_get_n_tasks(node, n_threads); params.nth = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
ggml_compute_forward(&params, node); ggml_compute_forward(&params, node);
} }
ggml_graph_compute_perf_stats_node(node, state->shared); ggml_graph_compute_perf_stats_node(node, state->shared);
@ -18112,7 +18135,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
while (++node_n < cgraph->n_nodes) { while (++node_n < cgraph->n_nodes) {
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes); GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
struct ggml_tensor * node = cgraph->nodes[node_n]; struct ggml_tensor * node = cgraph->nodes[node_n];
const int n_tasks = ggml_get_n_tasks(node, n_threads); const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
state->shared->perf_node_start_cycles = ggml_perf_cycles(); state->shared->perf_node_start_cycles = ggml_perf_cycles();
state->shared->perf_node_start_time_us = ggml_perf_time_us(); state->shared->perf_node_start_time_us = ggml_perf_time_us();
@ -18160,7 +18183,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
/* INIT & COMPUTE */ /* INIT & COMPUTE */
struct ggml_tensor * node = cgraph->nodes[node_n]; struct ggml_tensor * node = cgraph->nodes[node_n];
const int n_tasks = ggml_get_n_tasks(node, n_threads); const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
struct ggml_compute_params params = { struct ggml_compute_params params = {
/*.type =*/ GGML_TASK_TYPE_INIT, /*.type =*/ GGML_TASK_TYPE_INIT,
@ -18225,7 +18248,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
for (int i = 0; i < cgraph->n_nodes; i++) { for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i]; struct ggml_tensor * node = cgraph->nodes[i];
const int n_tasks = ggml_get_n_tasks(node, n_threads); const int n_tasks = ggml_get_n_tasks(node, n_threads, 1);
max_tasks = MAX(max_tasks, n_tasks); max_tasks = MAX(max_tasks, n_tasks);

10
ggml.h
View file

@ -337,18 +337,18 @@ extern "C" {
struct ggml_object; struct ggml_object;
struct ggml_context; struct ggml_context;
// NOTE: always add types at the end of the enum to keep backward compatibility
enum ggml_type { enum ggml_type {
GGML_TYPE_F32 = 0, GGML_TYPE_F32 = 0,
GGML_TYPE_F16 = 1, GGML_TYPE_F16 = 1,
GGML_TYPE_Q4_0 = 2, GGML_TYPE_Q4_0 = 2,
GGML_TYPE_Q4_1 = 3, GGML_TYPE_Q4_1 = 3,
// GGML_TYPE_Q4_2 = 4, support has been removed // GGML_TYPE_Q4_2 = 4, support has been removed
// GGML_TYPE_Q4_3 (5) support has been removed // GGML_TYPE_Q4_3 = 5, support has been removed
GGML_TYPE_Q5_0 = 6, GGML_TYPE_Q5_0 = 6,
GGML_TYPE_Q5_1 = 7, GGML_TYPE_Q5_1 = 7,
GGML_TYPE_Q8_0 = 8, GGML_TYPE_Q8_0 = 8,
GGML_TYPE_Q8_1 = 9, GGML_TYPE_Q8_1 = 9,
// k-quantizations
GGML_TYPE_Q2_K = 10, GGML_TYPE_Q2_K = 10,
GGML_TYPE_Q3_K = 11, GGML_TYPE_Q3_K = 11,
GGML_TYPE_Q4_K = 12, GGML_TYPE_Q4_K = 12,
@ -363,9 +363,9 @@ extern "C" {
GGML_TYPE_IQ3_S = 21, GGML_TYPE_IQ3_S = 21,
GGML_TYPE_IQ2_S = 22, GGML_TYPE_IQ2_S = 22,
GGML_TYPE_IQ4_XS = 23, GGML_TYPE_IQ4_XS = 23,
GGML_TYPE_I8, GGML_TYPE_I8 = 24,
GGML_TYPE_I16, GGML_TYPE_I16 = 25,
GGML_TYPE_I32, GGML_TYPE_I32 = 26,
GGML_TYPE_COUNT, GGML_TYPE_COUNT,
}; };

View file

@ -32,6 +32,7 @@ class Keys:
FILE_TYPE = "general.file_type" FILE_TYPE = "general.file_type"
class LLM: class LLM:
VOCAB_SIZE = "{arch}.vocab_size"
CONTEXT_LENGTH = "{arch}.context_length" CONTEXT_LENGTH = "{arch}.context_length"
EMBEDDING_LENGTH = "{arch}.embedding_length" EMBEDDING_LENGTH = "{arch}.embedding_length"
BLOCK_COUNT = "{arch}.block_count" BLOCK_COUNT = "{arch}.block_count"
@ -661,6 +662,9 @@ class GGMLQuantizationType(IntEnum):
IQ3_S = 21 IQ3_S = 21
IQ2_S = 22 IQ2_S = 22
IQ4_XS = 23 IQ4_XS = 23
I8 = 24
I16 = 25
I32 = 26
class GGUFEndian(IntEnum): class GGUFEndian(IntEnum):
@ -727,6 +731,9 @@ GGML_QUANT_SIZES = {
GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4), GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16), GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64), GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
GGMLQuantizationType.I8: (1, 1),
GGMLQuantizationType.I16: (1, 2),
GGMLQuantizationType.I32: (1, 4),
} }
@ -746,6 +753,7 @@ KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO
KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE
# LLM # LLM
KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE
KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH
KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH
KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT

View file

@ -248,6 +248,15 @@ class GGUFReader:
elif ggml_type == GGMLQuantizationType.F16: elif ggml_type == GGMLQuantizationType.F16:
item_count = n_elems item_count = n_elems
item_type = np.float16 item_type = np.float16
elif ggml_type == GGMLQuantizationType.I8:
item_count = n_elems
item_type = np.int8
elif ggml_type == GGMLQuantizationType.I16:
item_count = n_elems
item_type = np.int16
elif ggml_type == GGMLQuantizationType.I32:
item_count = n_elems
item_type = np.int32
else: else:
item_count = n_bytes item_count = n_bytes
item_type = np.uint8 item_type = np.uint8

View file

@ -196,9 +196,6 @@ class GGUFWriter:
if self.state is not WriterState.EMPTY: if self.state is not WriterState.EMPTY:
raise ValueError(f'Expected output file to be empty, got {self.state}') raise ValueError(f'Expected output file to be empty, got {self.state}')
if raw_dtype is None and tensor_dtype not in (np.float32, np.float16):
raise ValueError("Only F32 and F16 tensors are supported for now")
encoded_name = name.encode("utf8") encoded_name = name.encode("utf8")
self.ti_data += self._pack("Q", len(encoded_name)) self.ti_data += self._pack("Q", len(encoded_name))
self.ti_data += encoded_name self.ti_data += encoded_name
@ -207,7 +204,18 @@ class GGUFWriter:
for i in range(n_dims): for i in range(n_dims):
self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i]) self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
if raw_dtype is None: if raw_dtype is None:
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16 if tensor_dtype == np.float32:
dtype = GGMLQuantizationType.F32
elif tensor_dtype == np.float16:
dtype = GGMLQuantizationType.F16
elif tensor_dtype == np.int8:
dtype = GGMLQuantizationType.I8
elif tensor_dtype == np.int16:
dtype = GGMLQuantizationType.I16
elif tensor_dtype == np.int32:
dtype = GGMLQuantizationType.I32
else:
raise ValueError("Only F32, F16, I8, I16, I32 tensors are supported for now")
else: else:
dtype = raw_dtype dtype = raw_dtype
self.ti_data += self._pack("I", dtype) self.ti_data += self._pack("I", dtype)
@ -313,6 +321,9 @@ class GGUFWriter:
self.data_alignment = alignment self.data_alignment = alignment
self.add_uint32(Keys.General.ALIGNMENT, alignment) self.add_uint32(Keys.General.ALIGNMENT, alignment)
def add_vocab_size(self, size: int) -> None:
self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size)
def add_context_length(self, length: int) -> None: def add_context_length(self, length: int) -> None:
self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length) self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length)

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "gguf" name = "gguf"
version = "0.7.0" version = "0.8.0"
description = "Read and write ML models in GGUF for GGML" description = "Read and write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"] authors = ["GGML <ggml@ggml.ai>"]
packages = [ packages = [

1114
llama.cpp

File diff suppressed because it is too large Load diff

36
llama.h
View file

@ -59,9 +59,10 @@ extern "C" {
typedef int32_t llama_seq_id; typedef int32_t llama_seq_id;
enum llama_vocab_type { enum llama_vocab_type {
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding LLAMA_VOCAB_TYPE_SPM = 1, // SentencePiece
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece LLAMA_VOCAB_TYPE_BPE = 2, // Byte Pair Encoding
LLAMA_VOCAB_TYPE_WPM = 3, // WordPiece
}; };
// note: these values should be synchronized with ggml_rope // note: these values should be synchronized with ggml_rope
@ -234,8 +235,9 @@ extern "C" {
struct llama_context_params { struct llama_context_params {
uint32_t seed; // RNG seed, -1 for random uint32_t seed; // RNG seed, -1 for random
uint32_t n_ctx; // text context, 0 = from model uint32_t n_ctx; // text context, 0 = from model
uint32_t n_batch; // prompt processing maximum batch size uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode
uint32_t n_parallel; // number of parallel sequences (i.e. distinct states for recurrent models) uint32_t n_ubatch; // physical maximum batch size
uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models)
uint32_t n_threads; // number of threads to use for generation uint32_t n_threads; // number of threads to use for generation
uint32_t n_threads_batch; // number of threads to use for batch processing uint32_t n_threads_batch; // number of threads to use for batch processing
@ -278,7 +280,7 @@ extern "C" {
bool allow_requantize; // allow quantizing non-f32/f16 tensors bool allow_requantize; // allow quantizing non-f32/f16 tensors
bool quantize_output_tensor; // quantize output.weight bool quantize_output_tensor; // quantize output.weight
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
bool pure; // disable k-quant mixtures and quantize all tensors to the same type bool pure; // quantize all tensors to the default type
void * imatrix; // pointer to importance matrix data void * imatrix; // pointer to importance matrix data
} llama_model_quantize_params; } llama_model_quantize_params;
@ -377,7 +379,8 @@ extern "C" {
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
LLAMA_API uint32_t llama_n_max_seq (const struct llama_context * ctx); LLAMA_API uint32_t llama_n_ubatch (const struct llama_context * ctx);
LLAMA_API uint32_t llama_n_seq_max (const struct llama_context * ctx);
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model); LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model); LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
@ -456,7 +459,7 @@ extern "C" {
// Maximum number of sequences that can exist in a cell. It's not an error // Maximum number of sequences that can exist in a cell. It's not an error
// if there are more sequences in a cell than this value, however they will // if there are more sequences in a cell than this value, however they will
// not be visible in the view cells_sequences. // not be visible in the view cells_sequences.
int32_t n_max_seq; int32_t n_seq_max;
// Number of tokens in the cache. For example, if there are two populated // Number of tokens in the cache. For example, if there are two populated
// cells, the first with 1 sequence id in it and the second with 2 sequence // cells, the first with 1 sequence id in it and the second with 2 sequence
@ -476,12 +479,12 @@ extern "C" {
// Information for an individual cell. // Information for an individual cell.
struct llama_kv_cache_view_cell * cells; struct llama_kv_cache_view_cell * cells;
// The sequences for each cell. There will be n_max_seq items per cell. // The sequences for each cell. There will be n_seq_max items per cell.
llama_seq_id * cells_sequences; llama_seq_id * cells_sequences;
}; };
// Create an empty KV cache view. (use only for debugging purposes) // Create an empty KV cache view. (use only for debugging purposes)
LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq); LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
// Free a KV cache view. (use only for debugging purposes) // Free a KV cache view. (use only for debugging purposes)
LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view); LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
@ -643,9 +646,18 @@ extern "C" {
// n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens) // n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch); LLAMA_API void llama_set_n_threads(struct llama_context * ctx, uint32_t n_threads, uint32_t n_threads_batch);
// Set whether to use causal attention or not
// If set to true, the model will only attend to the past tokens
LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
// Set abort callback // Set abort callback
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data); LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
// Wait until all computations are finished
// This is automatically done when using one of the functions below to obtain the computation results
// and is not necessary to call it explicitly in most cases
LLAMA_API void llama_synchronize(struct llama_context * ctx);
// Token logits obtained from the last call to llama_decode() // Token logits obtained from the last call to llama_decode()
// The logits for the last token are stored in the last row // The logits for the last token are stored in the last row
// Logits for which llama_batch.logits[i] == 0 are undefined // Logits for which llama_batch.logits[i] == 0 are undefined
@ -704,7 +716,7 @@ extern "C" {
/// @details Convert the provided text into tokens. /// @details Convert the provided text into tokens.
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens. /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
/// @return Returns the number of tokens on success, no more than n_max_tokens /// @return Returns the number of tokens on success, no more than n_tokens_max
/// @return Returns a negative number on failure - the number of tokens that would have been returned /// @return Returns a negative number on failure - the number of tokens that would have been returned
/// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. /// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
/// Does not insert a leading space. /// Does not insert a leading space.
@ -713,7 +725,7 @@ extern "C" {
const char * text, const char * text,
int32_t text_len, int32_t text_len,
llama_token * tokens, llama_token * tokens,
int32_t n_max_tokens, int32_t n_tokens_max,
bool add_bos, bool add_bos,
bool special); bool special);

View file

@ -1 +1 @@
8695910a39102609073d0e099aa7c97d6bcb3bf9 43a6d4af1971ee2912ff7bc2404011ff327b6a60

View file

@ -2222,8 +2222,8 @@ static void usage(char ** argv) {
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
test_mode mode = MODE_TEST; test_mode mode = MODE_TEST;
const char * op_name = NULL; const char * op_name_filter = NULL;
const char * backend = NULL; const char * backend_filter = NULL;
for (int i = 1; i < argc; i++) { for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "test") == 0) { if (strcmp(argv[i], "test") == 0) {
@ -2232,14 +2232,14 @@ int main(int argc, char ** argv) {
mode = MODE_PERF; mode = MODE_PERF;
} else if (strcmp(argv[i], "-o") == 0) { } else if (strcmp(argv[i], "-o") == 0) {
if (i + 1 < argc) { if (i + 1 < argc) {
op_name = argv[++i]; op_name_filter = argv[++i];
} else { } else {
usage(argv); usage(argv);
return 1; return 1;
} }
} else if (strcmp(argv[i], "-b") == 0) { } else if (strcmp(argv[i], "-b") == 0) {
if (i + 1 < argc) { if (i + 1 < argc) {
backend = argv[++i]; backend_filter = argv[++i];
} else { } else {
usage(argv); usage(argv);
return 1; return 1;
@ -2258,7 +2258,7 @@ int main(int argc, char ** argv) {
for (size_t i = 0; i < ggml_backend_reg_get_count(); i++) { for (size_t i = 0; i < ggml_backend_reg_get_count(); i++) {
printf("Backend %zu/%zu (%s)\n", i + 1, ggml_backend_reg_get_count(), ggml_backend_reg_get_name(i)); printf("Backend %zu/%zu (%s)\n", i + 1, ggml_backend_reg_get_count(), ggml_backend_reg_get_name(i));
if (backend != NULL && strcmp(backend, ggml_backend_reg_get_name(i)) != 0) { if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_reg_get_name(i)) != 0) {
printf(" Skipping\n"); printf(" Skipping\n");
n_ok++; n_ok++;
continue; continue;
@ -2266,9 +2266,17 @@ int main(int argc, char ** argv) {
ggml_backend_t backend = ggml_backend_reg_init_backend(i, NULL); ggml_backend_t backend = ggml_backend_reg_init_backend(i, NULL);
GGML_ASSERT(backend != NULL); GGML_ASSERT(backend != NULL);
if (backend_filter == NULL && ggml_backend_is_cpu(backend)) {
printf(" Skipping CPU backend\n");
ggml_backend_free(backend);
n_ok++;
continue;
}
printf(" Backend name: %s\n", ggml_backend_name(backend)); printf(" Backend name: %s\n", ggml_backend_name(backend));
bool ok = test_backend(backend, mode, op_name); bool ok = test_backend(backend, mode, op_name_filter);
printf(" Backend %s: ", ggml_backend_name(backend)); printf(" Backend %s: ", ggml_backend_name(backend));
if (ok) { if (ok) {

View file

@ -64,7 +64,7 @@ int main(int argc, char **argv) {
for (int i = 0; i < n_vocab; ++i) { for (int i = 0; i < n_vocab; ++i) {
std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i)); std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i));
try { try {
auto cps = codepoints_from_utf8(str); auto cps = unicode_cpts_from_utf8(str);
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false); std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
std::string check = llama_detokenize_bpe(ctx, tokens); std::string check = llama_detokenize_bpe(ctx, tokens);
if (check != str) { if (check != str) {
@ -97,7 +97,7 @@ int main(int argc, char **argv) {
continue; continue;
} }
std::string str = codepoint_to_utf8(cp); std::string str = unicode_cpt_to_utf8(cp);
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false); std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
std::string check = llama_detokenize_bpe(ctx, tokens); std::string check = llama_detokenize_bpe(ctx, tokens);
if (cp != 9601 && str != check) { if (cp != 9601 && str != check) {

View file

@ -85,7 +85,7 @@ int main(int argc, char **argv) {
continue; continue;
} }
std::string str = codepoint_to_utf8(cp); std::string str = unicode_cpt_to_utf8(cp);
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false); std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
std::string check = llama_detokenize_spm(ctx, tokens); std::string check = llama_detokenize_spm(ctx, tokens);
if (cp != 9601 && str != check) { if (cp != 9601 && str != check) {

1672
unicode.cpp Normal file

File diff suppressed because it is too large Load diff

776
unicode.h
View file

@ -1,666 +1,9 @@
#pragma once #pragma once
#include <cassert> #include <cstdint>
#include <map>
#include <stdexcept>
#include <string> #include <string>
#include <unordered_map>
#include <vector> #include <vector>
static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
{0xCE6, 0xCEF}, {0xD66, 0xD6F}, {0xDE6, 0xDEF}, {0xE50, 0xE59}, {0xED0, 0xED9}, {0xF20, 0xF29}, {0x1040, 0x1049}, {0x1090, 0x1099}, {0x1369, 0x1371}, {0x17E0, 0x17E9}, {0x1810, 0x1819}, {0x1946, 0x194F},
{0x19D0, 0x19DA}, {0x1A80, 0x1A89}, {0x1A90, 0x1A99}, {0x1B50, 0x1B59}, {0x1BB0, 0x1BB9}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x2070, 0x2070}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2460, 0x2468},
{0x2474, 0x247C}, {0x2488, 0x2490}, {0x24EA, 0x24EA}, {0x24F5, 0x24FD}, {0x24FF, 0x24FF}, {0x2776, 0x277E}, {0x2780, 0x2788}, {0x278A, 0x2792}, {0xA620, 0xA629}, {0xA8D0, 0xA8D9}, {0xA900, 0xA909},
{0xA9D0, 0xA9D9}, {0xA9F0, 0xA9F9}, {0xAA50, 0xAA59}, {0xABF0, 0xABF9}, {0xFF10, 0xFF19}, {0x104A0, 0x104A9}, {0x10A40, 0x10A43}, {0x10D30, 0x10D39}, {0x10E60, 0x10E68}, {0x11052, 0x1105A},
{0x11066, 0x1106F}, {0x110F0, 0x110F9}, {0x11136, 0x1113F}, {0x111D0, 0x111D9}, {0x112F0, 0x112F9}, {0x11450, 0x11459}, {0x114D0, 0x114D9}, {0x11650, 0x11659}, {0x116C0, 0x116C9}, {0x11730, 0x11739},
{0x118E0, 0x118E9}, {0x11950, 0x11959}, {0x11C50, 0x11C59}, {0x11D50, 0x11D59}, {0x11DA0, 0x11DA9}, {0x16A60, 0x16A69}, {0x16B50, 0x16B59}, {0x1D7CE, 0x1D7FF}, {0x1E140, 0x1E149}, {0x1E2F0, 0x1E2F9},
{0x1E950, 0x1E959}, {0x1F100, 0x1F10A}, {0x1FBF0, 0x1FBF9},
};
static const std::vector<std::pair<uint32_t, uint32_t>> letter_ranges = {
{0x41, 0x5A}, {0x61, 0x7A}, {0xAA, 0xAA}, {0xB5, 0xB5}, {0xBA, 0xBA}, {0xC0, 0xD6}, {0xD8, 0xF6}, {0xF8, 0x2C1}, {0x2C6, 0x2D1}, {0x2E0, 0x2E4}, {0x2EC, 0x2EC}, {0x2EE, 0x2EE}, {0x370, 0x374},
{0x376, 0x377}, {0x37A, 0x37D}, {0x37F, 0x37F}, {0x386, 0x386}, {0x388, 0x38A}, {0x38C, 0x38C}, {0x38E, 0x3A1}, {0x3A3, 0x3F5}, {0x3F7, 0x481}, {0x48A, 0x52F}, {0x531, 0x556}, {0x559, 0x559},
{0x560, 0x588}, {0x5D0, 0x5EA}, {0x5EF, 0x5F2}, {0x620, 0x64A}, {0x66E, 0x66F}, {0x671, 0x6D3}, {0x6D5, 0x6D5}, {0x6E5, 0x6E6}, {0x6EE, 0x6EF}, {0x6FA, 0x6FC}, {0x6FF, 0x6FF}, {0x710, 0x710},
{0x712, 0x72F}, {0x74D, 0x7A5}, {0x7B1, 0x7B1}, {0x7CA, 0x7EA}, {0x7F4, 0x7F5}, {0x7FA, 0x7FA}, {0x800, 0x815}, {0x81A, 0x81A}, {0x824, 0x824}, {0x828, 0x828}, {0x840, 0x858}, {0x860, 0x86A},
{0x8A0, 0x8B4}, {0x8B6, 0x8C7}, {0x904, 0x939}, {0x93D, 0x93D}, {0x950, 0x950}, {0x958, 0x961}, {0x971, 0x980}, {0x985, 0x98C}, {0x98F, 0x990}, {0x993, 0x9A8}, {0x9AA, 0x9B0}, {0x9B2, 0x9B2},
{0x9B6, 0x9B9}, {0x9BD, 0x9BD}, {0x9CE, 0x9CE}, {0x9DC, 0x9DD}, {0x9DF, 0x9E1}, {0x9F0, 0x9F1}, {0x9FC, 0x9FC}, {0xA05, 0xA0A}, {0xA0F, 0xA10}, {0xA13, 0xA28}, {0xA2A, 0xA30}, {0xA32, 0xA33},
{0xA35, 0xA36}, {0xA38, 0xA39}, {0xA59, 0xA5C}, {0xA5E, 0xA5E}, {0xA72, 0xA74}, {0xA85, 0xA8D}, {0xA8F, 0xA91}, {0xA93, 0xAA8}, {0xAAA, 0xAB0}, {0xAB2, 0xAB3}, {0xAB5, 0xAB9}, {0xABD, 0xABD},
{0xAD0, 0xAD0}, {0xAE0, 0xAE1}, {0xAF9, 0xAF9}, {0xB05, 0xB0C}, {0xB0F, 0xB10}, {0xB13, 0xB28}, {0xB2A, 0xB30}, {0xB32, 0xB33}, {0xB35, 0xB39}, {0xB3D, 0xB3D}, {0xB5C, 0xB5D}, {0xB5F, 0xB61},
{0xB71, 0xB71}, {0xB83, 0xB83}, {0xB85, 0xB8A}, {0xB8E, 0xB90}, {0xB92, 0xB95}, {0xB99, 0xB9A}, {0xB9C, 0xB9C}, {0xB9E, 0xB9F}, {0xBA3, 0xBA4}, {0xBA8, 0xBAA}, {0xBAE, 0xBB9}, {0xBD0, 0xBD0},
{0xC05, 0xC0C}, {0xC0E, 0xC10}, {0xC12, 0xC28}, {0xC2A, 0xC39}, {0xC3D, 0xC3D}, {0xC58, 0xC5A}, {0xC60, 0xC61}, {0xC80, 0xC80}, {0xC85, 0xC8C}, {0xC8E, 0xC90}, {0xC92, 0xCA8}, {0xCAA, 0xCB3},
{0xCB5, 0xCB9}, {0xCBD, 0xCBD}, {0xCDE, 0xCDE}, {0xCE0, 0xCE1}, {0xCF1, 0xCF2}, {0xD04, 0xD0C}, {0xD0E, 0xD10}, {0xD12, 0xD3A}, {0xD3D, 0xD3D}, {0xD4E, 0xD4E}, {0xD54, 0xD56}, {0xD5F, 0xD61},
{0xD7A, 0xD7F}, {0xD85, 0xD96}, {0xD9A, 0xDB1}, {0xDB3, 0xDBB}, {0xDBD, 0xDBD}, {0xDC0, 0xDC6}, {0xE01, 0xE30}, {0xE32, 0xE33}, {0xE40, 0xE46}, {0xE81, 0xE82}, {0xE84, 0xE84}, {0xE86, 0xE8A},
{0xE8C, 0xEA3}, {0xEA5, 0xEA5}, {0xEA7, 0xEB0}, {0xEB2, 0xEB3}, {0xEBD, 0xEBD}, {0xEC0, 0xEC4}, {0xEC6, 0xEC6}, {0xEDC, 0xEDF}, {0xF00, 0xF00}, {0xF40, 0xF47}, {0xF49, 0xF6C}, {0xF88, 0xF8C},
{0x1000, 0x102A}, {0x103F, 0x103F}, {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066}, {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E}, {0x10A0, 0x10C5}, {0x10C7, 0x10C7},
{0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5},
{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C},
{0x166F, 0x167F}, {0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x16F1, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1780, 0x17B3},
{0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1878}, {0x1880, 0x1884}, {0x1887, 0x18A8}, {0x18AA, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
{0x19B0, 0x19C9}, {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33}, {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23}, {0x1C4D, 0x1C4F},
{0x1C5A, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA}, {0x1D00, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D},
{0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4},
{0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107},
{0x210A, 0x2113}, {0x2115, 0x2115}, {0x2119, 0x211D}, {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212A, 0x212D}, {0x212F, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, {0x214E, 0x214E},
{0x2183, 0x2184}, {0x2C00, 0x2C2E}, {0x2C30, 0x2C5E}, {0x2C60, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3}, {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F},
{0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2E2F, 0x2E2F}, {0x3005, 0x3006},
{0x3031, 0x3035}, {0x303B, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309F}, {0x30A1, 0x30FA}, {0x30FC, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF},
{0x4E00, 0x9FFC}, {0xA000, 0xA48C}, {0xA4D0, 0xA4FD}, {0xA500, 0xA60C}, {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66E}, {0xA67F, 0xA69D}, {0xA6A0, 0xA6E5}, {0xA717, 0xA71F}, {0xA722, 0xA788},
{0xA78B, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA801}, {0xA803, 0xA805}, {0xA807, 0xA80A}, {0xA80C, 0xA822}, {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7}, {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE},
{0xA90A, 0xA925}, {0xA930, 0xA946}, {0xA960, 0xA97C}, {0xA984, 0xA9B2}, {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9EF}, {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, {0xAA44, 0xAA4B},
{0xAA60, 0xAA76}, {0xAA7A, 0xAA7A}, {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, {0xAAB5, 0xAAB6}, {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, {0xAADB, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF4},
{0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A}, {0xAB5C, 0xAB69}, {0xAB70, 0xABE2}, {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
{0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
{0xFB46, 0xFBB1}, {0xFBD3, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFB}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}, {0xFF21, 0xFF3A}, {0xFF41, 0xFF5A}, {0xFF66, 0xFFBE}, {0xFFC2, 0xFFC7},
{0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
{0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, {0x1032D, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375}, {0x10380, 0x1039D}, {0x103A0, 0x103C3}, {0x103C8, 0x103CF}, {0x10400, 0x1049D},
{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
{0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7},
{0x109BE, 0x109BF}, {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35},
{0x10B40, 0x10B55}, {0x10B60, 0x10B72}, {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C},
{0x10F27, 0x10F27}, {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, {0x11003, 0x11037}, {0x11083, 0x110AF}, {0x110D0, 0x110E8}, {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147},
{0x11150, 0x11172}, {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288},
{0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339},
{0x1133D, 0x1133D}, {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE},
{0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, {0x11909, 0x11909},
{0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00},
{0x11A0B, 0x11A32}, {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F},
{0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, {0x11FB0, 0x11FB0},
{0x12000, 0x12399}, {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77},
{0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C},
{0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
{0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA},
{0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D},
{0x1E14E, 0x1E14E}, {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
{0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
{0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72},
{0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734},
{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
};
static const std::vector<std::pair<uint32_t, uint32_t>> whitespace_ranges = {
{0x9, 0xD}, {0x1C, 0x20}, {0x85, 0x85}, {0xA0, 0xA0}, {0x1680, 0x1680}, {0x2000, 0x200A}, {0x2028, 0x2029}, {0x202F, 0x202F}, {0x205F, 0x205F}, {0x3000, 0x3000},
};
static const std::vector<std::pair<uint32_t, uint32_t>> accent_mark_ranges = {
{0x300, 0x36F}, {0x483, 0x489}, {0x591, 0x5BD}, {0x5BF, 0x5BF}, {0x5C1, 0x5C2}, {0x5C4, 0x5C5}, {0x5C7, 0x5C7}, {0x610, 0x61A}, {0x64B, 0x65F}, {0x670, 0x670}, {0x6D6, 0x6DC}, {0x6DF, 0x6E4},
{0x6E7, 0x6E8}, {0x6EA, 0x6ED}, {0x711, 0x711}, {0x730, 0x74A}, {0x7A6, 0x7B0}, {0x7EB, 0x7F3}, {0x7FD, 0x7FD}, {0x816, 0x819}, {0x81B, 0x823}, {0x825, 0x827}, {0x829, 0x82D}, {0x859, 0x85B},
{0x8D3, 0x8E1}, {0x8E3, 0x903}, {0x93A, 0x93C}, {0x93E, 0x94F}, {0x951, 0x957}, {0x962, 0x963}, {0x981, 0x983}, {0x9BC, 0x9BC}, {0x9BE, 0x9C4}, {0x9C7, 0x9C8}, {0x9CB, 0x9CD}, {0x9D7, 0x9D7},
{0x9E2, 0x9E3}, {0x9FE, 0x9FE}, {0xA01, 0xA03}, {0xA3C, 0xA3C}, {0xA3E, 0xA42}, {0xA47, 0xA48}, {0xA4B, 0xA4D}, {0xA51, 0xA51}, {0xA70, 0xA71}, {0xA75, 0xA75}, {0xA81, 0xA83}, {0xABC, 0xABC},
{0xABE, 0xAC5}, {0xAC7, 0xAC9}, {0xACB, 0xACD}, {0xAE2, 0xAE3}, {0xAFA, 0xAFF}, {0xB01, 0xB03}, {0xB3C, 0xB3C}, {0xB3E, 0xB44}, {0xB47, 0xB48}, {0xB4B, 0xB4D}, {0xB55, 0xB57}, {0xB62, 0xB63},
{0xB82, 0xB82}, {0xBBE, 0xBC2}, {0xBC6, 0xBC8}, {0xBCA, 0xBCD}, {0xBD7, 0xBD7}, {0xC00, 0xC04}, {0xC3E, 0xC44}, {0xC46, 0xC48}, {0xC4A, 0xC4D}, {0xC55, 0xC56}, {0xC62, 0xC63}, {0xC81, 0xC83},
{0xCBC, 0xCBC}, {0xCBE, 0xCC4}, {0xCC6, 0xCC8}, {0xCCA, 0xCCD}, {0xCD5, 0xCD6}, {0xCE2, 0xCE3}, {0xD00, 0xD03}, {0xD3B, 0xD3C}, {0xD3E, 0xD44}, {0xD46, 0xD48}, {0xD4A, 0xD4D}, {0xD57, 0xD57},
{0xD62, 0xD63}, {0xD81, 0xD83}, {0xDCA, 0xDCA}, {0xDCF, 0xDD4}, {0xDD6, 0xDD6}, {0xDD8, 0xDDF}, {0xDF2, 0xDF3}, {0xE31, 0xE31}, {0xE34, 0xE3A}, {0xE47, 0xE4E}, {0xEB1, 0xEB1}, {0xEB4, 0xEBC},
{0xEC8, 0xECD}, {0xF18, 0xF19}, {0xF35, 0xF35}, {0xF37, 0xF37}, {0xF39, 0xF39}, {0xF3E, 0xF3F}, {0xF71, 0xF84}, {0xF86, 0xF87}, {0xF8D, 0xF97}, {0xF99, 0xFBC}, {0xFC6, 0xFC6}, {0x102B, 0x103E},
{0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D}, {0x108F, 0x108F}, {0x109A, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714}, {0x1732, 0x1734},
{0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D}, {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E},
{0x1A60, 0x1A7C}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37}, {0x1CD0, 0x1CD2},
{0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},
{0xA8B4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9E5, 0xA9E5}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D},
{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED}, {0xFB1E, 0xFB1E},
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F},
{0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11000, 0x11002}, {0x11038, 0x11046}, {0x1107F, 0x11082}, {0x110B0, 0x110BA}, {0x11100, 0x11102}, {0x11127, 0x11134},
{0x11145, 0x11146}, {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC}, {0x111CE, 0x111CF}, {0x1122C, 0x11237}, {0x1123E, 0x1123E}, {0x112DF, 0x112EA}, {0x11300, 0x11303},
{0x1133B, 0x1133C}, {0x1133E, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11435, 0x11446}, {0x1145E, 0x1145E},
{0x114B0, 0x114C3}, {0x115AF, 0x115B5}, {0x115B8, 0x115C0}, {0x115DC, 0x115DD}, {0x11630, 0x11640}, {0x116AB, 0x116B7}, {0x1171D, 0x1172B}, {0x1182C, 0x1183A}, {0x11930, 0x11935}, {0x11937, 0x11938},
{0x1193B, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11943}, {0x119D1, 0x119D7}, {0x119DA, 0x119E0}, {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A39}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47},
{0x11A51, 0x11A5B}, {0x11A8A, 0x11A99}, {0x11C2F, 0x11C36}, {0x11C38, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45},
{0x11D47, 0x11D47}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97}, {0x11EF3, 0x11EF6}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87}, {0x16F8F, 0x16F92},
{0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1DA00, 0x1DA36},
{0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
{0x1E130, 0x1E136}, {0x1E2EC, 0x1E2EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, {0xE0100, 0xE01EF},
};
static const std::vector<std::pair<uint32_t, uint32_t>> punctuation_ranges = {
{0x21, 0x23}, {0x25, 0x2A}, {0x2C, 0x2F}, {0x3A, 0x3B}, {0x3F, 0x40}, {0x5B, 0x5D}, {0x5F, 0x5F}, {0x7B, 0x7B}, {0x7D, 0x7D}, {0xA1, 0xA1}, {0xA7, 0xA7}, {0xAB, 0xAB}, {0xB6, 0xB7}, {0xBB, 0xBB},
{0xBF, 0xBF}, {0x37E, 0x37E}, {0x387, 0x387}, {0x55A, 0x55F}, {0x589, 0x58A}, {0x5BE, 0x5BE}, {0x5C0, 0x5C0}, {0x5C3, 0x5C3}, {0x5C6, 0x5C6}, {0x5F3, 0x5F4}, {0x609, 0x60A}, {0x60C, 0x60D},
{0x61B, 0x61B}, {0x61E, 0x61F}, {0x66A, 0x66D}, {0x6D4, 0x6D4}, {0x700, 0x70D}, {0x7F7, 0x7F9}, {0x830, 0x83E}, {0x85E, 0x85E}, {0x964, 0x965}, {0x970, 0x970}, {0x9FD, 0x9FD}, {0xA76, 0xA76},
{0xAF0, 0xAF0}, {0xC77, 0xC77}, {0xC84, 0xC84}, {0xDF4, 0xDF4}, {0xE4F, 0xE4F}, {0xE5A, 0xE5B}, {0xF04, 0xF12}, {0xF14, 0xF14}, {0xF3A, 0xF3D}, {0xF85, 0xF85}, {0xFD0, 0xFD4}, {0xFD9, 0xFDA},
{0x104A, 0x104F}, {0x10FB, 0x10FB}, {0x1360, 0x1368}, {0x1400, 0x1400}, {0x166E, 0x166E}, {0x169B, 0x169C}, {0x16EB, 0x16ED}, {0x1735, 0x1736}, {0x17D4, 0x17D6}, {0x17D8, 0x17DA}, {0x1800, 0x180A},
{0x1944, 0x1945}, {0x1A1E, 0x1A1F}, {0x1AA0, 0x1AA6}, {0x1AA8, 0x1AAD}, {0x1B5A, 0x1B60}, {0x1BFC, 0x1BFF}, {0x1C3B, 0x1C3F}, {0x1C7E, 0x1C7F}, {0x1CC0, 0x1CC7}, {0x1CD3, 0x1CD3}, {0x2010, 0x2027},
{0x2030, 0x2043}, {0x2045, 0x2051}, {0x2053, 0x205E}, {0x207D, 0x207E}, {0x208D, 0x208E}, {0x2308, 0x230B}, {0x2329, 0x232A}, {0x2768, 0x2775}, {0x27C5, 0x27C6}, {0x27E6, 0x27EF}, {0x2983, 0x2998},
{0x29D8, 0x29DB}, {0x29FC, 0x29FD}, {0x2CF9, 0x2CFC}, {0x2CFE, 0x2CFF}, {0x2D70, 0x2D70}, {0x2E00, 0x2E2E}, {0x2E30, 0x2E4F}, {0x2E52, 0x2E52}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301F},
{0x3030, 0x3030}, {0x303D, 0x303D}, {0x30A0, 0x30A0}, {0x30FB, 0x30FB}, {0xA4FE, 0xA4FF}, {0xA60D, 0xA60F}, {0xA673, 0xA673}, {0xA67E, 0xA67E}, {0xA6F2, 0xA6F7}, {0xA874, 0xA877}, {0xA8CE, 0xA8CF},
{0xA8F8, 0xA8FA}, {0xA8FC, 0xA8FC}, {0xA92E, 0xA92F}, {0xA95F, 0xA95F}, {0xA9C1, 0xA9CD}, {0xA9DE, 0xA9DF}, {0xAA5C, 0xAA5F}, {0xAADE, 0xAADF}, {0xAAF0, 0xAAF1}, {0xABEB, 0xABEB}, {0xFD3E, 0xFD3F},
{0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE61}, {0xFE63, 0xFE63}, {0xFE68, 0xFE68}, {0xFE6A, 0xFE6B}, {0xFF01, 0xFF03}, {0xFF05, 0xFF0A}, {0xFF0C, 0xFF0F}, {0xFF1A, 0xFF1B}, {0xFF1F, 0xFF20},
{0xFF3B, 0xFF3D}, {0xFF3F, 0xFF3F}, {0xFF5B, 0xFF5B}, {0xFF5D, 0xFF5D}, {0xFF5F, 0xFF65}, {0x10100, 0x10102}, {0x1039F, 0x1039F}, {0x103D0, 0x103D0}, {0x1056F, 0x1056F}, {0x10857, 0x10857},
{0x1091F, 0x1091F}, {0x1093F, 0x1093F}, {0x10A50, 0x10A58}, {0x10A7F, 0x10A7F}, {0x10AF0, 0x10AF6}, {0x10B39, 0x10B3F}, {0x10B99, 0x10B9C}, {0x10EAD, 0x10EAD}, {0x10F55, 0x10F59}, {0x11047, 0x1104D},
{0x110BB, 0x110BC}, {0x110BE, 0x110C1}, {0x11140, 0x11143}, {0x11174, 0x11175}, {0x111C5, 0x111C8}, {0x111CD, 0x111CD}, {0x111DB, 0x111DB}, {0x111DD, 0x111DF}, {0x11238, 0x1123D}, {0x112A9, 0x112A9},
{0x1144B, 0x1144F}, {0x1145A, 0x1145B}, {0x1145D, 0x1145D}, {0x114C6, 0x114C6}, {0x115C1, 0x115D7}, {0x11641, 0x11643}, {0x11660, 0x1166C}, {0x1173C, 0x1173E}, {0x1183B, 0x1183B}, {0x11944, 0x11946},
{0x119E2, 0x119E2}, {0x11A3F, 0x11A46}, {0x11A9A, 0x11A9C}, {0x11A9E, 0x11AA2}, {0x11C41, 0x11C45}, {0x11C70, 0x11C71}, {0x11EF7, 0x11EF8}, {0x11FFF, 0x11FFF}, {0x12470, 0x12474}, {0x16A6E, 0x16A6F},
{0x16AF5, 0x16AF5}, {0x16B37, 0x16B3B}, {0x16B44, 0x16B44}, {0x16E97, 0x16E9A}, {0x16FE2, 0x16FE2}, {0x1BC9F, 0x1BC9F}, {0x1DA87, 0x1DA8B}, {0x1E95E, 0x1E95F},
};
static const std::vector<std::pair<uint32_t, uint32_t>> symbol_ranges = {
{0x24, 0x24}, {0x2B, 0x2B}, {0x3C, 0x3E}, {0x5E, 0x5E}, {0x60, 0x60}, {0x7C, 0x7C}, {0x7E, 0x7E}, {0xA2, 0xA6}, {0xA8, 0xA9}, {0xAC, 0xAC}, {0xAE, 0xB1}, {0xB4, 0xB4}, {0xB8, 0xB8}, {0xD7, 0xD7},
{0xF7, 0xF7}, {0x2C2, 0x2C5}, {0x2D2, 0x2DF}, {0x2E5, 0x2EB}, {0x2ED, 0x2ED}, {0x2EF, 0x2FF}, {0x375, 0x375}, {0x384, 0x385}, {0x3F6, 0x3F6}, {0x482, 0x482}, {0x58D, 0x58F}, {0x606, 0x608},
{0x60B, 0x60B}, {0x60E, 0x60F}, {0x6DE, 0x6DE}, {0x6E9, 0x6E9}, {0x6FD, 0x6FE}, {0x7F6, 0x7F6}, {0x7FE, 0x7FF}, {0x9F2, 0x9F3}, {0x9FA, 0x9FB}, {0xAF1, 0xAF1}, {0xB70, 0xB70}, {0xBF3, 0xBFA},
{0xC7F, 0xC7F}, {0xD4F, 0xD4F}, {0xD79, 0xD79}, {0xE3F, 0xE3F}, {0xF01, 0xF03}, {0xF13, 0xF13}, {0xF15, 0xF17}, {0xF1A, 0xF1F}, {0xF34, 0xF34}, {0xF36, 0xF36}, {0xF38, 0xF38}, {0xFBE, 0xFC5},
{0xFC7, 0xFCC}, {0xFCE, 0xFCF}, {0xFD5, 0xFD8}, {0x109E, 0x109F}, {0x1390, 0x1399}, {0x166D, 0x166D}, {0x17DB, 0x17DB}, {0x1940, 0x1940}, {0x19DE, 0x19FF}, {0x1B61, 0x1B6A}, {0x1B74, 0x1B7C},
{0x1FBD, 0x1FBD}, {0x1FBF, 0x1FC1}, {0x1FCD, 0x1FCF}, {0x1FDD, 0x1FDF}, {0x1FED, 0x1FEF}, {0x1FFD, 0x1FFE}, {0x2044, 0x2044}, {0x2052, 0x2052}, {0x207A, 0x207C}, {0x208A, 0x208C}, {0x20A0, 0x20BF},
{0x2100, 0x2101}, {0x2103, 0x2106}, {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118}, {0x211E, 0x2123}, {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x213A, 0x213B},
{0x2140, 0x2144}, {0x214A, 0x214D}, {0x214F, 0x214F}, {0x218A, 0x218B}, {0x2190, 0x2307}, {0x230C, 0x2328}, {0x232B, 0x2426}, {0x2440, 0x244A}, {0x249C, 0x24E9}, {0x2500, 0x2767}, {0x2794, 0x27C4},
{0x27C7, 0x27E5}, {0x27F0, 0x2982}, {0x2999, 0x29D7}, {0x29DC, 0x29FB}, {0x29FE, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2BFF}, {0x2CE5, 0x2CEA}, {0x2E50, 0x2E51}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
{0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3004, 0x3004}, {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037}, {0x303E, 0x303F}, {0x309B, 0x309C}, {0x3190, 0x3191}, {0x3196, 0x319F}, {0x31C0, 0x31E3},
{0x3200, 0x321E}, {0x322A, 0x3247}, {0x3250, 0x3250}, {0x3260, 0x327F}, {0x328A, 0x32B0}, {0x32C0, 0x33FF}, {0x4DC0, 0x4DFF}, {0xA490, 0xA4C6}, {0xA700, 0xA716}, {0xA720, 0xA721}, {0xA789, 0xA78A},
{0xA828, 0xA82B}, {0xA836, 0xA839}, {0xAA77, 0xAA79}, {0xAB5B, 0xAB5B}, {0xAB6A, 0xAB6B}, {0xFB29, 0xFB29}, {0xFBB2, 0xFBC1}, {0xFDFC, 0xFDFD}, {0xFE62, 0xFE62}, {0xFE64, 0xFE66}, {0xFE69, 0xFE69},
{0xFF04, 0xFF04}, {0xFF0B, 0xFF0B}, {0xFF1C, 0xFF1E}, {0xFF3E, 0xFF3E}, {0xFF40, 0xFF40}, {0xFF5C, 0xFF5C}, {0xFF5E, 0xFF5E}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}, {0x10137, 0x1013F},
{0x10179, 0x10189}, {0x1018C, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0}, {0x101D0, 0x101FC}, {0x10877, 0x10878}, {0x10AC8, 0x10AC8}, {0x1173F, 0x1173F}, {0x11FD5, 0x11FF1}, {0x16B3C, 0x16B3F},
{0x16B45, 0x16B45}, {0x1BC9C, 0x1BC9C}, {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D164}, {0x1D16A, 0x1D16C}, {0x1D183, 0x1D184}, {0x1D18C, 0x1D1A9}, {0x1D1AE, 0x1D1E8}, {0x1D200, 0x1D241},
{0x1D245, 0x1D245}, {0x1D300, 0x1D356}, {0x1D6C1, 0x1D6C1}, {0x1D6DB, 0x1D6DB}, {0x1D6FB, 0x1D6FB}, {0x1D715, 0x1D715}, {0x1D735, 0x1D735}, {0x1D74F, 0x1D74F}, {0x1D76F, 0x1D76F}, {0x1D789, 0x1D789},
{0x1D7A9, 0x1D7A9}, {0x1D7C3, 0x1D7C3}, {0x1D800, 0x1D9FF}, {0x1DA37, 0x1DA3A}, {0x1DA6D, 0x1DA74}, {0x1DA76, 0x1DA83}, {0x1DA85, 0x1DA86}, {0x1E14F, 0x1E14F}, {0x1E2FF, 0x1E2FF}, {0x1ECAC, 0x1ECAC},
{0x1ECB0, 0x1ECB0}, {0x1ED2E, 0x1ED2E}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B}, {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F10D, 0x1F1AD},
{0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7}, {0x1F6E0, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D8},
{0x1F7E0, 0x1F7EB}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F978}, {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1FA53},
{0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA74}, {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8}, {0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6}, {0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA},
};
static const std::vector<std::pair<uint32_t, uint32_t>> control_ranges = {
{0x0, 0x8}, {0xE, 0x1B}, {0x7F, 0x84}, {0x86, 0x9F}, {0xAD, 0xAD}, {0x378, 0x379}, {0x380, 0x383}, {0x38B, 0x38B}, {0x38D, 0x38D}, {0x3A2, 0x3A2}, {0x530, 0x530}, {0x557, 0x558}, {0x58B, 0x58C},
{0x590, 0x590}, {0x5C8, 0x5CF}, {0x5EB, 0x5EE}, {0x5F5, 0x605}, {0x61C, 0x61D}, {0x6DD, 0x6DD}, {0x70E, 0x70F}, {0x74B, 0x74C}, {0x7B2, 0x7BF}, {0x7FB, 0x7FC}, {0x82E, 0x82F}, {0x83F, 0x83F},
{0x85C, 0x85D}, {0x85F, 0x85F}, {0x86B, 0x89F}, {0x8B5, 0x8B5}, {0x8C8, 0x8D2}, {0x8E2, 0x8E2}, {0x984, 0x984}, {0x98D, 0x98E}, {0x991, 0x992}, {0x9A9, 0x9A9}, {0x9B1, 0x9B1}, {0x9B3, 0x9B5},
{0x9BA, 0x9BB}, {0x9C5, 0x9C6}, {0x9C9, 0x9CA}, {0x9CF, 0x9D6}, {0x9D8, 0x9DB}, {0x9DE, 0x9DE}, {0x9E4, 0x9E5}, {0x9FF, 0xA00}, {0xA04, 0xA04}, {0xA0B, 0xA0E}, {0xA11, 0xA12}, {0xA29, 0xA29},
{0xA31, 0xA31}, {0xA34, 0xA34}, {0xA37, 0xA37}, {0xA3A, 0xA3B}, {0xA3D, 0xA3D}, {0xA43, 0xA46}, {0xA49, 0xA4A}, {0xA4E, 0xA50}, {0xA52, 0xA58}, {0xA5D, 0xA5D}, {0xA5F, 0xA65}, {0xA77, 0xA80},
{0xA84, 0xA84}, {0xA8E, 0xA8E}, {0xA92, 0xA92}, {0xAA9, 0xAA9}, {0xAB1, 0xAB1}, {0xAB4, 0xAB4}, {0xABA, 0xABB}, {0xAC6, 0xAC6}, {0xACA, 0xACA}, {0xACE, 0xACF}, {0xAD1, 0xADF}, {0xAE4, 0xAE5},
{0xAF2, 0xAF8}, {0xB00, 0xB00}, {0xB04, 0xB04}, {0xB0D, 0xB0E}, {0xB11, 0xB12}, {0xB29, 0xB29}, {0xB31, 0xB31}, {0xB34, 0xB34}, {0xB3A, 0xB3B}, {0xB45, 0xB46}, {0xB49, 0xB4A}, {0xB4E, 0xB54},
{0xB58, 0xB5B}, {0xB5E, 0xB5E}, {0xB64, 0xB65}, {0xB78, 0xB81}, {0xB84, 0xB84}, {0xB8B, 0xB8D}, {0xB91, 0xB91}, {0xB96, 0xB98}, {0xB9B, 0xB9B}, {0xB9D, 0xB9D}, {0xBA0, 0xBA2}, {0xBA5, 0xBA7},
{0xBAB, 0xBAD}, {0xBBA, 0xBBD}, {0xBC3, 0xBC5}, {0xBC9, 0xBC9}, {0xBCE, 0xBCF}, {0xBD1, 0xBD6}, {0xBD8, 0xBE5}, {0xBFB, 0xBFF}, {0xC0D, 0xC0D}, {0xC11, 0xC11}, {0xC29, 0xC29}, {0xC3A, 0xC3C},
{0xC45, 0xC45}, {0xC49, 0xC49}, {0xC4E, 0xC54}, {0xC57, 0xC57}, {0xC5B, 0xC5F}, {0xC64, 0xC65}, {0xC70, 0xC76}, {0xC8D, 0xC8D}, {0xC91, 0xC91}, {0xCA9, 0xCA9}, {0xCB4, 0xCB4}, {0xCBA, 0xCBB},
{0xCC5, 0xCC5}, {0xCC9, 0xCC9}, {0xCCE, 0xCD4}, {0xCD7, 0xCDD}, {0xCDF, 0xCDF}, {0xCE4, 0xCE5}, {0xCF0, 0xCF0}, {0xCF3, 0xCFF}, {0xD0D, 0xD0D}, {0xD11, 0xD11}, {0xD45, 0xD45}, {0xD49, 0xD49},
{0xD50, 0xD53}, {0xD64, 0xD65}, {0xD80, 0xD80}, {0xD84, 0xD84}, {0xD97, 0xD99}, {0xDB2, 0xDB2}, {0xDBC, 0xDBC}, {0xDBE, 0xDBF}, {0xDC7, 0xDC9}, {0xDCB, 0xDCE}, {0xDD5, 0xDD5}, {0xDD7, 0xDD7},
{0xDE0, 0xDE5}, {0xDF0, 0xDF1}, {0xDF5, 0xE00}, {0xE3B, 0xE3E}, {0xE5C, 0xE80}, {0xE83, 0xE83}, {0xE85, 0xE85}, {0xE8B, 0xE8B}, {0xEA4, 0xEA4}, {0xEA6, 0xEA6}, {0xEBE, 0xEBF}, {0xEC5, 0xEC5},
{0xEC7, 0xEC7}, {0xECE, 0xECF}, {0xEDA, 0xEDB}, {0xEE0, 0xEFF}, {0xF48, 0xF48}, {0xF6D, 0xF70}, {0xF98, 0xF98}, {0xFBD, 0xFBD}, {0xFCD, 0xFCD}, {0xFDB, 0xFFF}, {0x10C6, 0x10C6}, {0x10C8, 0x10CC},
{0x10CE, 0x10CF}, {0x1249, 0x1249}, {0x124E, 0x124F}, {0x1257, 0x1257}, {0x1259, 0x1259}, {0x125E, 0x125F}, {0x1289, 0x1289}, {0x128E, 0x128F}, {0x12B1, 0x12B1}, {0x12B6, 0x12B7}, {0x12BF, 0x12BF},
{0x12C1, 0x12C1}, {0x12C6, 0x12C7}, {0x12D7, 0x12D7}, {0x1311, 0x1311}, {0x1316, 0x1317}, {0x135B, 0x135C}, {0x137D, 0x137F}, {0x139A, 0x139F}, {0x13F6, 0x13F7}, {0x13FE, 0x13FF}, {0x169D, 0x169F},
{0x16F9, 0x16FF}, {0x170D, 0x170D}, {0x1715, 0x171F}, {0x1737, 0x173F}, {0x1754, 0x175F}, {0x176D, 0x176D}, {0x1771, 0x1771}, {0x1774, 0x177F}, {0x17DE, 0x17DF}, {0x17EA, 0x17EF}, {0x17FA, 0x17FF},
{0x180E, 0x180F}, {0x181A, 0x181F}, {0x1879, 0x187F}, {0x18AB, 0x18AF}, {0x18F6, 0x18FF}, {0x191F, 0x191F}, {0x192C, 0x192F}, {0x193C, 0x193F}, {0x1941, 0x1943}, {0x196E, 0x196F}, {0x1975, 0x197F},
{0x19AC, 0x19AF}, {0x19CA, 0x19CF}, {0x19DB, 0x19DD}, {0x1A1C, 0x1A1D}, {0x1A5F, 0x1A5F}, {0x1A7D, 0x1A7E}, {0x1A8A, 0x1A8F}, {0x1A9A, 0x1A9F}, {0x1AAE, 0x1AAF}, {0x1AC1, 0x1AFF}, {0x1B4C, 0x1B4F},
{0x1B7D, 0x1B7F}, {0x1BF4, 0x1BFB}, {0x1C38, 0x1C3A}, {0x1C4A, 0x1C4C}, {0x1C89, 0x1C8F}, {0x1CBB, 0x1CBC}, {0x1CC8, 0x1CCF}, {0x1CFB, 0x1CFF}, {0x1DFA, 0x1DFA}, {0x1F16, 0x1F17}, {0x1F1E, 0x1F1F},
{0x1F46, 0x1F47}, {0x1F4E, 0x1F4F}, {0x1F58, 0x1F58}, {0x1F5A, 0x1F5A}, {0x1F5C, 0x1F5C}, {0x1F5E, 0x1F5E}, {0x1F7E, 0x1F7F}, {0x1FB5, 0x1FB5}, {0x1FC5, 0x1FC5}, {0x1FD4, 0x1FD5}, {0x1FDC, 0x1FDC},
{0x1FF0, 0x1FF1}, {0x1FF5, 0x1FF5}, {0x1FFF, 0x1FFF}, {0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x206F}, {0x2072, 0x2073}, {0x208F, 0x208F}, {0x209D, 0x209F}, {0x20C0, 0x20CF}, {0x20F1, 0x20FF},
{0x218C, 0x218F}, {0x2427, 0x243F}, {0x244B, 0x245F}, {0x2B74, 0x2B75}, {0x2B96, 0x2B96}, {0x2C2F, 0x2C2F}, {0x2C5F, 0x2C5F}, {0x2CF4, 0x2CF8}, {0x2D26, 0x2D26}, {0x2D28, 0x2D2C}, {0x2D2E, 0x2D2F},
{0x2D68, 0x2D6E}, {0x2D71, 0x2D7E}, {0x2D97, 0x2D9F}, {0x2DA7, 0x2DA7}, {0x2DAF, 0x2DAF}, {0x2DB7, 0x2DB7}, {0x2DBF, 0x2DBF}, {0x2DC7, 0x2DC7}, {0x2DCF, 0x2DCF}, {0x2DD7, 0x2DD7}, {0x2DDF, 0x2DDF},
{0x2E53, 0x2E7F}, {0x2E9A, 0x2E9A}, {0x2EF4, 0x2EFF}, {0x2FD6, 0x2FEF}, {0x2FFC, 0x2FFF}, {0x3040, 0x3040}, {0x3097, 0x3098}, {0x3100, 0x3104}, {0x3130, 0x3130}, {0x318F, 0x318F}, {0x31E4, 0x31EF},
{0x321F, 0x321F}, {0x9FFD, 0x9FFF}, {0xA48D, 0xA48F}, {0xA4C7, 0xA4CF}, {0xA62C, 0xA63F}, {0xA6F8, 0xA6FF}, {0xA7C0, 0xA7C1}, {0xA7CB, 0xA7F4}, {0xA82D, 0xA82F}, {0xA83A, 0xA83F}, {0xA878, 0xA87F},
{0xA8C6, 0xA8CD}, {0xA8DA, 0xA8DF}, {0xA954, 0xA95E}, {0xA97D, 0xA97F}, {0xA9CE, 0xA9CE}, {0xA9DA, 0xA9DD}, {0xA9FF, 0xA9FF}, {0xAA37, 0xAA3F}, {0xAA4E, 0xAA4F}, {0xAA5A, 0xAA5B}, {0xAAC3, 0xAADA},
{0xAAF7, 0xAB00}, {0xAB07, 0xAB08}, {0xAB0F, 0xAB10}, {0xAB17, 0xAB1F}, {0xAB27, 0xAB27}, {0xAB2F, 0xAB2F}, {0xAB6C, 0xAB6F}, {0xABEE, 0xABEF}, {0xABFA, 0xABFF}, {0xD7A4, 0xD7AF}, {0xD7C7, 0xD7CA},
{0xD7FC, 0xF8FF}, {0xFA6E, 0xFA6F}, {0xFADA, 0xFAFF}, {0xFB07, 0xFB12}, {0xFB18, 0xFB1C}, {0xFB37, 0xFB37}, {0xFB3D, 0xFB3D}, {0xFB3F, 0xFB3F}, {0xFB42, 0xFB42}, {0xFB45, 0xFB45}, {0xFBC2, 0xFBD2},
{0xFD40, 0xFD4F}, {0xFD90, 0xFD91}, {0xFDC8, 0xFDEF}, {0xFDFE, 0xFDFF}, {0xFE1A, 0xFE1F}, {0xFE53, 0xFE53}, {0xFE67, 0xFE67}, {0xFE6C, 0xFE6F}, {0xFE75, 0xFE75}, {0xFEFD, 0xFF00}, {0xFFBF, 0xFFC1},
{0xFFC8, 0xFFC9}, {0xFFD0, 0xFFD1}, {0xFFD8, 0xFFD9}, {0xFFDD, 0xFFDF}, {0xFFE7, 0xFFE7}, {0xFFEF, 0xFFFB}, {0xFFFE, 0xFFFF}, {0x1000C, 0x1000C}, {0x10027, 0x10027}, {0x1003B, 0x1003B},
{0x1003E, 0x1003E}, {0x1004E, 0x1004F}, {0x1005E, 0x1007F}, {0x100FB, 0x100FF}, {0x10103, 0x10106}, {0x10134, 0x10136}, {0x1018F, 0x1018F}, {0x1019D, 0x1019F}, {0x101A1, 0x101CF}, {0x101FE, 0x1027F},
{0x1029D, 0x1029F}, {0x102D1, 0x102DF}, {0x102FC, 0x102FF}, {0x10324, 0x1032C}, {0x1034B, 0x1034F}, {0x1037B, 0x1037F}, {0x1039E, 0x1039E}, {0x103C4, 0x103C7}, {0x103D6, 0x103FF}, {0x1049E, 0x1049F},
{0x104AA, 0x104AF}, {0x104D4, 0x104D7}, {0x104FC, 0x104FF}, {0x10528, 0x1052F}, {0x10564, 0x1056E}, {0x10570, 0x105FF}, {0x10737, 0x1073F}, {0x10756, 0x1075F}, {0x10768, 0x107FF}, {0x10806, 0x10807},
{0x10809, 0x10809}, {0x10836, 0x10836}, {0x10839, 0x1083B}, {0x1083D, 0x1083E}, {0x10856, 0x10856}, {0x1089F, 0x108A6}, {0x108B0, 0x108DF}, {0x108F3, 0x108F3}, {0x108F6, 0x108FA}, {0x1091C, 0x1091E},
{0x1093A, 0x1093E}, {0x10940, 0x1097F}, {0x109B8, 0x109BB}, {0x109D0, 0x109D1}, {0x10A04, 0x10A04}, {0x10A07, 0x10A0B}, {0x10A14, 0x10A14}, {0x10A18, 0x10A18}, {0x10A36, 0x10A37}, {0x10A3B, 0x10A3E},
{0x10A49, 0x10A4F}, {0x10A59, 0x10A5F}, {0x10AA0, 0x10ABF}, {0x10AE7, 0x10AEA}, {0x10AF7, 0x10AFF}, {0x10B36, 0x10B38}, {0x10B56, 0x10B57}, {0x10B73, 0x10B77}, {0x10B92, 0x10B98}, {0x10B9D, 0x10BA8},
{0x10BB0, 0x10BFF}, {0x10C49, 0x10C7F}, {0x10CB3, 0x10CBF}, {0x10CF3, 0x10CF9}, {0x10D28, 0x10D2F}, {0x10D3A, 0x10E5F}, {0x10E7F, 0x10E7F}, {0x10EAA, 0x10EAA}, {0x10EAE, 0x10EAF}, {0x10EB2, 0x10EFF},
{0x10F28, 0x10F2F}, {0x10F5A, 0x10FAF}, {0x10FCC, 0x10FDF}, {0x10FF7, 0x10FFF}, {0x1104E, 0x11051}, {0x11070, 0x1107E}, {0x110BD, 0x110BD}, {0x110C2, 0x110CF}, {0x110E9, 0x110EF}, {0x110FA, 0x110FF},
{0x11135, 0x11135}, {0x11148, 0x1114F}, {0x11177, 0x1117F}, {0x111E0, 0x111E0}, {0x111F5, 0x111FF}, {0x11212, 0x11212}, {0x1123F, 0x1127F}, {0x11287, 0x11287}, {0x11289, 0x11289}, {0x1128E, 0x1128E},
{0x1129E, 0x1129E}, {0x112AA, 0x112AF}, {0x112EB, 0x112EF}, {0x112FA, 0x112FF}, {0x11304, 0x11304}, {0x1130D, 0x1130E}, {0x11311, 0x11312}, {0x11329, 0x11329}, {0x11331, 0x11331}, {0x11334, 0x11334},
{0x1133A, 0x1133A}, {0x11345, 0x11346}, {0x11349, 0x1134A}, {0x1134E, 0x1134F}, {0x11351, 0x11356}, {0x11358, 0x1135C}, {0x11364, 0x11365}, {0x1136D, 0x1136F}, {0x11375, 0x113FF}, {0x1145C, 0x1145C},
{0x11462, 0x1147F}, {0x114C8, 0x114CF}, {0x114DA, 0x1157F}, {0x115B6, 0x115B7}, {0x115DE, 0x115FF}, {0x11645, 0x1164F}, {0x1165A, 0x1165F}, {0x1166D, 0x1167F}, {0x116B9, 0x116BF}, {0x116CA, 0x116FF},
{0x1171B, 0x1171C}, {0x1172C, 0x1172F}, {0x11740, 0x117FF}, {0x1183C, 0x1189F}, {0x118F3, 0x118FE}, {0x11907, 0x11908}, {0x1190A, 0x1190B}, {0x11914, 0x11914}, {0x11917, 0x11917}, {0x11936, 0x11936},
{0x11939, 0x1193A}, {0x11947, 0x1194F}, {0x1195A, 0x1199F}, {0x119A8, 0x119A9}, {0x119D8, 0x119D9}, {0x119E5, 0x119FF}, {0x11A48, 0x11A4F}, {0x11AA3, 0x11ABF}, {0x11AF9, 0x11BFF}, {0x11C09, 0x11C09},
{0x11C37, 0x11C37}, {0x11C46, 0x11C4F}, {0x11C6D, 0x11C6F}, {0x11C90, 0x11C91}, {0x11CA8, 0x11CA8}, {0x11CB7, 0x11CFF}, {0x11D07, 0x11D07}, {0x11D0A, 0x11D0A}, {0x11D37, 0x11D39}, {0x11D3B, 0x11D3B},
{0x11D3E, 0x11D3E}, {0x11D48, 0x11D4F}, {0x11D5A, 0x11D5F}, {0x11D66, 0x11D66}, {0x11D69, 0x11D69}, {0x11D8F, 0x11D8F}, {0x11D92, 0x11D92}, {0x11D99, 0x11D9F}, {0x11DAA, 0x11EDF}, {0x11EF9, 0x11FAF},
{0x11FB1, 0x11FBF}, {0x11FF2, 0x11FFE}, {0x1239A, 0x123FF}, {0x1246F, 0x1246F}, {0x12475, 0x1247F}, {0x12544, 0x12FFF}, {0x1342F, 0x143FF}, {0x14647, 0x167FF}, {0x16A39, 0x16A3F}, {0x16A5F, 0x16A5F},
{0x16A6A, 0x16A6D}, {0x16A70, 0x16ACF}, {0x16AEE, 0x16AEF}, {0x16AF6, 0x16AFF}, {0x16B46, 0x16B4F}, {0x16B5A, 0x16B5A}, {0x16B62, 0x16B62}, {0x16B78, 0x16B7C}, {0x16B90, 0x16E3F}, {0x16E9B, 0x16EFF},
{0x16F4B, 0x16F4E}, {0x16F88, 0x16F8E}, {0x16FA0, 0x16FDF}, {0x16FE5, 0x16FEF}, {0x16FF2, 0x16FFF}, {0x187F8, 0x187FF}, {0x18CD6, 0x18CFF}, {0x18D09, 0x1AFFF}, {0x1B11F, 0x1B14F}, {0x1B153, 0x1B163},
{0x1B168, 0x1B16F}, {0x1B2FC, 0x1BBFF}, {0x1BC6B, 0x1BC6F}, {0x1BC7D, 0x1BC7F}, {0x1BC89, 0x1BC8F}, {0x1BC9A, 0x1BC9B}, {0x1BCA0, 0x1CFFF}, {0x1D0F6, 0x1D0FF}, {0x1D127, 0x1D128}, {0x1D173, 0x1D17A},
{0x1D1E9, 0x1D1FF}, {0x1D246, 0x1D2DF}, {0x1D2F4, 0x1D2FF}, {0x1D357, 0x1D35F}, {0x1D379, 0x1D3FF}, {0x1D455, 0x1D455}, {0x1D49D, 0x1D49D}, {0x1D4A0, 0x1D4A1}, {0x1D4A3, 0x1D4A4}, {0x1D4A7, 0x1D4A8},
{0x1D4AD, 0x1D4AD}, {0x1D4BA, 0x1D4BA}, {0x1D4BC, 0x1D4BC}, {0x1D4C4, 0x1D4C4}, {0x1D506, 0x1D506}, {0x1D50B, 0x1D50C}, {0x1D515, 0x1D515}, {0x1D51D, 0x1D51D}, {0x1D53A, 0x1D53A}, {0x1D53F, 0x1D53F},
{0x1D545, 0x1D545}, {0x1D547, 0x1D549}, {0x1D551, 0x1D551}, {0x1D6A6, 0x1D6A7}, {0x1D7CC, 0x1D7CD}, {0x1DA8C, 0x1DA9A}, {0x1DAA0, 0x1DAA0}, {0x1DAB0, 0x1DFFF}, {0x1E007, 0x1E007}, {0x1E019, 0x1E01A},
{0x1E022, 0x1E022}, {0x1E025, 0x1E025}, {0x1E02B, 0x1E0FF}, {0x1E12D, 0x1E12F}, {0x1E13E, 0x1E13F}, {0x1E14A, 0x1E14D}, {0x1E150, 0x1E2BF}, {0x1E2FA, 0x1E2FE}, {0x1E300, 0x1E7FF}, {0x1E8C5, 0x1E8C6},
{0x1E8D7, 0x1E8FF}, {0x1E94C, 0x1E94F}, {0x1E95A, 0x1E95D}, {0x1E960, 0x1EC70}, {0x1ECB5, 0x1ED00}, {0x1ED3E, 0x1EDFF}, {0x1EE04, 0x1EE04}, {0x1EE20, 0x1EE20}, {0x1EE23, 0x1EE23}, {0x1EE25, 0x1EE26},
{0x1EE28, 0x1EE28}, {0x1EE33, 0x1EE33}, {0x1EE38, 0x1EE38}, {0x1EE3A, 0x1EE3A}, {0x1EE3C, 0x1EE41}, {0x1EE43, 0x1EE46}, {0x1EE48, 0x1EE48}, {0x1EE4A, 0x1EE4A}, {0x1EE4C, 0x1EE4C}, {0x1EE50, 0x1EE50},
{0x1EE53, 0x1EE53}, {0x1EE55, 0x1EE56}, {0x1EE58, 0x1EE58}, {0x1EE5A, 0x1EE5A}, {0x1EE5C, 0x1EE5C}, {0x1EE5E, 0x1EE5E}, {0x1EE60, 0x1EE60}, {0x1EE63, 0x1EE63}, {0x1EE65, 0x1EE66}, {0x1EE6B, 0x1EE6B},
{0x1EE73, 0x1EE73}, {0x1EE78, 0x1EE78}, {0x1EE7D, 0x1EE7D}, {0x1EE7F, 0x1EE7F}, {0x1EE8A, 0x1EE8A}, {0x1EE9C, 0x1EEA0}, {0x1EEA4, 0x1EEA4}, {0x1EEAA, 0x1EEAA}, {0x1EEBC, 0x1EEEF}, {0x1EEF2, 0x1EFFF},
{0x1F02C, 0x1F02F}, {0x1F094, 0x1F09F}, {0x1F0AF, 0x1F0B0}, {0x1F0C0, 0x1F0C0}, {0x1F0D0, 0x1F0D0}, {0x1F0F6, 0x1F0FF}, {0x1F1AE, 0x1F1E5}, {0x1F203, 0x1F20F}, {0x1F23C, 0x1F23F}, {0x1F249, 0x1F24F},
{0x1F252, 0x1F25F}, {0x1F266, 0x1F2FF}, {0x1F6D8, 0x1F6DF}, {0x1F6ED, 0x1F6EF}, {0x1F6FD, 0x1F6FF}, {0x1F774, 0x1F77F}, {0x1F7D9, 0x1F7DF}, {0x1F7EC, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F},
{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8AF}, {0x1F8B2, 0x1F8FF}, {0x1F979, 0x1F979}, {0x1F9CC, 0x1F9CC}, {0x1FA54, 0x1FA5F}, {0x1FA6E, 0x1FA6F}, {0x1FA75, 0x1FA77}, {0x1FA7B, 0x1FA7F},
{0x1FA87, 0x1FA8F}, {0x1FAA9, 0x1FAAF}, {0x1FAB7, 0x1FABF}, {0x1FAC3, 0x1FACF}, {0x1FAD7, 0x1FAFF}, {0x1FB93, 0x1FB93}, {0x1FBCB, 0x1FBEF}, {0x1FBFA, 0x1FFFF}, {0x2A6DE, 0x2A6FF}, {0x2B735, 0x2B73F},
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
};
static const std::multimap<uint32_t, uint32_t> nfd_map = {
{0xC0, 0x41}, {0xC0, 0x300}, {0xC1, 0x41}, {0xC1, 0x301}, {0xC2, 0x41}, {0xC2, 0x302}, {0xC3, 0x41}, {0xC3, 0x303}, {0xC4, 0x41}, {0xC4, 0x308}, {0xC5, 0x41}, {0xC5, 0x30A}, {0xC7, 0x43},
{0xC7, 0x327}, {0xC8, 0x45}, {0xC8, 0x300}, {0xC9, 0x45}, {0xC9, 0x301}, {0xCA, 0x45}, {0xCA, 0x302}, {0xCB, 0x45}, {0xCB, 0x308}, {0xCC, 0x49}, {0xCC, 0x300}, {0xCD, 0x49}, {0xCD, 0x301},
{0xCE, 0x49}, {0xCE, 0x302}, {0xCF, 0x49}, {0xCF, 0x308}, {0xD1, 0x4E}, {0xD1, 0x303}, {0xD2, 0x4F}, {0xD2, 0x300}, {0xD3, 0x4F}, {0xD3, 0x301}, {0xD4, 0x4F}, {0xD4, 0x302}, {0xD5, 0x4F},
{0xD5, 0x303}, {0xD6, 0x4F}, {0xD6, 0x308}, {0xD9, 0x55}, {0xD9, 0x300}, {0xDA, 0x55}, {0xDA, 0x301}, {0xDB, 0x55}, {0xDB, 0x302}, {0xDC, 0x55}, {0xDC, 0x308}, {0xDD, 0x59}, {0xDD, 0x301},
{0xE0, 0x61}, {0xE0, 0x300}, {0xE1, 0x61}, {0xE1, 0x301}, {0xE2, 0x61}, {0xE2, 0x302}, {0xE3, 0x61}, {0xE3, 0x303}, {0xE4, 0x61}, {0xE4, 0x308}, {0xE5, 0x61}, {0xE5, 0x30A}, {0xE7, 0x63},
{0xE7, 0x327}, {0xE8, 0x65}, {0xE8, 0x300}, {0xE9, 0x65}, {0xE9, 0x301}, {0xEA, 0x65}, {0xEA, 0x302}, {0xEB, 0x65}, {0xEB, 0x308}, {0xEC, 0x69}, {0xEC, 0x300}, {0xED, 0x69}, {0xED, 0x301},
{0xEE, 0x69}, {0xEE, 0x302}, {0xEF, 0x69}, {0xEF, 0x308}, {0xF1, 0x6E}, {0xF1, 0x303}, {0xF2, 0x6F}, {0xF2, 0x300}, {0xF3, 0x6F}, {0xF3, 0x301}, {0xF4, 0x6F}, {0xF4, 0x302}, {0xF5, 0x6F},
{0xF5, 0x303}, {0xF6, 0x6F}, {0xF6, 0x308}, {0xF9, 0x75}, {0xF9, 0x300}, {0xFA, 0x75}, {0xFA, 0x301}, {0xFB, 0x75}, {0xFB, 0x302}, {0xFC, 0x75}, {0xFC, 0x308}, {0xFD, 0x79}, {0xFD, 0x301},
{0xFF, 0x79}, {0xFF, 0x308}, {0x100, 0x41}, {0x100, 0x304}, {0x101, 0x61}, {0x101, 0x304}, {0x102, 0x41}, {0x102, 0x306}, {0x103, 0x61}, {0x103, 0x306}, {0x104, 0x41}, {0x104, 0x328}, {0x105, 0x61},
{0x105, 0x328}, {0x106, 0x43}, {0x106, 0x301}, {0x107, 0x63}, {0x107, 0x301}, {0x108, 0x43}, {0x108, 0x302}, {0x109, 0x63}, {0x109, 0x302}, {0x10A, 0x43}, {0x10A, 0x307}, {0x10B, 0x63},
{0x10B, 0x307}, {0x10C, 0x43}, {0x10C, 0x30C}, {0x10D, 0x63}, {0x10D, 0x30C}, {0x10E, 0x44}, {0x10E, 0x30C}, {0x10F, 0x64}, {0x10F, 0x30C}, {0x112, 0x45}, {0x112, 0x304}, {0x113, 0x65},
{0x113, 0x304}, {0x114, 0x45}, {0x114, 0x306}, {0x115, 0x65}, {0x115, 0x306}, {0x116, 0x45}, {0x116, 0x307}, {0x117, 0x65}, {0x117, 0x307}, {0x118, 0x45}, {0x118, 0x328}, {0x119, 0x65},
{0x119, 0x328}, {0x11A, 0x45}, {0x11A, 0x30C}, {0x11B, 0x65}, {0x11B, 0x30C}, {0x11C, 0x47}, {0x11C, 0x302}, {0x11D, 0x67}, {0x11D, 0x302}, {0x11E, 0x47}, {0x11E, 0x306}, {0x11F, 0x67},
{0x11F, 0x306}, {0x120, 0x47}, {0x120, 0x307}, {0x121, 0x67}, {0x121, 0x307}, {0x122, 0x47}, {0x122, 0x327}, {0x123, 0x67}, {0x123, 0x327}, {0x124, 0x48}, {0x124, 0x302}, {0x125, 0x68},
{0x125, 0x302}, {0x128, 0x49}, {0x128, 0x303}, {0x129, 0x69}, {0x129, 0x303}, {0x12A, 0x49}, {0x12A, 0x304}, {0x12B, 0x69}, {0x12B, 0x304}, {0x12C, 0x49}, {0x12C, 0x306}, {0x12D, 0x69},
{0x12D, 0x306}, {0x12E, 0x49}, {0x12E, 0x328}, {0x12F, 0x69}, {0x12F, 0x328}, {0x130, 0x49}, {0x130, 0x307}, {0x134, 0x4A}, {0x134, 0x302}, {0x135, 0x6A}, {0x135, 0x302}, {0x136, 0x4B},
{0x136, 0x327}, {0x137, 0x6B}, {0x137, 0x327}, {0x139, 0x4C}, {0x139, 0x301}, {0x13A, 0x6C}, {0x13A, 0x301}, {0x13B, 0x4C}, {0x13B, 0x327}, {0x13C, 0x6C}, {0x13C, 0x327}, {0x13D, 0x4C},
{0x13D, 0x30C}, {0x13E, 0x6C}, {0x13E, 0x30C}, {0x143, 0x4E}, {0x143, 0x301}, {0x144, 0x6E}, {0x144, 0x301}, {0x145, 0x4E}, {0x145, 0x327}, {0x146, 0x6E}, {0x146, 0x327}, {0x147, 0x4E},
{0x147, 0x30C}, {0x148, 0x6E}, {0x148, 0x30C}, {0x14C, 0x4F}, {0x14C, 0x304}, {0x14D, 0x6F}, {0x14D, 0x304}, {0x14E, 0x4F}, {0x14E, 0x306}, {0x14F, 0x6F}, {0x14F, 0x306}, {0x150, 0x4F},
{0x150, 0x30B}, {0x151, 0x6F}, {0x151, 0x30B}, {0x154, 0x52}, {0x154, 0x301}, {0x155, 0x72}, {0x155, 0x301}, {0x156, 0x52}, {0x156, 0x327}, {0x157, 0x72}, {0x157, 0x327}, {0x158, 0x52},
{0x158, 0x30C}, {0x159, 0x72}, {0x159, 0x30C}, {0x15A, 0x53}, {0x15A, 0x301}, {0x15B, 0x73}, {0x15B, 0x301}, {0x15C, 0x53}, {0x15C, 0x302}, {0x15D, 0x73}, {0x15D, 0x302}, {0x15E, 0x53},
{0x15E, 0x327}, {0x15F, 0x73}, {0x15F, 0x327}, {0x160, 0x53}, {0x160, 0x30C}, {0x161, 0x73}, {0x161, 0x30C}, {0x162, 0x54}, {0x162, 0x327}, {0x163, 0x74}, {0x163, 0x327}, {0x164, 0x54},
{0x164, 0x30C}, {0x165, 0x74}, {0x165, 0x30C}, {0x168, 0x55}, {0x168, 0x303}, {0x169, 0x75}, {0x169, 0x303}, {0x16A, 0x55}, {0x16A, 0x304}, {0x16B, 0x75}, {0x16B, 0x304}, {0x16C, 0x55},
{0x16C, 0x306}, {0x16D, 0x75}, {0x16D, 0x306}, {0x16E, 0x55}, {0x16E, 0x30A}, {0x16F, 0x75}, {0x16F, 0x30A}, {0x170, 0x55}, {0x170, 0x30B}, {0x171, 0x75}, {0x171, 0x30B}, {0x172, 0x55},
{0x172, 0x328}, {0x173, 0x75}, {0x173, 0x328}, {0x174, 0x57}, {0x174, 0x302}, {0x175, 0x77}, {0x175, 0x302}, {0x176, 0x59}, {0x176, 0x302}, {0x177, 0x79}, {0x177, 0x302}, {0x178, 0x59},
{0x178, 0x308}, {0x179, 0x5A}, {0x179, 0x301}, {0x17A, 0x7A}, {0x17A, 0x301}, {0x17B, 0x5A}, {0x17B, 0x307}, {0x17C, 0x7A}, {0x17C, 0x307}, {0x17D, 0x5A}, {0x17D, 0x30C}, {0x17E, 0x7A},
{0x17E, 0x30C}, {0x1A0, 0x4F}, {0x1A0, 0x31B}, {0x1A1, 0x6F}, {0x1A1, 0x31B}, {0x1AF, 0x55}, {0x1AF, 0x31B}, {0x1B0, 0x75}, {0x1B0, 0x31B}, {0x1CD, 0x41}, {0x1CD, 0x30C}, {0x1CE, 0x61},
{0x1CE, 0x30C}, {0x1CF, 0x49}, {0x1CF, 0x30C}, {0x1D0, 0x69}, {0x1D0, 0x30C}, {0x1D1, 0x4F}, {0x1D1, 0x30C}, {0x1D2, 0x6F}, {0x1D2, 0x30C}, {0x1D3, 0x55}, {0x1D3, 0x30C}, {0x1D4, 0x75},
{0x1D4, 0x30C}, {0x1D5, 0x55}, {0x1D5, 0x308}, {0x1D5, 0x304}, {0x1D6, 0x75}, {0x1D6, 0x308}, {0x1D6, 0x304}, {0x1D7, 0x55}, {0x1D7, 0x308}, {0x1D7, 0x301}, {0x1D8, 0x75}, {0x1D8, 0x308},
{0x1D8, 0x301}, {0x1D9, 0x55}, {0x1D9, 0x308}, {0x1D9, 0x30C}, {0x1DA, 0x75}, {0x1DA, 0x308}, {0x1DA, 0x30C}, {0x1DB, 0x55}, {0x1DB, 0x308}, {0x1DB, 0x300}, {0x1DC, 0x75}, {0x1DC, 0x308},
{0x1DC, 0x300}, {0x1DE, 0x41}, {0x1DE, 0x308}, {0x1DE, 0x304}, {0x1DF, 0x61}, {0x1DF, 0x308}, {0x1DF, 0x304}, {0x1E0, 0x41}, {0x1E0, 0x307}, {0x1E0, 0x304}, {0x1E1, 0x61}, {0x1E1, 0x307},
{0x1E1, 0x304}, {0x1E2, 0xC6}, {0x1E2, 0x304}, {0x1E3, 0xE6}, {0x1E3, 0x304}, {0x1E6, 0x47}, {0x1E6, 0x30C}, {0x1E7, 0x67}, {0x1E7, 0x30C}, {0x1E8, 0x4B}, {0x1E8, 0x30C}, {0x1E9, 0x6B},
{0x1E9, 0x30C}, {0x1EA, 0x4F}, {0x1EA, 0x328}, {0x1EB, 0x6F}, {0x1EB, 0x328}, {0x1EC, 0x4F}, {0x1EC, 0x328}, {0x1EC, 0x304}, {0x1ED, 0x6F}, {0x1ED, 0x328}, {0x1ED, 0x304}, {0x1EE, 0x1B7},
{0x1EE, 0x30C}, {0x1EF, 0x292}, {0x1EF, 0x30C}, {0x1F0, 0x6A}, {0x1F0, 0x30C}, {0x1F4, 0x47}, {0x1F4, 0x301}, {0x1F5, 0x67}, {0x1F5, 0x301}, {0x1F8, 0x4E}, {0x1F8, 0x300}, {0x1F9, 0x6E},
{0x1F9, 0x300}, {0x1FA, 0x41}, {0x1FA, 0x30A}, {0x1FA, 0x301}, {0x1FB, 0x61}, {0x1FB, 0x30A}, {0x1FB, 0x301}, {0x1FC, 0xC6}, {0x1FC, 0x301}, {0x1FD, 0xE6}, {0x1FD, 0x301}, {0x1FE, 0xD8},
{0x1FE, 0x301}, {0x1FF, 0xF8}, {0x1FF, 0x301}, {0x200, 0x41}, {0x200, 0x30F}, {0x201, 0x61}, {0x201, 0x30F}, {0x202, 0x41}, {0x202, 0x311}, {0x203, 0x61}, {0x203, 0x311}, {0x204, 0x45},
{0x204, 0x30F}, {0x205, 0x65}, {0x205, 0x30F}, {0x206, 0x45}, {0x206, 0x311}, {0x207, 0x65}, {0x207, 0x311}, {0x208, 0x49}, {0x208, 0x30F}, {0x209, 0x69}, {0x209, 0x30F}, {0x20A, 0x49},
{0x20A, 0x311}, {0x20B, 0x69}, {0x20B, 0x311}, {0x20C, 0x4F}, {0x20C, 0x30F}, {0x20D, 0x6F}, {0x20D, 0x30F}, {0x20E, 0x4F}, {0x20E, 0x311}, {0x20F, 0x6F}, {0x20F, 0x311}, {0x210, 0x52},
{0x210, 0x30F}, {0x211, 0x72}, {0x211, 0x30F}, {0x212, 0x52}, {0x212, 0x311}, {0x213, 0x72}, {0x213, 0x311}, {0x214, 0x55}, {0x214, 0x30F}, {0x215, 0x75}, {0x215, 0x30F}, {0x216, 0x55},
{0x216, 0x311}, {0x217, 0x75}, {0x217, 0x311}, {0x218, 0x53}, {0x218, 0x326}, {0x219, 0x73}, {0x219, 0x326}, {0x21A, 0x54}, {0x21A, 0x326}, {0x21B, 0x74}, {0x21B, 0x326}, {0x21E, 0x48},
{0x21E, 0x30C}, {0x21F, 0x68}, {0x21F, 0x30C}, {0x226, 0x41}, {0x226, 0x307}, {0x227, 0x61}, {0x227, 0x307}, {0x228, 0x45}, {0x228, 0x327}, {0x229, 0x65}, {0x229, 0x327}, {0x22A, 0x4F},
{0x22A, 0x308}, {0x22A, 0x304}, {0x22B, 0x6F}, {0x22B, 0x308}, {0x22B, 0x304}, {0x22C, 0x4F}, {0x22C, 0x303}, {0x22C, 0x304}, {0x22D, 0x6F}, {0x22D, 0x303}, {0x22D, 0x304}, {0x22E, 0x4F},
{0x22E, 0x307}, {0x22F, 0x6F}, {0x22F, 0x307}, {0x230, 0x4F}, {0x230, 0x307}, {0x230, 0x304}, {0x231, 0x6F}, {0x231, 0x307}, {0x231, 0x304}, {0x232, 0x59}, {0x232, 0x304}, {0x233, 0x79},
{0x233, 0x304}, {0x340, 0x300}, {0x341, 0x301}, {0x343, 0x313}, {0x344, 0x308}, {0x344, 0x301}, {0x374, 0x2B9}, {0x37E, 0x3B}, {0x385, 0xA8}, {0x385, 0x301}, {0x386, 0x391}, {0x386, 0x301},
{0x387, 0xB7}, {0x388, 0x395}, {0x388, 0x301}, {0x389, 0x397}, {0x389, 0x301}, {0x38A, 0x399}, {0x38A, 0x301}, {0x38C, 0x39F}, {0x38C, 0x301}, {0x38E, 0x3A5}, {0x38E, 0x301}, {0x38F, 0x3A9},
{0x38F, 0x301}, {0x390, 0x3B9}, {0x390, 0x308}, {0x390, 0x301}, {0x3AA, 0x399}, {0x3AA, 0x308}, {0x3AB, 0x3A5}, {0x3AB, 0x308}, {0x3AC, 0x3B1}, {0x3AC, 0x301}, {0x3AD, 0x3B5}, {0x3AD, 0x301},
{0x3AE, 0x3B7}, {0x3AE, 0x301}, {0x3AF, 0x3B9}, {0x3AF, 0x301}, {0x3B0, 0x3C5}, {0x3B0, 0x308}, {0x3B0, 0x301}, {0x3CA, 0x3B9}, {0x3CA, 0x308}, {0x3CB, 0x3C5}, {0x3CB, 0x308}, {0x3CC, 0x3BF},
{0x3CC, 0x301}, {0x3CD, 0x3C5}, {0x3CD, 0x301}, {0x3CE, 0x3C9}, {0x3CE, 0x301}, {0x3D3, 0x3D2}, {0x3D3, 0x301}, {0x3D4, 0x3D2}, {0x3D4, 0x308}, {0x400, 0x415}, {0x400, 0x300}, {0x401, 0x415},
{0x401, 0x308}, {0x403, 0x413}, {0x403, 0x301}, {0x407, 0x406}, {0x407, 0x308}, {0x40C, 0x41A}, {0x40C, 0x301}, {0x40D, 0x418}, {0x40D, 0x300}, {0x40E, 0x423}, {0x40E, 0x306}, {0x419, 0x418},
{0x419, 0x306}, {0x439, 0x438}, {0x439, 0x306}, {0x450, 0x435}, {0x450, 0x300}, {0x451, 0x435}, {0x451, 0x308}, {0x453, 0x433}, {0x453, 0x301}, {0x457, 0x456}, {0x457, 0x308}, {0x45C, 0x43A},
{0x45C, 0x301}, {0x45D, 0x438}, {0x45D, 0x300}, {0x45E, 0x443}, {0x45E, 0x306}, {0x476, 0x474}, {0x476, 0x30F}, {0x477, 0x475}, {0x477, 0x30F}, {0x4C1, 0x416}, {0x4C1, 0x306}, {0x4C2, 0x436},
{0x4C2, 0x306}, {0x4D0, 0x410}, {0x4D0, 0x306}, {0x4D1, 0x430}, {0x4D1, 0x306}, {0x4D2, 0x410}, {0x4D2, 0x308}, {0x4D3, 0x430}, {0x4D3, 0x308}, {0x4D6, 0x415}, {0x4D6, 0x306}, {0x4D7, 0x435},
{0x4D7, 0x306}, {0x4DA, 0x4D8}, {0x4DA, 0x308}, {0x4DB, 0x4D9}, {0x4DB, 0x308}, {0x4DC, 0x416}, {0x4DC, 0x308}, {0x4DD, 0x436}, {0x4DD, 0x308}, {0x4DE, 0x417}, {0x4DE, 0x308}, {0x4DF, 0x437},
{0x4DF, 0x308}, {0x4E2, 0x418}, {0x4E2, 0x304}, {0x4E3, 0x438}, {0x4E3, 0x304}, {0x4E4, 0x418}, {0x4E4, 0x308}, {0x4E5, 0x438}, {0x4E5, 0x308}, {0x4E6, 0x41E}, {0x4E6, 0x308}, {0x4E7, 0x43E},
{0x4E7, 0x308}, {0x4EA, 0x4E8}, {0x4EA, 0x308}, {0x4EB, 0x4E9}, {0x4EB, 0x308}, {0x4EC, 0x42D}, {0x4EC, 0x308}, {0x4ED, 0x44D}, {0x4ED, 0x308}, {0x4EE, 0x423}, {0x4EE, 0x304}, {0x4EF, 0x443},
{0x4EF, 0x304}, {0x4F0, 0x423}, {0x4F0, 0x308}, {0x4F1, 0x443}, {0x4F1, 0x308}, {0x4F2, 0x423}, {0x4F2, 0x30B}, {0x4F3, 0x443}, {0x4F3, 0x30B}, {0x4F4, 0x427}, {0x4F4, 0x308}, {0x4F5, 0x447},
{0x4F5, 0x308}, {0x4F8, 0x42B}, {0x4F8, 0x308}, {0x4F9, 0x44B}, {0x4F9, 0x308}, {0x622, 0x627}, {0x622, 0x653}, {0x623, 0x627}, {0x623, 0x654}, {0x624, 0x648}, {0x624, 0x654}, {0x625, 0x627},
{0x625, 0x655}, {0x626, 0x64A}, {0x626, 0x654}, {0x6C0, 0x6D5}, {0x6C0, 0x654}, {0x6C2, 0x6C1}, {0x6C2, 0x654}, {0x6D3, 0x6D2}, {0x6D3, 0x654}, {0x929, 0x928}, {0x929, 0x93C}, {0x931, 0x930},
{0x931, 0x93C}, {0x934, 0x933}, {0x934, 0x93C}, {0x958, 0x915}, {0x958, 0x93C}, {0x959, 0x916}, {0x959, 0x93C}, {0x95A, 0x917}, {0x95A, 0x93C}, {0x95B, 0x91C}, {0x95B, 0x93C}, {0x95C, 0x921},
{0x95C, 0x93C}, {0x95D, 0x922}, {0x95D, 0x93C}, {0x95E, 0x92B}, {0x95E, 0x93C}, {0x95F, 0x92F}, {0x95F, 0x93C}, {0x9CB, 0x9C7}, {0x9CB, 0x9BE}, {0x9CC, 0x9C7}, {0x9CC, 0x9D7}, {0x9DC, 0x9A1},
{0x9DC, 0x9BC}, {0x9DD, 0x9A2}, {0x9DD, 0x9BC}, {0x9DF, 0x9AF}, {0x9DF, 0x9BC}, {0xA33, 0xA32}, {0xA33, 0xA3C}, {0xA36, 0xA38}, {0xA36, 0xA3C}, {0xA59, 0xA16}, {0xA59, 0xA3C}, {0xA5A, 0xA17},
{0xA5A, 0xA3C}, {0xA5B, 0xA1C}, {0xA5B, 0xA3C}, {0xA5E, 0xA2B}, {0xA5E, 0xA3C}, {0xB48, 0xB47}, {0xB48, 0xB56}, {0xB4B, 0xB47}, {0xB4B, 0xB3E}, {0xB4C, 0xB47}, {0xB4C, 0xB57}, {0xB5C, 0xB21},
{0xB5C, 0xB3C}, {0xB5D, 0xB22}, {0xB5D, 0xB3C}, {0xB94, 0xB92}, {0xB94, 0xBD7}, {0xBCA, 0xBC6}, {0xBCA, 0xBBE}, {0xBCB, 0xBC7}, {0xBCB, 0xBBE}, {0xBCC, 0xBC6}, {0xBCC, 0xBD7}, {0xC48, 0xC46},
{0xC48, 0xC56}, {0xCC0, 0xCBF}, {0xCC0, 0xCD5}, {0xCC7, 0xCC6}, {0xCC7, 0xCD5}, {0xCC8, 0xCC6}, {0xCC8, 0xCD6}, {0xCCA, 0xCC6}, {0xCCA, 0xCC2}, {0xCCB, 0xCC6}, {0xCCB, 0xCC2}, {0xCCB, 0xCD5},
{0xD4A, 0xD46}, {0xD4A, 0xD3E}, {0xD4B, 0xD47}, {0xD4B, 0xD3E}, {0xD4C, 0xD46}, {0xD4C, 0xD57}, {0xDDA, 0xDD9}, {0xDDA, 0xDCA}, {0xDDC, 0xDD9}, {0xDDC, 0xDCF}, {0xDDD, 0xDD9}, {0xDDD, 0xDCF},
{0xDDD, 0xDCA}, {0xDDE, 0xDD9}, {0xDDE, 0xDDF}, {0xF43, 0xF42}, {0xF43, 0xFB7}, {0xF4D, 0xF4C}, {0xF4D, 0xFB7}, {0xF52, 0xF51}, {0xF52, 0xFB7}, {0xF57, 0xF56}, {0xF57, 0xFB7}, {0xF5C, 0xF5B},
{0xF5C, 0xFB7}, {0xF69, 0xF40}, {0xF69, 0xFB5}, {0xF73, 0xF71}, {0xF73, 0xF72}, {0xF75, 0xF71}, {0xF75, 0xF74}, {0xF76, 0xFB2}, {0xF76, 0xF80}, {0xF78, 0xFB3}, {0xF78, 0xF80}, {0xF81, 0xF71},
{0xF81, 0xF80}, {0xF93, 0xF92}, {0xF93, 0xFB7}, {0xF9D, 0xF9C}, {0xF9D, 0xFB7}, {0xFA2, 0xFA1}, {0xFA2, 0xFB7}, {0xFA7, 0xFA6}, {0xFA7, 0xFB7}, {0xFAC, 0xFAB}, {0xFAC, 0xFB7}, {0xFB9, 0xF90},
{0xFB9, 0xFB5}, {0x1026, 0x1025}, {0x1026, 0x102E}, {0x1B06, 0x1B05}, {0x1B06, 0x1B35}, {0x1B08, 0x1B07}, {0x1B08, 0x1B35}, {0x1B0A, 0x1B09}, {0x1B0A, 0x1B35}, {0x1B0C, 0x1B0B}, {0x1B0C, 0x1B35},
{0x1B0E, 0x1B0D}, {0x1B0E, 0x1B35}, {0x1B12, 0x1B11}, {0x1B12, 0x1B35}, {0x1B3B, 0x1B3A}, {0x1B3B, 0x1B35}, {0x1B3D, 0x1B3C}, {0x1B3D, 0x1B35}, {0x1B40, 0x1B3E}, {0x1B40, 0x1B35}, {0x1B41, 0x1B3F},
{0x1B41, 0x1B35}, {0x1B43, 0x1B42}, {0x1B43, 0x1B35}, {0x1E00, 0x41}, {0x1E00, 0x325}, {0x1E01, 0x61}, {0x1E01, 0x325}, {0x1E02, 0x42}, {0x1E02, 0x307}, {0x1E03, 0x62}, {0x1E03, 0x307},
{0x1E04, 0x42}, {0x1E04, 0x323}, {0x1E05, 0x62}, {0x1E05, 0x323}, {0x1E06, 0x42}, {0x1E06, 0x331}, {0x1E07, 0x62}, {0x1E07, 0x331}, {0x1E08, 0x43}, {0x1E08, 0x327}, {0x1E08, 0x301}, {0x1E09, 0x63},
{0x1E09, 0x327}, {0x1E09, 0x301}, {0x1E0A, 0x44}, {0x1E0A, 0x307}, {0x1E0B, 0x64}, {0x1E0B, 0x307}, {0x1E0C, 0x44}, {0x1E0C, 0x323}, {0x1E0D, 0x64}, {0x1E0D, 0x323}, {0x1E0E, 0x44}, {0x1E0E, 0x331},
{0x1E0F, 0x64}, {0x1E0F, 0x331}, {0x1E10, 0x44}, {0x1E10, 0x327}, {0x1E11, 0x64}, {0x1E11, 0x327}, {0x1E12, 0x44}, {0x1E12, 0x32D}, {0x1E13, 0x64}, {0x1E13, 0x32D}, {0x1E14, 0x45}, {0x1E14, 0x304},
{0x1E14, 0x300}, {0x1E15, 0x65}, {0x1E15, 0x304}, {0x1E15, 0x300}, {0x1E16, 0x45}, {0x1E16, 0x304}, {0x1E16, 0x301}, {0x1E17, 0x65}, {0x1E17, 0x304}, {0x1E17, 0x301}, {0x1E18, 0x45}, {0x1E18, 0x32D},
{0x1E19, 0x65}, {0x1E19, 0x32D}, {0x1E1A, 0x45}, {0x1E1A, 0x330}, {0x1E1B, 0x65}, {0x1E1B, 0x330}, {0x1E1C, 0x45}, {0x1E1C, 0x327}, {0x1E1C, 0x306}, {0x1E1D, 0x65}, {0x1E1D, 0x327}, {0x1E1D, 0x306},
{0x1E1E, 0x46}, {0x1E1E, 0x307}, {0x1E1F, 0x66}, {0x1E1F, 0x307}, {0x1E20, 0x47}, {0x1E20, 0x304}, {0x1E21, 0x67}, {0x1E21, 0x304}, {0x1E22, 0x48}, {0x1E22, 0x307}, {0x1E23, 0x68}, {0x1E23, 0x307},
{0x1E24, 0x48}, {0x1E24, 0x323}, {0x1E25, 0x68}, {0x1E25, 0x323}, {0x1E26, 0x48}, {0x1E26, 0x308}, {0x1E27, 0x68}, {0x1E27, 0x308}, {0x1E28, 0x48}, {0x1E28, 0x327}, {0x1E29, 0x68}, {0x1E29, 0x327},
{0x1E2A, 0x48}, {0x1E2A, 0x32E}, {0x1E2B, 0x68}, {0x1E2B, 0x32E}, {0x1E2C, 0x49}, {0x1E2C, 0x330}, {0x1E2D, 0x69}, {0x1E2D, 0x330}, {0x1E2E, 0x49}, {0x1E2E, 0x308}, {0x1E2E, 0x301}, {0x1E2F, 0x69},
{0x1E2F, 0x308}, {0x1E2F, 0x301}, {0x1E30, 0x4B}, {0x1E30, 0x301}, {0x1E31, 0x6B}, {0x1E31, 0x301}, {0x1E32, 0x4B}, {0x1E32, 0x323}, {0x1E33, 0x6B}, {0x1E33, 0x323}, {0x1E34, 0x4B}, {0x1E34, 0x331},
{0x1E35, 0x6B}, {0x1E35, 0x331}, {0x1E36, 0x4C}, {0x1E36, 0x323}, {0x1E37, 0x6C}, {0x1E37, 0x323}, {0x1E38, 0x4C}, {0x1E38, 0x323}, {0x1E38, 0x304}, {0x1E39, 0x6C}, {0x1E39, 0x323}, {0x1E39, 0x304},
{0x1E3A, 0x4C}, {0x1E3A, 0x331}, {0x1E3B, 0x6C}, {0x1E3B, 0x331}, {0x1E3C, 0x4C}, {0x1E3C, 0x32D}, {0x1E3D, 0x6C}, {0x1E3D, 0x32D}, {0x1E3E, 0x4D}, {0x1E3E, 0x301}, {0x1E3F, 0x6D}, {0x1E3F, 0x301},
{0x1E40, 0x4D}, {0x1E40, 0x307}, {0x1E41, 0x6D}, {0x1E41, 0x307}, {0x1E42, 0x4D}, {0x1E42, 0x323}, {0x1E43, 0x6D}, {0x1E43, 0x323}, {0x1E44, 0x4E}, {0x1E44, 0x307}, {0x1E45, 0x6E}, {0x1E45, 0x307},
{0x1E46, 0x4E}, {0x1E46, 0x323}, {0x1E47, 0x6E}, {0x1E47, 0x323}, {0x1E48, 0x4E}, {0x1E48, 0x331}, {0x1E49, 0x6E}, {0x1E49, 0x331}, {0x1E4A, 0x4E}, {0x1E4A, 0x32D}, {0x1E4B, 0x6E}, {0x1E4B, 0x32D},
{0x1E4C, 0x4F}, {0x1E4C, 0x303}, {0x1E4C, 0x301}, {0x1E4D, 0x6F}, {0x1E4D, 0x303}, {0x1E4D, 0x301}, {0x1E4E, 0x4F}, {0x1E4E, 0x303}, {0x1E4E, 0x308}, {0x1E4F, 0x6F}, {0x1E4F, 0x303}, {0x1E4F, 0x308},
{0x1E50, 0x4F}, {0x1E50, 0x304}, {0x1E50, 0x300}, {0x1E51, 0x6F}, {0x1E51, 0x304}, {0x1E51, 0x300}, {0x1E52, 0x4F}, {0x1E52, 0x304}, {0x1E52, 0x301}, {0x1E53, 0x6F}, {0x1E53, 0x304}, {0x1E53, 0x301},
{0x1E54, 0x50}, {0x1E54, 0x301}, {0x1E55, 0x70}, {0x1E55, 0x301}, {0x1E56, 0x50}, {0x1E56, 0x307}, {0x1E57, 0x70}, {0x1E57, 0x307}, {0x1E58, 0x52}, {0x1E58, 0x307}, {0x1E59, 0x72}, {0x1E59, 0x307},
{0x1E5A, 0x52}, {0x1E5A, 0x323}, {0x1E5B, 0x72}, {0x1E5B, 0x323}, {0x1E5C, 0x52}, {0x1E5C, 0x323}, {0x1E5C, 0x304}, {0x1E5D, 0x72}, {0x1E5D, 0x323}, {0x1E5D, 0x304}, {0x1E5E, 0x52}, {0x1E5E, 0x331},
{0x1E5F, 0x72}, {0x1E5F, 0x331}, {0x1E60, 0x53}, {0x1E60, 0x307}, {0x1E61, 0x73}, {0x1E61, 0x307}, {0x1E62, 0x53}, {0x1E62, 0x323}, {0x1E63, 0x73}, {0x1E63, 0x323}, {0x1E64, 0x53}, {0x1E64, 0x301},
{0x1E64, 0x307}, {0x1E65, 0x73}, {0x1E65, 0x301}, {0x1E65, 0x307}, {0x1E66, 0x53}, {0x1E66, 0x30C}, {0x1E66, 0x307}, {0x1E67, 0x73}, {0x1E67, 0x30C}, {0x1E67, 0x307}, {0x1E68, 0x53}, {0x1E68, 0x323},
{0x1E68, 0x307}, {0x1E69, 0x73}, {0x1E69, 0x323}, {0x1E69, 0x307}, {0x1E6A, 0x54}, {0x1E6A, 0x307}, {0x1E6B, 0x74}, {0x1E6B, 0x307}, {0x1E6C, 0x54}, {0x1E6C, 0x323}, {0x1E6D, 0x74}, {0x1E6D, 0x323},
{0x1E6E, 0x54}, {0x1E6E, 0x331}, {0x1E6F, 0x74}, {0x1E6F, 0x331}, {0x1E70, 0x54}, {0x1E70, 0x32D}, {0x1E71, 0x74}, {0x1E71, 0x32D}, {0x1E72, 0x55}, {0x1E72, 0x324}, {0x1E73, 0x75}, {0x1E73, 0x324},
{0x1E74, 0x55}, {0x1E74, 0x330}, {0x1E75, 0x75}, {0x1E75, 0x330}, {0x1E76, 0x55}, {0x1E76, 0x32D}, {0x1E77, 0x75}, {0x1E77, 0x32D}, {0x1E78, 0x55}, {0x1E78, 0x303}, {0x1E78, 0x301}, {0x1E79, 0x75},
{0x1E79, 0x303}, {0x1E79, 0x301}, {0x1E7A, 0x55}, {0x1E7A, 0x304}, {0x1E7A, 0x308}, {0x1E7B, 0x75}, {0x1E7B, 0x304}, {0x1E7B, 0x308}, {0x1E7C, 0x56}, {0x1E7C, 0x303}, {0x1E7D, 0x76}, {0x1E7D, 0x303},
{0x1E7E, 0x56}, {0x1E7E, 0x323}, {0x1E7F, 0x76}, {0x1E7F, 0x323}, {0x1E80, 0x57}, {0x1E80, 0x300}, {0x1E81, 0x77}, {0x1E81, 0x300}, {0x1E82, 0x57}, {0x1E82, 0x301}, {0x1E83, 0x77}, {0x1E83, 0x301},
{0x1E84, 0x57}, {0x1E84, 0x308}, {0x1E85, 0x77}, {0x1E85, 0x308}, {0x1E86, 0x57}, {0x1E86, 0x307}, {0x1E87, 0x77}, {0x1E87, 0x307}, {0x1E88, 0x57}, {0x1E88, 0x323}, {0x1E89, 0x77}, {0x1E89, 0x323},
{0x1E8A, 0x58}, {0x1E8A, 0x307}, {0x1E8B, 0x78}, {0x1E8B, 0x307}, {0x1E8C, 0x58}, {0x1E8C, 0x308}, {0x1E8D, 0x78}, {0x1E8D, 0x308}, {0x1E8E, 0x59}, {0x1E8E, 0x307}, {0x1E8F, 0x79}, {0x1E8F, 0x307},
{0x1E90, 0x5A}, {0x1E90, 0x302}, {0x1E91, 0x7A}, {0x1E91, 0x302}, {0x1E92, 0x5A}, {0x1E92, 0x323}, {0x1E93, 0x7A}, {0x1E93, 0x323}, {0x1E94, 0x5A}, {0x1E94, 0x331}, {0x1E95, 0x7A}, {0x1E95, 0x331},
{0x1E96, 0x68}, {0x1E96, 0x331}, {0x1E97, 0x74}, {0x1E97, 0x308}, {0x1E98, 0x77}, {0x1E98, 0x30A}, {0x1E99, 0x79}, {0x1E99, 0x30A}, {0x1E9B, 0x17F}, {0x1E9B, 0x307}, {0x1EA0, 0x41}, {0x1EA0, 0x323},
{0x1EA1, 0x61}, {0x1EA1, 0x323}, {0x1EA2, 0x41}, {0x1EA2, 0x309}, {0x1EA3, 0x61}, {0x1EA3, 0x309}, {0x1EA4, 0x41}, {0x1EA4, 0x302}, {0x1EA4, 0x301}, {0x1EA5, 0x61}, {0x1EA5, 0x302}, {0x1EA5, 0x301},
{0x1EA6, 0x41}, {0x1EA6, 0x302}, {0x1EA6, 0x300}, {0x1EA7, 0x61}, {0x1EA7, 0x302}, {0x1EA7, 0x300}, {0x1EA8, 0x41}, {0x1EA8, 0x302}, {0x1EA8, 0x309}, {0x1EA9, 0x61}, {0x1EA9, 0x302}, {0x1EA9, 0x309},
{0x1EAA, 0x41}, {0x1EAA, 0x302}, {0x1EAA, 0x303}, {0x1EAB, 0x61}, {0x1EAB, 0x302}, {0x1EAB, 0x303}, {0x1EAC, 0x41}, {0x1EAC, 0x323}, {0x1EAC, 0x302}, {0x1EAD, 0x61}, {0x1EAD, 0x323}, {0x1EAD, 0x302},
{0x1EAE, 0x41}, {0x1EAE, 0x306}, {0x1EAE, 0x301}, {0x1EAF, 0x61}, {0x1EAF, 0x306}, {0x1EAF, 0x301}, {0x1EB0, 0x41}, {0x1EB0, 0x306}, {0x1EB0, 0x300}, {0x1EB1, 0x61}, {0x1EB1, 0x306}, {0x1EB1, 0x300},
{0x1EB2, 0x41}, {0x1EB2, 0x306}, {0x1EB2, 0x309}, {0x1EB3, 0x61}, {0x1EB3, 0x306}, {0x1EB3, 0x309}, {0x1EB4, 0x41}, {0x1EB4, 0x306}, {0x1EB4, 0x303}, {0x1EB5, 0x61}, {0x1EB5, 0x306}, {0x1EB5, 0x303},
{0x1EB6, 0x41}, {0x1EB6, 0x323}, {0x1EB6, 0x306}, {0x1EB7, 0x61}, {0x1EB7, 0x323}, {0x1EB7, 0x306}, {0x1EB8, 0x45}, {0x1EB8, 0x323}, {0x1EB9, 0x65}, {0x1EB9, 0x323}, {0x1EBA, 0x45}, {0x1EBA, 0x309},
{0x1EBB, 0x65}, {0x1EBB, 0x309}, {0x1EBC, 0x45}, {0x1EBC, 0x303}, {0x1EBD, 0x65}, {0x1EBD, 0x303}, {0x1EBE, 0x45}, {0x1EBE, 0x302}, {0x1EBE, 0x301}, {0x1EBF, 0x65}, {0x1EBF, 0x302}, {0x1EBF, 0x301},
{0x1EC0, 0x45}, {0x1EC0, 0x302}, {0x1EC0, 0x300}, {0x1EC1, 0x65}, {0x1EC1, 0x302}, {0x1EC1, 0x300}, {0x1EC2, 0x45}, {0x1EC2, 0x302}, {0x1EC2, 0x309}, {0x1EC3, 0x65}, {0x1EC3, 0x302}, {0x1EC3, 0x309},
{0x1EC4, 0x45}, {0x1EC4, 0x302}, {0x1EC4, 0x303}, {0x1EC5, 0x65}, {0x1EC5, 0x302}, {0x1EC5, 0x303}, {0x1EC6, 0x45}, {0x1EC6, 0x323}, {0x1EC6, 0x302}, {0x1EC7, 0x65}, {0x1EC7, 0x323}, {0x1EC7, 0x302},
{0x1EC8, 0x49}, {0x1EC8, 0x309}, {0x1EC9, 0x69}, {0x1EC9, 0x309}, {0x1ECA, 0x49}, {0x1ECA, 0x323}, {0x1ECB, 0x69}, {0x1ECB, 0x323}, {0x1ECC, 0x4F}, {0x1ECC, 0x323}, {0x1ECD, 0x6F}, {0x1ECD, 0x323},
{0x1ECE, 0x4F}, {0x1ECE, 0x309}, {0x1ECF, 0x6F}, {0x1ECF, 0x309}, {0x1ED0, 0x4F}, {0x1ED0, 0x302}, {0x1ED0, 0x301}, {0x1ED1, 0x6F}, {0x1ED1, 0x302}, {0x1ED1, 0x301}, {0x1ED2, 0x4F}, {0x1ED2, 0x302},
{0x1ED2, 0x300}, {0x1ED3, 0x6F}, {0x1ED3, 0x302}, {0x1ED3, 0x300}, {0x1ED4, 0x4F}, {0x1ED4, 0x302}, {0x1ED4, 0x309}, {0x1ED5, 0x6F}, {0x1ED5, 0x302}, {0x1ED5, 0x309}, {0x1ED6, 0x4F}, {0x1ED6, 0x302},
{0x1ED6, 0x303}, {0x1ED7, 0x6F}, {0x1ED7, 0x302}, {0x1ED7, 0x303}, {0x1ED8, 0x4F}, {0x1ED8, 0x323}, {0x1ED8, 0x302}, {0x1ED9, 0x6F}, {0x1ED9, 0x323}, {0x1ED9, 0x302}, {0x1EDA, 0x4F}, {0x1EDA, 0x31B},
{0x1EDA, 0x301}, {0x1EDB, 0x6F}, {0x1EDB, 0x31B}, {0x1EDB, 0x301}, {0x1EDC, 0x4F}, {0x1EDC, 0x31B}, {0x1EDC, 0x300}, {0x1EDD, 0x6F}, {0x1EDD, 0x31B}, {0x1EDD, 0x300}, {0x1EDE, 0x4F}, {0x1EDE, 0x31B},
{0x1EDE, 0x309}, {0x1EDF, 0x6F}, {0x1EDF, 0x31B}, {0x1EDF, 0x309}, {0x1EE0, 0x4F}, {0x1EE0, 0x31B}, {0x1EE0, 0x303}, {0x1EE1, 0x6F}, {0x1EE1, 0x31B}, {0x1EE1, 0x303}, {0x1EE2, 0x4F}, {0x1EE2, 0x31B},
{0x1EE2, 0x323}, {0x1EE3, 0x6F}, {0x1EE3, 0x31B}, {0x1EE3, 0x323}, {0x1EE4, 0x55}, {0x1EE4, 0x323}, {0x1EE5, 0x75}, {0x1EE5, 0x323}, {0x1EE6, 0x55}, {0x1EE6, 0x309}, {0x1EE7, 0x75}, {0x1EE7, 0x309},
{0x1EE8, 0x55}, {0x1EE8, 0x31B}, {0x1EE8, 0x301}, {0x1EE9, 0x75}, {0x1EE9, 0x31B}, {0x1EE9, 0x301}, {0x1EEA, 0x55}, {0x1EEA, 0x31B}, {0x1EEA, 0x300}, {0x1EEB, 0x75}, {0x1EEB, 0x31B}, {0x1EEB, 0x300},
{0x1EEC, 0x55}, {0x1EEC, 0x31B}, {0x1EEC, 0x309}, {0x1EED, 0x75}, {0x1EED, 0x31B}, {0x1EED, 0x309}, {0x1EEE, 0x55}, {0x1EEE, 0x31B}, {0x1EEE, 0x303}, {0x1EEF, 0x75}, {0x1EEF, 0x31B}, {0x1EEF, 0x303},
{0x1EF0, 0x55}, {0x1EF0, 0x31B}, {0x1EF0, 0x323}, {0x1EF1, 0x75}, {0x1EF1, 0x31B}, {0x1EF1, 0x323}, {0x1EF2, 0x59}, {0x1EF2, 0x300}, {0x1EF3, 0x79}, {0x1EF3, 0x300}, {0x1EF4, 0x59}, {0x1EF4, 0x323},
{0x1EF5, 0x79}, {0x1EF5, 0x323}, {0x1EF6, 0x59}, {0x1EF6, 0x309}, {0x1EF7, 0x79}, {0x1EF7, 0x309}, {0x1EF8, 0x59}, {0x1EF8, 0x303}, {0x1EF9, 0x79}, {0x1EF9, 0x303}, {0x1F00, 0x3B1}, {0x1F00, 0x313},
{0x1F01, 0x3B1}, {0x1F01, 0x314}, {0x1F02, 0x3B1}, {0x1F02, 0x313}, {0x1F02, 0x300}, {0x1F03, 0x3B1}, {0x1F03, 0x314}, {0x1F03, 0x300}, {0x1F04, 0x3B1}, {0x1F04, 0x313}, {0x1F04, 0x301},
{0x1F05, 0x3B1}, {0x1F05, 0x314}, {0x1F05, 0x301}, {0x1F06, 0x3B1}, {0x1F06, 0x313}, {0x1F06, 0x342}, {0x1F07, 0x3B1}, {0x1F07, 0x314}, {0x1F07, 0x342}, {0x1F08, 0x391}, {0x1F08, 0x313},
{0x1F09, 0x391}, {0x1F09, 0x314}, {0x1F0A, 0x391}, {0x1F0A, 0x313}, {0x1F0A, 0x300}, {0x1F0B, 0x391}, {0x1F0B, 0x314}, {0x1F0B, 0x300}, {0x1F0C, 0x391}, {0x1F0C, 0x313}, {0x1F0C, 0x301},
{0x1F0D, 0x391}, {0x1F0D, 0x314}, {0x1F0D, 0x301}, {0x1F0E, 0x391}, {0x1F0E, 0x313}, {0x1F0E, 0x342}, {0x1F0F, 0x391}, {0x1F0F, 0x314}, {0x1F0F, 0x342}, {0x1F10, 0x3B5}, {0x1F10, 0x313},
{0x1F11, 0x3B5}, {0x1F11, 0x314}, {0x1F12, 0x3B5}, {0x1F12, 0x313}, {0x1F12, 0x300}, {0x1F13, 0x3B5}, {0x1F13, 0x314}, {0x1F13, 0x300}, {0x1F14, 0x3B5}, {0x1F14, 0x313}, {0x1F14, 0x301},
{0x1F15, 0x3B5}, {0x1F15, 0x314}, {0x1F15, 0x301}, {0x1F18, 0x395}, {0x1F18, 0x313}, {0x1F19, 0x395}, {0x1F19, 0x314}, {0x1F1A, 0x395}, {0x1F1A, 0x313}, {0x1F1A, 0x300}, {0x1F1B, 0x395},
{0x1F1B, 0x314}, {0x1F1B, 0x300}, {0x1F1C, 0x395}, {0x1F1C, 0x313}, {0x1F1C, 0x301}, {0x1F1D, 0x395}, {0x1F1D, 0x314}, {0x1F1D, 0x301}, {0x1F20, 0x3B7}, {0x1F20, 0x313}, {0x1F21, 0x3B7},
{0x1F21, 0x314}, {0x1F22, 0x3B7}, {0x1F22, 0x313}, {0x1F22, 0x300}, {0x1F23, 0x3B7}, {0x1F23, 0x314}, {0x1F23, 0x300}, {0x1F24, 0x3B7}, {0x1F24, 0x313}, {0x1F24, 0x301}, {0x1F25, 0x3B7},
{0x1F25, 0x314}, {0x1F25, 0x301}, {0x1F26, 0x3B7}, {0x1F26, 0x313}, {0x1F26, 0x342}, {0x1F27, 0x3B7}, {0x1F27, 0x314}, {0x1F27, 0x342}, {0x1F28, 0x397}, {0x1F28, 0x313}, {0x1F29, 0x397},
{0x1F29, 0x314}, {0x1F2A, 0x397}, {0x1F2A, 0x313}, {0x1F2A, 0x300}, {0x1F2B, 0x397}, {0x1F2B, 0x314}, {0x1F2B, 0x300}, {0x1F2C, 0x397}, {0x1F2C, 0x313}, {0x1F2C, 0x301}, {0x1F2D, 0x397},
{0x1F2D, 0x314}, {0x1F2D, 0x301}, {0x1F2E, 0x397}, {0x1F2E, 0x313}, {0x1F2E, 0x342}, {0x1F2F, 0x397}, {0x1F2F, 0x314}, {0x1F2F, 0x342}, {0x1F30, 0x3B9}, {0x1F30, 0x313}, {0x1F31, 0x3B9},
{0x1F31, 0x314}, {0x1F32, 0x3B9}, {0x1F32, 0x313}, {0x1F32, 0x300}, {0x1F33, 0x3B9}, {0x1F33, 0x314}, {0x1F33, 0x300}, {0x1F34, 0x3B9}, {0x1F34, 0x313}, {0x1F34, 0x301}, {0x1F35, 0x3B9},
{0x1F35, 0x314}, {0x1F35, 0x301}, {0x1F36, 0x3B9}, {0x1F36, 0x313}, {0x1F36, 0x342}, {0x1F37, 0x3B9}, {0x1F37, 0x314}, {0x1F37, 0x342}, {0x1F38, 0x399}, {0x1F38, 0x313}, {0x1F39, 0x399},
{0x1F39, 0x314}, {0x1F3A, 0x399}, {0x1F3A, 0x313}, {0x1F3A, 0x300}, {0x1F3B, 0x399}, {0x1F3B, 0x314}, {0x1F3B, 0x300}, {0x1F3C, 0x399}, {0x1F3C, 0x313}, {0x1F3C, 0x301}, {0x1F3D, 0x399},
{0x1F3D, 0x314}, {0x1F3D, 0x301}, {0x1F3E, 0x399}, {0x1F3E, 0x313}, {0x1F3E, 0x342}, {0x1F3F, 0x399}, {0x1F3F, 0x314}, {0x1F3F, 0x342}, {0x1F40, 0x3BF}, {0x1F40, 0x313}, {0x1F41, 0x3BF},
{0x1F41, 0x314}, {0x1F42, 0x3BF}, {0x1F42, 0x313}, {0x1F42, 0x300}, {0x1F43, 0x3BF}, {0x1F43, 0x314}, {0x1F43, 0x300}, {0x1F44, 0x3BF}, {0x1F44, 0x313}, {0x1F44, 0x301}, {0x1F45, 0x3BF},
{0x1F45, 0x314}, {0x1F45, 0x301}, {0x1F48, 0x39F}, {0x1F48, 0x313}, {0x1F49, 0x39F}, {0x1F49, 0x314}, {0x1F4A, 0x39F}, {0x1F4A, 0x313}, {0x1F4A, 0x300}, {0x1F4B, 0x39F}, {0x1F4B, 0x314},
{0x1F4B, 0x300}, {0x1F4C, 0x39F}, {0x1F4C, 0x313}, {0x1F4C, 0x301}, {0x1F4D, 0x39F}, {0x1F4D, 0x314}, {0x1F4D, 0x301}, {0x1F50, 0x3C5}, {0x1F50, 0x313}, {0x1F51, 0x3C5}, {0x1F51, 0x314},
{0x1F52, 0x3C5}, {0x1F52, 0x313}, {0x1F52, 0x300}, {0x1F53, 0x3C5}, {0x1F53, 0x314}, {0x1F53, 0x300}, {0x1F54, 0x3C5}, {0x1F54, 0x313}, {0x1F54, 0x301}, {0x1F55, 0x3C5}, {0x1F55, 0x314},
{0x1F55, 0x301}, {0x1F56, 0x3C5}, {0x1F56, 0x313}, {0x1F56, 0x342}, {0x1F57, 0x3C5}, {0x1F57, 0x314}, {0x1F57, 0x342}, {0x1F59, 0x3A5}, {0x1F59, 0x314}, {0x1F5B, 0x3A5}, {0x1F5B, 0x314},
{0x1F5B, 0x300}, {0x1F5D, 0x3A5}, {0x1F5D, 0x314}, {0x1F5D, 0x301}, {0x1F5F, 0x3A5}, {0x1F5F, 0x314}, {0x1F5F, 0x342}, {0x1F60, 0x3C9}, {0x1F60, 0x313}, {0x1F61, 0x3C9}, {0x1F61, 0x314},
{0x1F62, 0x3C9}, {0x1F62, 0x313}, {0x1F62, 0x300}, {0x1F63, 0x3C9}, {0x1F63, 0x314}, {0x1F63, 0x300}, {0x1F64, 0x3C9}, {0x1F64, 0x313}, {0x1F64, 0x301}, {0x1F65, 0x3C9}, {0x1F65, 0x314},
{0x1F65, 0x301}, {0x1F66, 0x3C9}, {0x1F66, 0x313}, {0x1F66, 0x342}, {0x1F67, 0x3C9}, {0x1F67, 0x314}, {0x1F67, 0x342}, {0x1F68, 0x3A9}, {0x1F68, 0x313}, {0x1F69, 0x3A9}, {0x1F69, 0x314},
{0x1F6A, 0x3A9}, {0x1F6A, 0x313}, {0x1F6A, 0x300}, {0x1F6B, 0x3A9}, {0x1F6B, 0x314}, {0x1F6B, 0x300}, {0x1F6C, 0x3A9}, {0x1F6C, 0x313}, {0x1F6C, 0x301}, {0x1F6D, 0x3A9}, {0x1F6D, 0x314},
{0x1F6D, 0x301}, {0x1F6E, 0x3A9}, {0x1F6E, 0x313}, {0x1F6E, 0x342}, {0x1F6F, 0x3A9}, {0x1F6F, 0x314}, {0x1F6F, 0x342}, {0x1F70, 0x3B1}, {0x1F70, 0x300}, {0x1F71, 0x3B1}, {0x1F71, 0x301},
{0x1F72, 0x3B5}, {0x1F72, 0x300}, {0x1F73, 0x3B5}, {0x1F73, 0x301}, {0x1F74, 0x3B7}, {0x1F74, 0x300}, {0x1F75, 0x3B7}, {0x1F75, 0x301}, {0x1F76, 0x3B9}, {0x1F76, 0x300}, {0x1F77, 0x3B9},
{0x1F77, 0x301}, {0x1F78, 0x3BF}, {0x1F78, 0x300}, {0x1F79, 0x3BF}, {0x1F79, 0x301}, {0x1F7A, 0x3C5}, {0x1F7A, 0x300}, {0x1F7B, 0x3C5}, {0x1F7B, 0x301}, {0x1F7C, 0x3C9}, {0x1F7C, 0x300},
{0x1F7D, 0x3C9}, {0x1F7D, 0x301}, {0x1F80, 0x3B1}, {0x1F80, 0x313}, {0x1F80, 0x345}, {0x1F81, 0x3B1}, {0x1F81, 0x314}, {0x1F81, 0x345}, {0x1F82, 0x3B1}, {0x1F82, 0x313}, {0x1F82, 0x300},
{0x1F82, 0x345}, {0x1F83, 0x3B1}, {0x1F83, 0x314}, {0x1F83, 0x300}, {0x1F83, 0x345}, {0x1F84, 0x3B1}, {0x1F84, 0x313}, {0x1F84, 0x301}, {0x1F84, 0x345}, {0x1F85, 0x3B1}, {0x1F85, 0x314},
{0x1F85, 0x301}, {0x1F85, 0x345}, {0x1F86, 0x3B1}, {0x1F86, 0x313}, {0x1F86, 0x342}, {0x1F86, 0x345}, {0x1F87, 0x3B1}, {0x1F87, 0x314}, {0x1F87, 0x342}, {0x1F87, 0x345}, {0x1F88, 0x391},
{0x1F88, 0x313}, {0x1F88, 0x345}, {0x1F89, 0x391}, {0x1F89, 0x314}, {0x1F89, 0x345}, {0x1F8A, 0x391}, {0x1F8A, 0x313}, {0x1F8A, 0x300}, {0x1F8A, 0x345}, {0x1F8B, 0x391}, {0x1F8B, 0x314},
{0x1F8B, 0x300}, {0x1F8B, 0x345}, {0x1F8C, 0x391}, {0x1F8C, 0x313}, {0x1F8C, 0x301}, {0x1F8C, 0x345}, {0x1F8D, 0x391}, {0x1F8D, 0x314}, {0x1F8D, 0x301}, {0x1F8D, 0x345}, {0x1F8E, 0x391},
{0x1F8E, 0x313}, {0x1F8E, 0x342}, {0x1F8E, 0x345}, {0x1F8F, 0x391}, {0x1F8F, 0x314}, {0x1F8F, 0x342}, {0x1F8F, 0x345}, {0x1F90, 0x3B7}, {0x1F90, 0x313}, {0x1F90, 0x345}, {0x1F91, 0x3B7},
{0x1F91, 0x314}, {0x1F91, 0x345}, {0x1F92, 0x3B7}, {0x1F92, 0x313}, {0x1F92, 0x300}, {0x1F92, 0x345}, {0x1F93, 0x3B7}, {0x1F93, 0x314}, {0x1F93, 0x300}, {0x1F93, 0x345}, {0x1F94, 0x3B7},
{0x1F94, 0x313}, {0x1F94, 0x301}, {0x1F94, 0x345}, {0x1F95, 0x3B7}, {0x1F95, 0x314}, {0x1F95, 0x301}, {0x1F95, 0x345}, {0x1F96, 0x3B7}, {0x1F96, 0x313}, {0x1F96, 0x342}, {0x1F96, 0x345},
{0x1F97, 0x3B7}, {0x1F97, 0x314}, {0x1F97, 0x342}, {0x1F97, 0x345}, {0x1F98, 0x397}, {0x1F98, 0x313}, {0x1F98, 0x345}, {0x1F99, 0x397}, {0x1F99, 0x314}, {0x1F99, 0x345}, {0x1F9A, 0x397},
{0x1F9A, 0x313}, {0x1F9A, 0x300}, {0x1F9A, 0x345}, {0x1F9B, 0x397}, {0x1F9B, 0x314}, {0x1F9B, 0x300}, {0x1F9B, 0x345}, {0x1F9C, 0x397}, {0x1F9C, 0x313}, {0x1F9C, 0x301}, {0x1F9C, 0x345},
{0x1F9D, 0x397}, {0x1F9D, 0x314}, {0x1F9D, 0x301}, {0x1F9D, 0x345}, {0x1F9E, 0x397}, {0x1F9E, 0x313}, {0x1F9E, 0x342}, {0x1F9E, 0x345}, {0x1F9F, 0x397}, {0x1F9F, 0x314}, {0x1F9F, 0x342},
{0x1F9F, 0x345}, {0x1FA0, 0x3C9}, {0x1FA0, 0x313}, {0x1FA0, 0x345}, {0x1FA1, 0x3C9}, {0x1FA1, 0x314}, {0x1FA1, 0x345}, {0x1FA2, 0x3C9}, {0x1FA2, 0x313}, {0x1FA2, 0x300}, {0x1FA2, 0x345},
{0x1FA3, 0x3C9}, {0x1FA3, 0x314}, {0x1FA3, 0x300}, {0x1FA3, 0x345}, {0x1FA4, 0x3C9}, {0x1FA4, 0x313}, {0x1FA4, 0x301}, {0x1FA4, 0x345}, {0x1FA5, 0x3C9}, {0x1FA5, 0x314}, {0x1FA5, 0x301},
{0x1FA5, 0x345}, {0x1FA6, 0x3C9}, {0x1FA6, 0x313}, {0x1FA6, 0x342}, {0x1FA6, 0x345}, {0x1FA7, 0x3C9}, {0x1FA7, 0x314}, {0x1FA7, 0x342}, {0x1FA7, 0x345}, {0x1FA8, 0x3A9}, {0x1FA8, 0x313},
{0x1FA8, 0x345}, {0x1FA9, 0x3A9}, {0x1FA9, 0x314}, {0x1FA9, 0x345}, {0x1FAA, 0x3A9}, {0x1FAA, 0x313}, {0x1FAA, 0x300}, {0x1FAA, 0x345}, {0x1FAB, 0x3A9}, {0x1FAB, 0x314}, {0x1FAB, 0x300},
{0x1FAB, 0x345}, {0x1FAC, 0x3A9}, {0x1FAC, 0x313}, {0x1FAC, 0x301}, {0x1FAC, 0x345}, {0x1FAD, 0x3A9}, {0x1FAD, 0x314}, {0x1FAD, 0x301}, {0x1FAD, 0x345}, {0x1FAE, 0x3A9}, {0x1FAE, 0x313},
{0x1FAE, 0x342}, {0x1FAE, 0x345}, {0x1FAF, 0x3A9}, {0x1FAF, 0x314}, {0x1FAF, 0x342}, {0x1FAF, 0x345}, {0x1FB0, 0x3B1}, {0x1FB0, 0x306}, {0x1FB1, 0x3B1}, {0x1FB1, 0x304}, {0x1FB2, 0x3B1},
{0x1FB2, 0x300}, {0x1FB2, 0x345}, {0x1FB3, 0x3B1}, {0x1FB3, 0x345}, {0x1FB4, 0x3B1}, {0x1FB4, 0x301}, {0x1FB4, 0x345}, {0x1FB6, 0x3B1}, {0x1FB6, 0x342}, {0x1FB7, 0x3B1}, {0x1FB7, 0x342},
{0x1FB7, 0x345}, {0x1FB8, 0x391}, {0x1FB8, 0x306}, {0x1FB9, 0x391}, {0x1FB9, 0x304}, {0x1FBA, 0x391}, {0x1FBA, 0x300}, {0x1FBB, 0x391}, {0x1FBB, 0x301}, {0x1FBC, 0x391}, {0x1FBC, 0x345},
{0x1FBE, 0x3B9}, {0x1FC1, 0xA8}, {0x1FC1, 0x342}, {0x1FC2, 0x3B7}, {0x1FC2, 0x300}, {0x1FC2, 0x345}, {0x1FC3, 0x3B7}, {0x1FC3, 0x345}, {0x1FC4, 0x3B7}, {0x1FC4, 0x301}, {0x1FC4, 0x345},
{0x1FC6, 0x3B7}, {0x1FC6, 0x342}, {0x1FC7, 0x3B7}, {0x1FC7, 0x342}, {0x1FC7, 0x345}, {0x1FC8, 0x395}, {0x1FC8, 0x300}, {0x1FC9, 0x395}, {0x1FC9, 0x301}, {0x1FCA, 0x397}, {0x1FCA, 0x300},
{0x1FCB, 0x397}, {0x1FCB, 0x301}, {0x1FCC, 0x397}, {0x1FCC, 0x345}, {0x1FCD, 0x1FBF}, {0x1FCD, 0x300}, {0x1FCE, 0x1FBF}, {0x1FCE, 0x301}, {0x1FCF, 0x1FBF}, {0x1FCF, 0x342}, {0x1FD0, 0x3B9},
{0x1FD0, 0x306}, {0x1FD1, 0x3B9}, {0x1FD1, 0x304}, {0x1FD2, 0x3B9}, {0x1FD2, 0x308}, {0x1FD2, 0x300}, {0x1FD3, 0x3B9}, {0x1FD3, 0x308}, {0x1FD3, 0x301}, {0x1FD6, 0x3B9}, {0x1FD6, 0x342},
{0x1FD7, 0x3B9}, {0x1FD7, 0x308}, {0x1FD7, 0x342}, {0x1FD8, 0x399}, {0x1FD8, 0x306}, {0x1FD9, 0x399}, {0x1FD9, 0x304}, {0x1FDA, 0x399}, {0x1FDA, 0x300}, {0x1FDB, 0x399}, {0x1FDB, 0x301},
{0x1FDD, 0x1FFE}, {0x1FDD, 0x300}, {0x1FDE, 0x1FFE}, {0x1FDE, 0x301}, {0x1FDF, 0x1FFE}, {0x1FDF, 0x342}, {0x1FE0, 0x3C5}, {0x1FE0, 0x306}, {0x1FE1, 0x3C5}, {0x1FE1, 0x304}, {0x1FE2, 0x3C5},
{0x1FE2, 0x308}, {0x1FE2, 0x300}, {0x1FE3, 0x3C5}, {0x1FE3, 0x308}, {0x1FE3, 0x301}, {0x1FE4, 0x3C1}, {0x1FE4, 0x313}, {0x1FE5, 0x3C1}, {0x1FE5, 0x314}, {0x1FE6, 0x3C5}, {0x1FE6, 0x342},
{0x1FE7, 0x3C5}, {0x1FE7, 0x308}, {0x1FE7, 0x342}, {0x1FE8, 0x3A5}, {0x1FE8, 0x306}, {0x1FE9, 0x3A5}, {0x1FE9, 0x304}, {0x1FEA, 0x3A5}, {0x1FEA, 0x300}, {0x1FEB, 0x3A5}, {0x1FEB, 0x301},
{0x1FEC, 0x3A1}, {0x1FEC, 0x314}, {0x1FED, 0xA8}, {0x1FED, 0x300}, {0x1FEE, 0xA8}, {0x1FEE, 0x301}, {0x1FEF, 0x60}, {0x1FF2, 0x3C9}, {0x1FF2, 0x300}, {0x1FF2, 0x345}, {0x1FF3, 0x3C9}, {0x1FF3, 0x345},
{0x1FF4, 0x3C9}, {0x1FF4, 0x301}, {0x1FF4, 0x345}, {0x1FF6, 0x3C9}, {0x1FF6, 0x342}, {0x1FF7, 0x3C9}, {0x1FF7, 0x342}, {0x1FF7, 0x345}, {0x1FF8, 0x39F}, {0x1FF8, 0x300}, {0x1FF9, 0x39F},
{0x1FF9, 0x301}, {0x1FFA, 0x3A9}, {0x1FFA, 0x300}, {0x1FFB, 0x3A9}, {0x1FFB, 0x301}, {0x1FFC, 0x3A9}, {0x1FFC, 0x345}, {0x1FFD, 0xB4}, {0x2000, 0x2002}, {0x2001, 0x2003}, {0x2126, 0x3A9},
{0x212A, 0x4B}, {0x212B, 0x41}, {0x212B, 0x30A}, {0x219A, 0x2190}, {0x219A, 0x338}, {0x219B, 0x2192}, {0x219B, 0x338}, {0x21AE, 0x2194}, {0x21AE, 0x338}, {0x21CD, 0x21D0}, {0x21CD, 0x338},
{0x21CE, 0x21D4}, {0x21CE, 0x338}, {0x21CF, 0x21D2}, {0x21CF, 0x338}, {0x2204, 0x2203}, {0x2204, 0x338}, {0x2209, 0x2208}, {0x2209, 0x338}, {0x220C, 0x220B}, {0x220C, 0x338}, {0x2224, 0x2223},
{0x2224, 0x338}, {0x2226, 0x2225}, {0x2226, 0x338}, {0x2241, 0x223C}, {0x2241, 0x338}, {0x2244, 0x2243}, {0x2244, 0x338}, {0x2247, 0x2245}, {0x2247, 0x338}, {0x2249, 0x2248}, {0x2249, 0x338},
{0x2260, 0x3D}, {0x2260, 0x338}, {0x2262, 0x2261}, {0x2262, 0x338}, {0x226D, 0x224D}, {0x226D, 0x338}, {0x226E, 0x3C}, {0x226E, 0x338}, {0x226F, 0x3E}, {0x226F, 0x338}, {0x2270, 0x2264},
{0x2270, 0x338}, {0x2271, 0x2265}, {0x2271, 0x338}, {0x2274, 0x2272}, {0x2274, 0x338}, {0x2275, 0x2273}, {0x2275, 0x338}, {0x2278, 0x2276}, {0x2278, 0x338}, {0x2279, 0x2277}, {0x2279, 0x338},
{0x2280, 0x227A}, {0x2280, 0x338}, {0x2281, 0x227B}, {0x2281, 0x338}, {0x2284, 0x2282}, {0x2284, 0x338}, {0x2285, 0x2283}, {0x2285, 0x338}, {0x2288, 0x2286}, {0x2288, 0x338}, {0x2289, 0x2287},
{0x2289, 0x338}, {0x22AC, 0x22A2}, {0x22AC, 0x338}, {0x22AD, 0x22A8}, {0x22AD, 0x338}, {0x22AE, 0x22A9}, {0x22AE, 0x338}, {0x22AF, 0x22AB}, {0x22AF, 0x338}, {0x22E0, 0x227C}, {0x22E0, 0x338},
{0x22E1, 0x227D}, {0x22E1, 0x338}, {0x22E2, 0x2291}, {0x22E2, 0x338}, {0x22E3, 0x2292}, {0x22E3, 0x338}, {0x22EA, 0x22B2}, {0x22EA, 0x338}, {0x22EB, 0x22B3}, {0x22EB, 0x338}, {0x22EC, 0x22B4},
{0x22EC, 0x338}, {0x22ED, 0x22B5}, {0x22ED, 0x338}, {0x2329, 0x3008}, {0x232A, 0x3009}, {0x2ADC, 0x2ADD}, {0x2ADC, 0x338}, {0x304C, 0x304B}, {0x304C, 0x3099}, {0x304E, 0x304D}, {0x304E, 0x3099},
{0x3050, 0x304F}, {0x3050, 0x3099}, {0x3052, 0x3051}, {0x3052, 0x3099}, {0x3054, 0x3053}, {0x3054, 0x3099}, {0x3056, 0x3055}, {0x3056, 0x3099}, {0x3058, 0x3057}, {0x3058, 0x3099}, {0x305A, 0x3059},
{0x305A, 0x3099}, {0x305C, 0x305B}, {0x305C, 0x3099}, {0x305E, 0x305D}, {0x305E, 0x3099}, {0x3060, 0x305F}, {0x3060, 0x3099}, {0x3062, 0x3061}, {0x3062, 0x3099}, {0x3065, 0x3064}, {0x3065, 0x3099},
{0x3067, 0x3066}, {0x3067, 0x3099}, {0x3069, 0x3068}, {0x3069, 0x3099}, {0x3070, 0x306F}, {0x3070, 0x3099}, {0x3071, 0x306F}, {0x3071, 0x309A}, {0x3073, 0x3072}, {0x3073, 0x3099}, {0x3074, 0x3072},
{0x3074, 0x309A}, {0x3076, 0x3075}, {0x3076, 0x3099}, {0x3077, 0x3075}, {0x3077, 0x309A}, {0x3079, 0x3078}, {0x3079, 0x3099}, {0x307A, 0x3078}, {0x307A, 0x309A}, {0x307C, 0x307B}, {0x307C, 0x3099},
{0x307D, 0x307B}, {0x307D, 0x309A}, {0x3094, 0x3046}, {0x3094, 0x3099}, {0x309E, 0x309D}, {0x309E, 0x3099}, {0x30AC, 0x30AB}, {0x30AC, 0x3099}, {0x30AE, 0x30AD}, {0x30AE, 0x3099}, {0x30B0, 0x30AF},
{0x30B0, 0x3099}, {0x30B2, 0x30B1}, {0x30B2, 0x3099}, {0x30B4, 0x30B3}, {0x30B4, 0x3099}, {0x30B6, 0x30B5}, {0x30B6, 0x3099}, {0x30B8, 0x30B7}, {0x30B8, 0x3099}, {0x30BA, 0x30B9}, {0x30BA, 0x3099},
{0x30BC, 0x30BB}, {0x30BC, 0x3099}, {0x30BE, 0x30BD}, {0x30BE, 0x3099}, {0x30C0, 0x30BF}, {0x30C0, 0x3099}, {0x30C2, 0x30C1}, {0x30C2, 0x3099}, {0x30C5, 0x30C4}, {0x30C5, 0x3099}, {0x30C7, 0x30C6},
{0x30C7, 0x3099}, {0x30C9, 0x30C8}, {0x30C9, 0x3099}, {0x30D0, 0x30CF}, {0x30D0, 0x3099}, {0x30D1, 0x30CF}, {0x30D1, 0x309A}, {0x30D3, 0x30D2}, {0x30D3, 0x3099}, {0x30D4, 0x30D2}, {0x30D4, 0x309A},
{0x30D6, 0x30D5}, {0x30D6, 0x3099}, {0x30D7, 0x30D5}, {0x30D7, 0x309A}, {0x30D9, 0x30D8}, {0x30D9, 0x3099}, {0x30DA, 0x30D8}, {0x30DA, 0x309A}, {0x30DC, 0x30DB}, {0x30DC, 0x3099}, {0x30DD, 0x30DB},
{0x30DD, 0x309A}, {0x30F4, 0x30A6}, {0x30F4, 0x3099}, {0x30F7, 0x30EF}, {0x30F7, 0x3099}, {0x30F8, 0x30F0}, {0x30F8, 0x3099}, {0x30F9, 0x30F1}, {0x30F9, 0x3099}, {0x30FA, 0x30F2}, {0x30FA, 0x3099},
{0x30FE, 0x30FD}, {0x30FE, 0x3099}, {0xF900, 0x8C48}, {0xF901, 0x66F4}, {0xF902, 0x8ECA}, {0xF903, 0x8CC8}, {0xF904, 0x6ED1}, {0xF905, 0x4E32}, {0xF906, 0x53E5}, {0xF907, 0x9F9C}, {0xF908, 0x9F9C},
{0xF909, 0x5951}, {0xF90A, 0x91D1}, {0xF90B, 0x5587}, {0xF90C, 0x5948}, {0xF90D, 0x61F6}, {0xF90E, 0x7669}, {0xF90F, 0x7F85}, {0xF910, 0x863F}, {0xF911, 0x87BA}, {0xF912, 0x88F8}, {0xF913, 0x908F},
{0xF914, 0x6A02}, {0xF915, 0x6D1B}, {0xF916, 0x70D9}, {0xF917, 0x73DE}, {0xF918, 0x843D}, {0xF919, 0x916A}, {0xF91A, 0x99F1}, {0xF91B, 0x4E82}, {0xF91C, 0x5375}, {0xF91D, 0x6B04}, {0xF91E, 0x721B},
{0xF91F, 0x862D}, {0xF920, 0x9E1E}, {0xF921, 0x5D50}, {0xF922, 0x6FEB}, {0xF923, 0x85CD}, {0xF924, 0x8964}, {0xF925, 0x62C9}, {0xF926, 0x81D8}, {0xF927, 0x881F}, {0xF928, 0x5ECA}, {0xF929, 0x6717},
{0xF92A, 0x6D6A}, {0xF92B, 0x72FC}, {0xF92C, 0x90CE}, {0xF92D, 0x4F86}, {0xF92E, 0x51B7}, {0xF92F, 0x52DE}, {0xF930, 0x64C4}, {0xF931, 0x6AD3}, {0xF932, 0x7210}, {0xF933, 0x76E7}, {0xF934, 0x8001},
{0xF935, 0x8606}, {0xF936, 0x865C}, {0xF937, 0x8DEF}, {0xF938, 0x9732}, {0xF939, 0x9B6F}, {0xF93A, 0x9DFA}, {0xF93B, 0x788C}, {0xF93C, 0x797F}, {0xF93D, 0x7DA0}, {0xF93E, 0x83C9}, {0xF93F, 0x9304},
{0xF940, 0x9E7F}, {0xF941, 0x8AD6}, {0xF942, 0x58DF}, {0xF943, 0x5F04}, {0xF944, 0x7C60}, {0xF945, 0x807E}, {0xF946, 0x7262}, {0xF947, 0x78CA}, {0xF948, 0x8CC2}, {0xF949, 0x96F7}, {0xF94A, 0x58D8},
{0xF94B, 0x5C62}, {0xF94C, 0x6A13}, {0xF94D, 0x6DDA}, {0xF94E, 0x6F0F}, {0xF94F, 0x7D2F}, {0xF950, 0x7E37}, {0xF951, 0x964B}, {0xF952, 0x52D2}, {0xF953, 0x808B}, {0xF954, 0x51DC}, {0xF955, 0x51CC},
{0xF956, 0x7A1C}, {0xF957, 0x7DBE}, {0xF958, 0x83F1}, {0xF959, 0x9675}, {0xF95A, 0x8B80}, {0xF95B, 0x62CF}, {0xF95C, 0x6A02}, {0xF95D, 0x8AFE}, {0xF95E, 0x4E39}, {0xF95F, 0x5BE7}, {0xF960, 0x6012},
{0xF961, 0x7387}, {0xF962, 0x7570}, {0xF963, 0x5317}, {0xF964, 0x78FB}, {0xF965, 0x4FBF}, {0xF966, 0x5FA9}, {0xF967, 0x4E0D}, {0xF968, 0x6CCC}, {0xF969, 0x6578}, {0xF96A, 0x7D22}, {0xF96B, 0x53C3},
{0xF96C, 0x585E}, {0xF96D, 0x7701}, {0xF96E, 0x8449}, {0xF96F, 0x8AAA}, {0xF970, 0x6BBA}, {0xF971, 0x8FB0}, {0xF972, 0x6C88}, {0xF973, 0x62FE}, {0xF974, 0x82E5}, {0xF975, 0x63A0}, {0xF976, 0x7565},
{0xF977, 0x4EAE}, {0xF978, 0x5169}, {0xF979, 0x51C9}, {0xF97A, 0x6881}, {0xF97B, 0x7CE7}, {0xF97C, 0x826F}, {0xF97D, 0x8AD2}, {0xF97E, 0x91CF}, {0xF97F, 0x52F5}, {0xF980, 0x5442}, {0xF981, 0x5973},
{0xF982, 0x5EEC}, {0xF983, 0x65C5}, {0xF984, 0x6FFE}, {0xF985, 0x792A}, {0xF986, 0x95AD}, {0xF987, 0x9A6A}, {0xF988, 0x9E97}, {0xF989, 0x9ECE}, {0xF98A, 0x529B}, {0xF98B, 0x66C6}, {0xF98C, 0x6B77},
{0xF98D, 0x8F62}, {0xF98E, 0x5E74}, {0xF98F, 0x6190}, {0xF990, 0x6200}, {0xF991, 0x649A}, {0xF992, 0x6F23}, {0xF993, 0x7149}, {0xF994, 0x7489}, {0xF995, 0x79CA}, {0xF996, 0x7DF4}, {0xF997, 0x806F},
{0xF998, 0x8F26}, {0xF999, 0x84EE}, {0xF99A, 0x9023}, {0xF99B, 0x934A}, {0xF99C, 0x5217}, {0xF99D, 0x52A3}, {0xF99E, 0x54BD}, {0xF99F, 0x70C8}, {0xF9A0, 0x88C2}, {0xF9A1, 0x8AAA}, {0xF9A2, 0x5EC9},
{0xF9A3, 0x5FF5}, {0xF9A4, 0x637B}, {0xF9A5, 0x6BAE}, {0xF9A6, 0x7C3E}, {0xF9A7, 0x7375}, {0xF9A8, 0x4EE4}, {0xF9A9, 0x56F9}, {0xF9AA, 0x5BE7}, {0xF9AB, 0x5DBA}, {0xF9AC, 0x601C}, {0xF9AD, 0x73B2},
{0xF9AE, 0x7469}, {0xF9AF, 0x7F9A}, {0xF9B0, 0x8046}, {0xF9B1, 0x9234}, {0xF9B2, 0x96F6}, {0xF9B3, 0x9748}, {0xF9B4, 0x9818}, {0xF9B5, 0x4F8B}, {0xF9B6, 0x79AE}, {0xF9B7, 0x91B4}, {0xF9B8, 0x96B8},
{0xF9B9, 0x60E1}, {0xF9BA, 0x4E86}, {0xF9BB, 0x50DA}, {0xF9BC, 0x5BEE}, {0xF9BD, 0x5C3F}, {0xF9BE, 0x6599}, {0xF9BF, 0x6A02}, {0xF9C0, 0x71CE}, {0xF9C1, 0x7642}, {0xF9C2, 0x84FC}, {0xF9C3, 0x907C},
{0xF9C4, 0x9F8D}, {0xF9C5, 0x6688}, {0xF9C6, 0x962E}, {0xF9C7, 0x5289}, {0xF9C8, 0x677B}, {0xF9C9, 0x67F3}, {0xF9CA, 0x6D41}, {0xF9CB, 0x6E9C}, {0xF9CC, 0x7409}, {0xF9CD, 0x7559}, {0xF9CE, 0x786B},
{0xF9CF, 0x7D10}, {0xF9D0, 0x985E}, {0xF9D1, 0x516D}, {0xF9D2, 0x622E}, {0xF9D3, 0x9678}, {0xF9D4, 0x502B}, {0xF9D5, 0x5D19}, {0xF9D6, 0x6DEA}, {0xF9D7, 0x8F2A}, {0xF9D8, 0x5F8B}, {0xF9D9, 0x6144},
{0xF9DA, 0x6817}, {0xF9DB, 0x7387}, {0xF9DC, 0x9686}, {0xF9DD, 0x5229}, {0xF9DE, 0x540F}, {0xF9DF, 0x5C65}, {0xF9E0, 0x6613}, {0xF9E1, 0x674E}, {0xF9E2, 0x68A8}, {0xF9E3, 0x6CE5}, {0xF9E4, 0x7406},
{0xF9E5, 0x75E2}, {0xF9E6, 0x7F79}, {0xF9E7, 0x88CF}, {0xF9E8, 0x88E1}, {0xF9E9, 0x91CC}, {0xF9EA, 0x96E2}, {0xF9EB, 0x533F}, {0xF9EC, 0x6EBA}, {0xF9ED, 0x541D}, {0xF9EE, 0x71D0}, {0xF9EF, 0x7498},
{0xF9F0, 0x85FA}, {0xF9F1, 0x96A3}, {0xF9F2, 0x9C57}, {0xF9F3, 0x9E9F}, {0xF9F4, 0x6797}, {0xF9F5, 0x6DCB}, {0xF9F6, 0x81E8}, {0xF9F7, 0x7ACB}, {0xF9F8, 0x7B20}, {0xF9F9, 0x7C92}, {0xF9FA, 0x72C0},
{0xF9FB, 0x7099}, {0xF9FC, 0x8B58}, {0xF9FD, 0x4EC0}, {0xF9FE, 0x8336}, {0xF9FF, 0x523A}, {0xFA00, 0x5207}, {0xFA01, 0x5EA6}, {0xFA02, 0x62D3}, {0xFA03, 0x7CD6}, {0xFA04, 0x5B85}, {0xFA05, 0x6D1E},
{0xFA06, 0x66B4}, {0xFA07, 0x8F3B}, {0xFA08, 0x884C}, {0xFA09, 0x964D}, {0xFA0A, 0x898B}, {0xFA0B, 0x5ED3}, {0xFA0C, 0x5140}, {0xFA0D, 0x55C0}, {0xFA10, 0x585A}, {0xFA12, 0x6674}, {0xFA15, 0x51DE},
{0xFA16, 0x732A}, {0xFA17, 0x76CA}, {0xFA18, 0x793C}, {0xFA19, 0x795E}, {0xFA1A, 0x7965}, {0xFA1B, 0x798F}, {0xFA1C, 0x9756}, {0xFA1D, 0x7CBE}, {0xFA1E, 0x7FBD}, {0xFA20, 0x8612}, {0xFA22, 0x8AF8},
{0xFA25, 0x9038}, {0xFA26, 0x90FD}, {0xFA2A, 0x98EF}, {0xFA2B, 0x98FC}, {0xFA2C, 0x9928}, {0xFA2D, 0x9DB4}, {0xFA2E, 0x90DE}, {0xFA2F, 0x96B7}, {0xFA30, 0x4FAE}, {0xFA31, 0x50E7}, {0xFA32, 0x514D},
{0xFA33, 0x52C9}, {0xFA34, 0x52E4}, {0xFA35, 0x5351}, {0xFA36, 0x559D}, {0xFA37, 0x5606}, {0xFA38, 0x5668}, {0xFA39, 0x5840}, {0xFA3A, 0x58A8}, {0xFA3B, 0x5C64}, {0xFA3C, 0x5C6E}, {0xFA3D, 0x6094},
{0xFA3E, 0x6168}, {0xFA3F, 0x618E}, {0xFA40, 0x61F2}, {0xFA41, 0x654F}, {0xFA42, 0x65E2}, {0xFA43, 0x6691}, {0xFA44, 0x6885}, {0xFA45, 0x6D77}, {0xFA46, 0x6E1A}, {0xFA47, 0x6F22}, {0xFA48, 0x716E},
{0xFA49, 0x722B}, {0xFA4A, 0x7422}, {0xFA4B, 0x7891}, {0xFA4C, 0x793E}, {0xFA4D, 0x7949}, {0xFA4E, 0x7948}, {0xFA4F, 0x7950}, {0xFA50, 0x7956}, {0xFA51, 0x795D}, {0xFA52, 0x798D}, {0xFA53, 0x798E},
{0xFA54, 0x7A40}, {0xFA55, 0x7A81}, {0xFA56, 0x7BC0}, {0xFA57, 0x7DF4}, {0xFA58, 0x7E09}, {0xFA59, 0x7E41}, {0xFA5A, 0x7F72}, {0xFA5B, 0x8005}, {0xFA5C, 0x81ED}, {0xFA5D, 0x8279}, {0xFA5E, 0x8279},
{0xFA5F, 0x8457}, {0xFA60, 0x8910}, {0xFA61, 0x8996}, {0xFA62, 0x8B01}, {0xFA63, 0x8B39}, {0xFA64, 0x8CD3}, {0xFA65, 0x8D08}, {0xFA66, 0x8FB6}, {0xFA67, 0x9038}, {0xFA68, 0x96E3}, {0xFA69, 0x97FF},
{0xFA6A, 0x983B}, {0xFA6B, 0x6075}, {0xFA6C, 0x242EE}, {0xFA6D, 0x8218}, {0xFA70, 0x4E26}, {0xFA71, 0x51B5}, {0xFA72, 0x5168}, {0xFA73, 0x4F80}, {0xFA74, 0x5145}, {0xFA75, 0x5180}, {0xFA76, 0x52C7},
{0xFA77, 0x52FA}, {0xFA78, 0x559D}, {0xFA79, 0x5555}, {0xFA7A, 0x5599}, {0xFA7B, 0x55E2}, {0xFA7C, 0x585A}, {0xFA7D, 0x58B3}, {0xFA7E, 0x5944}, {0xFA7F, 0x5954}, {0xFA80, 0x5A62}, {0xFA81, 0x5B28},
{0xFA82, 0x5ED2}, {0xFA83, 0x5ED9}, {0xFA84, 0x5F69}, {0xFA85, 0x5FAD}, {0xFA86, 0x60D8}, {0xFA87, 0x614E}, {0xFA88, 0x6108}, {0xFA89, 0x618E}, {0xFA8A, 0x6160}, {0xFA8B, 0x61F2}, {0xFA8C, 0x6234},
{0xFA8D, 0x63C4}, {0xFA8E, 0x641C}, {0xFA8F, 0x6452}, {0xFA90, 0x6556}, {0xFA91, 0x6674}, {0xFA92, 0x6717}, {0xFA93, 0x671B}, {0xFA94, 0x6756}, {0xFA95, 0x6B79}, {0xFA96, 0x6BBA}, {0xFA97, 0x6D41},
{0xFA98, 0x6EDB}, {0xFA99, 0x6ECB}, {0xFA9A, 0x6F22}, {0xFA9B, 0x701E}, {0xFA9C, 0x716E}, {0xFA9D, 0x77A7}, {0xFA9E, 0x7235}, {0xFA9F, 0x72AF}, {0xFAA0, 0x732A}, {0xFAA1, 0x7471}, {0xFAA2, 0x7506},
{0xFAA3, 0x753B}, {0xFAA4, 0x761D}, {0xFAA5, 0x761F}, {0xFAA6, 0x76CA}, {0xFAA7, 0x76DB}, {0xFAA8, 0x76F4}, {0xFAA9, 0x774A}, {0xFAAA, 0x7740}, {0xFAAB, 0x78CC}, {0xFAAC, 0x7AB1}, {0xFAAD, 0x7BC0},
{0xFAAE, 0x7C7B}, {0xFAAF, 0x7D5B}, {0xFAB0, 0x7DF4}, {0xFAB1, 0x7F3E}, {0xFAB2, 0x8005}, {0xFAB3, 0x8352}, {0xFAB4, 0x83EF}, {0xFAB5, 0x8779}, {0xFAB6, 0x8941}, {0xFAB7, 0x8986}, {0xFAB8, 0x8996},
{0xFAB9, 0x8ABF}, {0xFABA, 0x8AF8}, {0xFABB, 0x8ACB}, {0xFABC, 0x8B01}, {0xFABD, 0x8AFE}, {0xFABE, 0x8AED}, {0xFABF, 0x8B39}, {0xFAC0, 0x8B8A}, {0xFAC1, 0x8D08}, {0xFAC2, 0x8F38}, {0xFAC3, 0x9072},
{0xFAC4, 0x9199}, {0xFAC5, 0x9276}, {0xFAC6, 0x967C}, {0xFAC7, 0x96E3}, {0xFAC8, 0x9756}, {0xFAC9, 0x97DB}, {0xFACA, 0x97FF}, {0xFACB, 0x980B}, {0xFACC, 0x983B}, {0xFACD, 0x9B12}, {0xFACE, 0x9F9C},
{0xFACF, 0x2284A}, {0xFAD0, 0x22844}, {0xFAD1, 0x233D5}, {0xFAD2, 0x3B9D}, {0xFAD3, 0x4018}, {0xFAD4, 0x4039}, {0xFAD5, 0x25249}, {0xFAD6, 0x25CD0}, {0xFAD7, 0x27ED3}, {0xFAD8, 0x9F43},
{0xFAD9, 0x9F8E}, {0xFB1D, 0x5D9}, {0xFB1D, 0x5B4}, {0xFB1F, 0x5F2}, {0xFB1F, 0x5B7}, {0xFB2A, 0x5E9}, {0xFB2A, 0x5C1}, {0xFB2B, 0x5E9}, {0xFB2B, 0x5C2}, {0xFB2C, 0x5E9}, {0xFB2C, 0x5BC},
{0xFB2C, 0x5C1}, {0xFB2D, 0x5E9}, {0xFB2D, 0x5BC}, {0xFB2D, 0x5C2}, {0xFB2E, 0x5D0}, {0xFB2E, 0x5B7}, {0xFB2F, 0x5D0}, {0xFB2F, 0x5B8}, {0xFB30, 0x5D0}, {0xFB30, 0x5BC}, {0xFB31, 0x5D1},
{0xFB31, 0x5BC}, {0xFB32, 0x5D2}, {0xFB32, 0x5BC}, {0xFB33, 0x5D3}, {0xFB33, 0x5BC}, {0xFB34, 0x5D4}, {0xFB34, 0x5BC}, {0xFB35, 0x5D5}, {0xFB35, 0x5BC}, {0xFB36, 0x5D6}, {0xFB36, 0x5BC},
{0xFB38, 0x5D8}, {0xFB38, 0x5BC}, {0xFB39, 0x5D9}, {0xFB39, 0x5BC}, {0xFB3A, 0x5DA}, {0xFB3A, 0x5BC}, {0xFB3B, 0x5DB}, {0xFB3B, 0x5BC}, {0xFB3C, 0x5DC}, {0xFB3C, 0x5BC}, {0xFB3E, 0x5DE},
{0xFB3E, 0x5BC}, {0xFB40, 0x5E0}, {0xFB40, 0x5BC}, {0xFB41, 0x5E1}, {0xFB41, 0x5BC}, {0xFB43, 0x5E3}, {0xFB43, 0x5BC}, {0xFB44, 0x5E4}, {0xFB44, 0x5BC}, {0xFB46, 0x5E6}, {0xFB46, 0x5BC},
{0xFB47, 0x5E7}, {0xFB47, 0x5BC}, {0xFB48, 0x5E8}, {0xFB48, 0x5BC}, {0xFB49, 0x5E9}, {0xFB49, 0x5BC}, {0xFB4A, 0x5EA}, {0xFB4A, 0x5BC}, {0xFB4B, 0x5D5}, {0xFB4B, 0x5B9}, {0xFB4C, 0x5D1},
{0xFB4C, 0x5BF}, {0xFB4D, 0x5DB}, {0xFB4D, 0x5BF}, {0xFB4E, 0x5E4}, {0xFB4E, 0x5BF}, {0x1109A, 0x11099}, {0x1109A, 0x110BA}, {0x1109C, 0x1109B}, {0x1109C, 0x110BA}, {0x110AB, 0x110A5},
{0x110AB, 0x110BA}, {0x1112E, 0x11131}, {0x1112E, 0x11127}, {0x1112F, 0x11132}, {0x1112F, 0x11127}, {0x1134B, 0x11347}, {0x1134B, 0x1133E}, {0x1134C, 0x11347}, {0x1134C, 0x11357}, {0x114BB, 0x114B9},
{0x114BB, 0x114BA}, {0x114BC, 0x114B9}, {0x114BC, 0x114B0}, {0x114BE, 0x114B9}, {0x114BE, 0x114BD}, {0x115BA, 0x115B8}, {0x115BA, 0x115AF}, {0x115BB, 0x115B9}, {0x115BB, 0x115AF}, {0x1D15E, 0x1D157},
{0x1D15E, 0x1D165}, {0x1D15F, 0x1D158}, {0x1D15F, 0x1D165}, {0x1D160, 0x1D158}, {0x1D160, 0x1D165}, {0x1D160, 0x1D16E}, {0x1D161, 0x1D158}, {0x1D161, 0x1D165}, {0x1D161, 0x1D16F}, {0x1D162, 0x1D158},
{0x1D162, 0x1D165}, {0x1D162, 0x1D170}, {0x1D163, 0x1D158}, {0x1D163, 0x1D165}, {0x1D163, 0x1D171}, {0x1D164, 0x1D158}, {0x1D164, 0x1D165}, {0x1D164, 0x1D172}, {0x1D1BB, 0x1D1B9}, {0x1D1BB, 0x1D165},
{0x1D1BC, 0x1D1BA}, {0x1D1BC, 0x1D165}, {0x1D1BD, 0x1D1B9}, {0x1D1BD, 0x1D165}, {0x1D1BD, 0x1D16E}, {0x1D1BE, 0x1D1BA}, {0x1D1BE, 0x1D165}, {0x1D1BE, 0x1D16E}, {0x1D1BF, 0x1D1B9}, {0x1D1BF, 0x1D165},
{0x1D1BF, 0x1D16F}, {0x1D1C0, 0x1D1BA}, {0x1D1C0, 0x1D165}, {0x1D1C0, 0x1D16F}, {0x2F800, 0x4E3D}, {0x2F801, 0x4E38}, {0x2F802, 0x4E41}, {0x2F803, 0x20122}, {0x2F804, 0x4F60}, {0x2F805, 0x4FAE},
{0x2F806, 0x4FBB}, {0x2F807, 0x5002}, {0x2F808, 0x507A}, {0x2F809, 0x5099}, {0x2F80A, 0x50E7}, {0x2F80B, 0x50CF}, {0x2F80C, 0x349E}, {0x2F80D, 0x2063A}, {0x2F80E, 0x514D}, {0x2F80F, 0x5154},
{0x2F810, 0x5164}, {0x2F811, 0x5177}, {0x2F812, 0x2051C}, {0x2F813, 0x34B9}, {0x2F814, 0x5167}, {0x2F815, 0x518D}, {0x2F816, 0x2054B}, {0x2F817, 0x5197}, {0x2F818, 0x51A4}, {0x2F819, 0x4ECC},
{0x2F81A, 0x51AC}, {0x2F81B, 0x51B5}, {0x2F81C, 0x291DF}, {0x2F81D, 0x51F5}, {0x2F81E, 0x5203}, {0x2F81F, 0x34DF}, {0x2F820, 0x523B}, {0x2F821, 0x5246}, {0x2F822, 0x5272}, {0x2F823, 0x5277},
{0x2F824, 0x3515}, {0x2F825, 0x52C7}, {0x2F826, 0x52C9}, {0x2F827, 0x52E4}, {0x2F828, 0x52FA}, {0x2F829, 0x5305}, {0x2F82A, 0x5306}, {0x2F82B, 0x5317}, {0x2F82C, 0x5349}, {0x2F82D, 0x5351},
{0x2F82E, 0x535A}, {0x2F82F, 0x5373}, {0x2F830, 0x537D}, {0x2F831, 0x537F}, {0x2F832, 0x537F}, {0x2F833, 0x537F}, {0x2F834, 0x20A2C}, {0x2F835, 0x7070}, {0x2F836, 0x53CA}, {0x2F837, 0x53DF},
{0x2F838, 0x20B63}, {0x2F839, 0x53EB}, {0x2F83A, 0x53F1}, {0x2F83B, 0x5406}, {0x2F83C, 0x549E}, {0x2F83D, 0x5438}, {0x2F83E, 0x5448}, {0x2F83F, 0x5468}, {0x2F840, 0x54A2}, {0x2F841, 0x54F6},
{0x2F842, 0x5510}, {0x2F843, 0x5553}, {0x2F844, 0x5563}, {0x2F845, 0x5584}, {0x2F846, 0x5584}, {0x2F847, 0x5599}, {0x2F848, 0x55AB}, {0x2F849, 0x55B3}, {0x2F84A, 0x55C2}, {0x2F84B, 0x5716},
{0x2F84C, 0x5606}, {0x2F84D, 0x5717}, {0x2F84E, 0x5651}, {0x2F84F, 0x5674}, {0x2F850, 0x5207}, {0x2F851, 0x58EE}, {0x2F852, 0x57CE}, {0x2F853, 0x57F4}, {0x2F854, 0x580D}, {0x2F855, 0x578B},
{0x2F856, 0x5832}, {0x2F857, 0x5831}, {0x2F858, 0x58AC}, {0x2F859, 0x214E4}, {0x2F85A, 0x58F2}, {0x2F85B, 0x58F7}, {0x2F85C, 0x5906}, {0x2F85D, 0x591A}, {0x2F85E, 0x5922}, {0x2F85F, 0x5962},
{0x2F860, 0x216A8}, {0x2F861, 0x216EA}, {0x2F862, 0x59EC}, {0x2F863, 0x5A1B}, {0x2F864, 0x5A27}, {0x2F865, 0x59D8}, {0x2F866, 0x5A66}, {0x2F867, 0x36EE}, {0x2F868, 0x36FC}, {0x2F869, 0x5B08},
{0x2F86A, 0x5B3E}, {0x2F86B, 0x5B3E}, {0x2F86C, 0x219C8}, {0x2F86D, 0x5BC3}, {0x2F86E, 0x5BD8}, {0x2F86F, 0x5BE7}, {0x2F870, 0x5BF3}, {0x2F871, 0x21B18}, {0x2F872, 0x5BFF}, {0x2F873, 0x5C06},
{0x2F874, 0x5F53}, {0x2F875, 0x5C22}, {0x2F876, 0x3781}, {0x2F877, 0x5C60}, {0x2F878, 0x5C6E}, {0x2F879, 0x5CC0}, {0x2F87A, 0x5C8D}, {0x2F87B, 0x21DE4}, {0x2F87C, 0x5D43}, {0x2F87D, 0x21DE6},
{0x2F87E, 0x5D6E}, {0x2F87F, 0x5D6B}, {0x2F880, 0x5D7C}, {0x2F881, 0x5DE1}, {0x2F882, 0x5DE2}, {0x2F883, 0x382F}, {0x2F884, 0x5DFD}, {0x2F885, 0x5E28}, {0x2F886, 0x5E3D}, {0x2F887, 0x5E69},
{0x2F888, 0x3862}, {0x2F889, 0x22183}, {0x2F88A, 0x387C}, {0x2F88B, 0x5EB0}, {0x2F88C, 0x5EB3}, {0x2F88D, 0x5EB6}, {0x2F88E, 0x5ECA}, {0x2F88F, 0x2A392}, {0x2F890, 0x5EFE}, {0x2F891, 0x22331},
{0x2F892, 0x22331}, {0x2F893, 0x8201}, {0x2F894, 0x5F22}, {0x2F895, 0x5F22}, {0x2F896, 0x38C7}, {0x2F897, 0x232B8}, {0x2F898, 0x261DA}, {0x2F899, 0x5F62}, {0x2F89A, 0x5F6B}, {0x2F89B, 0x38E3},
{0x2F89C, 0x5F9A}, {0x2F89D, 0x5FCD}, {0x2F89E, 0x5FD7}, {0x2F89F, 0x5FF9}, {0x2F8A0, 0x6081}, {0x2F8A1, 0x393A}, {0x2F8A2, 0x391C}, {0x2F8A3, 0x6094}, {0x2F8A4, 0x226D4}, {0x2F8A5, 0x60C7},
{0x2F8A6, 0x6148}, {0x2F8A7, 0x614C}, {0x2F8A8, 0x614E}, {0x2F8A9, 0x614C}, {0x2F8AA, 0x617A}, {0x2F8AB, 0x618E}, {0x2F8AC, 0x61B2}, {0x2F8AD, 0x61A4}, {0x2F8AE, 0x61AF}, {0x2F8AF, 0x61DE},
{0x2F8B0, 0x61F2}, {0x2F8B1, 0x61F6}, {0x2F8B2, 0x6210}, {0x2F8B3, 0x621B}, {0x2F8B4, 0x625D}, {0x2F8B5, 0x62B1}, {0x2F8B6, 0x62D4}, {0x2F8B7, 0x6350}, {0x2F8B8, 0x22B0C}, {0x2F8B9, 0x633D},
{0x2F8BA, 0x62FC}, {0x2F8BB, 0x6368}, {0x2F8BC, 0x6383}, {0x2F8BD, 0x63E4}, {0x2F8BE, 0x22BF1}, {0x2F8BF, 0x6422}, {0x2F8C0, 0x63C5}, {0x2F8C1, 0x63A9}, {0x2F8C2, 0x3A2E}, {0x2F8C3, 0x6469},
{0x2F8C4, 0x647E}, {0x2F8C5, 0x649D}, {0x2F8C6, 0x6477}, {0x2F8C7, 0x3A6C}, {0x2F8C8, 0x654F}, {0x2F8C9, 0x656C}, {0x2F8CA, 0x2300A}, {0x2F8CB, 0x65E3}, {0x2F8CC, 0x66F8}, {0x2F8CD, 0x6649},
{0x2F8CE, 0x3B19}, {0x2F8CF, 0x6691}, {0x2F8D0, 0x3B08}, {0x2F8D1, 0x3AE4}, {0x2F8D2, 0x5192}, {0x2F8D3, 0x5195}, {0x2F8D4, 0x6700}, {0x2F8D5, 0x669C}, {0x2F8D6, 0x80AD}, {0x2F8D7, 0x43D9},
{0x2F8D8, 0x6717}, {0x2F8D9, 0x671B}, {0x2F8DA, 0x6721}, {0x2F8DB, 0x675E}, {0x2F8DC, 0x6753}, {0x2F8DD, 0x233C3}, {0x2F8DE, 0x3B49}, {0x2F8DF, 0x67FA}, {0x2F8E0, 0x6785}, {0x2F8E1, 0x6852},
{0x2F8E2, 0x6885}, {0x2F8E3, 0x2346D}, {0x2F8E4, 0x688E}, {0x2F8E5, 0x681F}, {0x2F8E6, 0x6914}, {0x2F8E7, 0x3B9D}, {0x2F8E8, 0x6942}, {0x2F8E9, 0x69A3}, {0x2F8EA, 0x69EA}, {0x2F8EB, 0x6AA8},
{0x2F8EC, 0x236A3}, {0x2F8ED, 0x6ADB}, {0x2F8EE, 0x3C18}, {0x2F8EF, 0x6B21}, {0x2F8F0, 0x238A7}, {0x2F8F1, 0x6B54}, {0x2F8F2, 0x3C4E}, {0x2F8F3, 0x6B72}, {0x2F8F4, 0x6B9F}, {0x2F8F5, 0x6BBA},
{0x2F8F6, 0x6BBB}, {0x2F8F7, 0x23A8D}, {0x2F8F8, 0x21D0B}, {0x2F8F9, 0x23AFA}, {0x2F8FA, 0x6C4E}, {0x2F8FB, 0x23CBC}, {0x2F8FC, 0x6CBF}, {0x2F8FD, 0x6CCD}, {0x2F8FE, 0x6C67}, {0x2F8FF, 0x6D16},
{0x2F900, 0x6D3E}, {0x2F901, 0x6D77}, {0x2F902, 0x6D41}, {0x2F903, 0x6D69}, {0x2F904, 0x6D78}, {0x2F905, 0x6D85}, {0x2F906, 0x23D1E}, {0x2F907, 0x6D34}, {0x2F908, 0x6E2F}, {0x2F909, 0x6E6E},
{0x2F90A, 0x3D33}, {0x2F90B, 0x6ECB}, {0x2F90C, 0x6EC7}, {0x2F90D, 0x23ED1}, {0x2F90E, 0x6DF9}, {0x2F90F, 0x6F6E}, {0x2F910, 0x23F5E}, {0x2F911, 0x23F8E}, {0x2F912, 0x6FC6}, {0x2F913, 0x7039},
{0x2F914, 0x701E}, {0x2F915, 0x701B}, {0x2F916, 0x3D96}, {0x2F917, 0x704A}, {0x2F918, 0x707D}, {0x2F919, 0x7077}, {0x2F91A, 0x70AD}, {0x2F91B, 0x20525}, {0x2F91C, 0x7145}, {0x2F91D, 0x24263},
{0x2F91E, 0x719C}, {0x2F91F, 0x243AB}, {0x2F920, 0x7228}, {0x2F921, 0x7235}, {0x2F922, 0x7250}, {0x2F923, 0x24608}, {0x2F924, 0x7280}, {0x2F925, 0x7295}, {0x2F926, 0x24735}, {0x2F927, 0x24814},
{0x2F928, 0x737A}, {0x2F929, 0x738B}, {0x2F92A, 0x3EAC}, {0x2F92B, 0x73A5}, {0x2F92C, 0x3EB8}, {0x2F92D, 0x3EB8}, {0x2F92E, 0x7447}, {0x2F92F, 0x745C}, {0x2F930, 0x7471}, {0x2F931, 0x7485},
{0x2F932, 0x74CA}, {0x2F933, 0x3F1B}, {0x2F934, 0x7524}, {0x2F935, 0x24C36}, {0x2F936, 0x753E}, {0x2F937, 0x24C92}, {0x2F938, 0x7570}, {0x2F939, 0x2219F}, {0x2F93A, 0x7610}, {0x2F93B, 0x24FA1},
{0x2F93C, 0x24FB8}, {0x2F93D, 0x25044}, {0x2F93E, 0x3FFC}, {0x2F93F, 0x4008}, {0x2F940, 0x76F4}, {0x2F941, 0x250F3}, {0x2F942, 0x250F2}, {0x2F943, 0x25119}, {0x2F944, 0x25133}, {0x2F945, 0x771E},
{0x2F946, 0x771F}, {0x2F947, 0x771F}, {0x2F948, 0x774A}, {0x2F949, 0x4039}, {0x2F94A, 0x778B}, {0x2F94B, 0x4046}, {0x2F94C, 0x4096}, {0x2F94D, 0x2541D}, {0x2F94E, 0x784E}, {0x2F94F, 0x788C},
{0x2F950, 0x78CC}, {0x2F951, 0x40E3}, {0x2F952, 0x25626}, {0x2F953, 0x7956}, {0x2F954, 0x2569A}, {0x2F955, 0x256C5}, {0x2F956, 0x798F}, {0x2F957, 0x79EB}, {0x2F958, 0x412F}, {0x2F959, 0x7A40},
{0x2F95A, 0x7A4A}, {0x2F95B, 0x7A4F}, {0x2F95C, 0x2597C}, {0x2F95D, 0x25AA7}, {0x2F95E, 0x25AA7}, {0x2F95F, 0x7AEE}, {0x2F960, 0x4202}, {0x2F961, 0x25BAB}, {0x2F962, 0x7BC6}, {0x2F963, 0x7BC9},
{0x2F964, 0x4227}, {0x2F965, 0x25C80}, {0x2F966, 0x7CD2}, {0x2F967, 0x42A0}, {0x2F968, 0x7CE8}, {0x2F969, 0x7CE3}, {0x2F96A, 0x7D00}, {0x2F96B, 0x25F86}, {0x2F96C, 0x7D63}, {0x2F96D, 0x4301},
{0x2F96E, 0x7DC7}, {0x2F96F, 0x7E02}, {0x2F970, 0x7E45}, {0x2F971, 0x4334}, {0x2F972, 0x26228}, {0x2F973, 0x26247}, {0x2F974, 0x4359}, {0x2F975, 0x262D9}, {0x2F976, 0x7F7A}, {0x2F977, 0x2633E},
{0x2F978, 0x7F95}, {0x2F979, 0x7FFA}, {0x2F97A, 0x8005}, {0x2F97B, 0x264DA}, {0x2F97C, 0x26523}, {0x2F97D, 0x8060}, {0x2F97E, 0x265A8}, {0x2F97F, 0x8070}, {0x2F980, 0x2335F}, {0x2F981, 0x43D5},
{0x2F982, 0x80B2}, {0x2F983, 0x8103}, {0x2F984, 0x440B}, {0x2F985, 0x813E}, {0x2F986, 0x5AB5}, {0x2F987, 0x267A7}, {0x2F988, 0x267B5}, {0x2F989, 0x23393}, {0x2F98A, 0x2339C}, {0x2F98B, 0x8201},
{0x2F98C, 0x8204}, {0x2F98D, 0x8F9E}, {0x2F98E, 0x446B}, {0x2F98F, 0x8291}, {0x2F990, 0x828B}, {0x2F991, 0x829D}, {0x2F992, 0x52B3}, {0x2F993, 0x82B1}, {0x2F994, 0x82B3}, {0x2F995, 0x82BD},
{0x2F996, 0x82E6}, {0x2F997, 0x26B3C}, {0x2F998, 0x82E5}, {0x2F999, 0x831D}, {0x2F99A, 0x8363}, {0x2F99B, 0x83AD}, {0x2F99C, 0x8323}, {0x2F99D, 0x83BD}, {0x2F99E, 0x83E7}, {0x2F99F, 0x8457},
{0x2F9A0, 0x8353}, {0x2F9A1, 0x83CA}, {0x2F9A2, 0x83CC}, {0x2F9A3, 0x83DC}, {0x2F9A4, 0x26C36}, {0x2F9A5, 0x26D6B}, {0x2F9A6, 0x26CD5}, {0x2F9A7, 0x452B}, {0x2F9A8, 0x84F1}, {0x2F9A9, 0x84F3},
{0x2F9AA, 0x8516}, {0x2F9AB, 0x273CA}, {0x2F9AC, 0x8564}, {0x2F9AD, 0x26F2C}, {0x2F9AE, 0x455D}, {0x2F9AF, 0x4561}, {0x2F9B0, 0x26FB1}, {0x2F9B1, 0x270D2}, {0x2F9B2, 0x456B}, {0x2F9B3, 0x8650},
{0x2F9B4, 0x865C}, {0x2F9B5, 0x8667}, {0x2F9B6, 0x8669}, {0x2F9B7, 0x86A9}, {0x2F9B8, 0x8688}, {0x2F9B9, 0x870E}, {0x2F9BA, 0x86E2}, {0x2F9BB, 0x8779}, {0x2F9BC, 0x8728}, {0x2F9BD, 0x876B},
{0x2F9BE, 0x8786}, {0x2F9BF, 0x45D7}, {0x2F9C0, 0x87E1}, {0x2F9C1, 0x8801}, {0x2F9C2, 0x45F9}, {0x2F9C3, 0x8860}, {0x2F9C4, 0x8863}, {0x2F9C5, 0x27667}, {0x2F9C6, 0x88D7}, {0x2F9C7, 0x88DE},
{0x2F9C8, 0x4635}, {0x2F9C9, 0x88FA}, {0x2F9CA, 0x34BB}, {0x2F9CB, 0x278AE}, {0x2F9CC, 0x27966}, {0x2F9CD, 0x46BE}, {0x2F9CE, 0x46C7}, {0x2F9CF, 0x8AA0}, {0x2F9D0, 0x8AED}, {0x2F9D1, 0x8B8A},
{0x2F9D2, 0x8C55}, {0x2F9D3, 0x27CA8}, {0x2F9D4, 0x8CAB}, {0x2F9D5, 0x8CC1}, {0x2F9D6, 0x8D1B}, {0x2F9D7, 0x8D77}, {0x2F9D8, 0x27F2F}, {0x2F9D9, 0x20804}, {0x2F9DA, 0x8DCB}, {0x2F9DB, 0x8DBC},
{0x2F9DC, 0x8DF0}, {0x2F9DD, 0x208DE}, {0x2F9DE, 0x8ED4}, {0x2F9DF, 0x8F38}, {0x2F9E0, 0x285D2}, {0x2F9E1, 0x285ED}, {0x2F9E2, 0x9094}, {0x2F9E3, 0x90F1}, {0x2F9E4, 0x9111}, {0x2F9E5, 0x2872E},
{0x2F9E6, 0x911B}, {0x2F9E7, 0x9238}, {0x2F9E8, 0x92D7}, {0x2F9E9, 0x92D8}, {0x2F9EA, 0x927C}, {0x2F9EB, 0x93F9}, {0x2F9EC, 0x9415}, {0x2F9ED, 0x28BFA}, {0x2F9EE, 0x958B}, {0x2F9EF, 0x4995},
{0x2F9F0, 0x95B7}, {0x2F9F1, 0x28D77}, {0x2F9F2, 0x49E6}, {0x2F9F3, 0x96C3}, {0x2F9F4, 0x5DB2}, {0x2F9F5, 0x9723}, {0x2F9F6, 0x29145}, {0x2F9F7, 0x2921A}, {0x2F9F8, 0x4A6E}, {0x2F9F9, 0x4A76},
{0x2F9FA, 0x97E0}, {0x2F9FB, 0x2940A}, {0x2F9FC, 0x4AB2}, {0x2F9FD, 0x29496}, {0x2F9FE, 0x980B}, {0x2F9FF, 0x980B}, {0x2FA00, 0x9829}, {0x2FA01, 0x295B6}, {0x2FA02, 0x98E2}, {0x2FA03, 0x4B33},
{0x2FA04, 0x9929}, {0x2FA05, 0x99A7}, {0x2FA06, 0x99C2}, {0x2FA07, 0x99FE}, {0x2FA08, 0x4BCE}, {0x2FA09, 0x29B30}, {0x2FA0A, 0x9B12}, {0x2FA0B, 0x9C40}, {0x2FA0C, 0x9CFD}, {0x2FA0D, 0x4CCE},
{0x2FA0E, 0x4CED}, {0x2FA0F, 0x9D67}, {0x2FA10, 0x2A0CE}, {0x2FA11, 0x4CF8}, {0x2FA12, 0x2A105}, {0x2FA13, 0x2A20E}, {0x2FA14, 0x2A291}, {0x2FA15, 0x9EBB}, {0x2FA16, 0x4D56}, {0x2FA17, 0x9EF9},
{0x2FA18, 0x9EFE}, {0x2FA19, 0x9F05}, {0x2FA1A, 0x9F0F}, {0x2FA1B, 0x9F16}, {0x2FA1D, 0x2A600},
};
static std::string codepoint_to_utf8(uint32_t cp) {
std::string result;
if (/* 0x00 <= cp && */ cp <= 0x7f) {
result.push_back(cp);
}
else if (0x80 <= cp && cp <= 0x7ff) {
result.push_back(0xc0 | ((cp >> 6) & 0x1f));
result.push_back(0x80 | (cp & 0x3f));
}
else if (0x800 <= cp && cp <= 0xffff) {
result.push_back(0xe0 | ((cp >> 12) & 0x0f));
result.push_back(0x80 | ((cp >> 6) & 0x3f));
result.push_back(0x80 | (cp & 0x3f));
}
else if (0x10000 <= cp && cp <= 0x10ffff) {
result.push_back(0xf0 | ((cp >> 18) & 0x07));
result.push_back(0x80 | ((cp >> 12) & 0x3f));
result.push_back(0x80 | ((cp >> 6) & 0x3f));
result.push_back(0x80 | (cp & 0x3f));
}
else {
throw std::invalid_argument("invalid codepoint");
}
return result;
}
static std::string codepoints_to_utf8(const std::vector<uint32_t> & cps) {
std::string result;
for (size_t i = 0; i < cps.size(); ++i) {
result.append(codepoint_to_utf8(cps[i]));
}
return result;
}
static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
assert(offset < utf8.size());
if (!(utf8[offset + 0] & 0x80)) {
auto result = utf8[offset + 0];
offset += 1;
return result;
}
if (!(utf8[offset + 0] & 0x40)) {
throw std::invalid_argument("invalid character");
}
if (!(utf8[offset + 0] & 0x20)) {
if (offset + 1 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80)) {
throw std::invalid_argument("invalid character");
}
auto result = ((utf8[offset + 0] & 0x1f) << 6) | (utf8[offset + 1] & 0x3f);
offset += 2;
return result;
}
if (!(utf8[offset + 0] & 0x10)) {
if (offset + 2 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80)) {
throw std::invalid_argument("invalid character");
}
auto result = ((utf8[offset + 0] & 0x0f) << 12) | ((utf8[offset + 1] & 0x3f) << 6) | (utf8[offset + 2] & 0x3f);
offset += 3;
return result;
}
if (!(utf8[offset + 0] & 0x08)) {
if (offset + 3 >= utf8.size() || ! ((utf8[offset + 1] & 0xc0) == 0x80) || ! ((utf8[offset + 2] & 0xc0) == 0x80) || !((utf8[offset + 3] & 0xc0) == 0x80)) {
throw std::invalid_argument("invalid character");
}
auto result = ((utf8[offset + 0] & 0x07) << 18) | ((utf8[offset + 1] & 0x3f) << 12) | ((utf8[offset + 2] & 0x3f) << 6) | (utf8[offset + 3] & 0x3f);
offset += 4;
return result;
}
throw std::invalid_argument("invalid string");
}
static std::vector<uint32_t> codepoints_from_utf8(const std::string & utf8) {
std::vector<uint32_t> result;
size_t offset = 0;
while (offset < utf8.size()) {
result.push_back(codepoint_from_utf8(utf8, offset));
}
return result;
}
static std::vector<uint16_t> codepoint_to_utf16(uint32_t cp) {
std::vector<uint16_t> result;
if (/* 0x0000 <= cp && */ cp <= 0xffff) {
result.emplace_back(cp);
}
else if (0x10000 <= cp && cp <= 0x10ffff) {
result.emplace_back(0xd800 | ((cp - 0x10000) >> 10));
result.emplace_back(0xdc00 | ((cp - 0x10000) & 0x03ff));
}
else {
throw std::invalid_argument("invalid codepoint");
}
return result;
}
static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t> & cps) {
std::vector<uint16_t> result;
for (size_t i = 0; i < cps.size(); ++i) {
auto temp = codepoint_to_utf16(cps[i]);
result.insert(result.end(), temp.begin(), temp.end());
}
return result;
}
static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t & offset) {
assert(offset < utf16.size());
if (((utf16[0] >> 10) << 10) != 0xd800) {
auto result = utf16[offset + 0];
offset += 1;
return result;
}
if (offset + 1 >= utf16.size() || !((utf16[1] & 0xdc00) == 0xdc00)) {
throw std::invalid_argument("invalid character");
}
auto result = 0x10000 + (((utf16[0] & 0x03ff) << 10) | (utf16[1] & 0x03ff));
offset += 2;
return result;
}
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
std::vector<uint32_t> result;
size_t offset = 0;
while (offset < utf16.size()) {
result.push_back(codepoint_from_utf16(utf16, offset));
}
return result;
}
#define CODEPOINT_TYPE_UNIDENTIFIED 0 #define CODEPOINT_TYPE_UNIDENTIFIED 0
#define CODEPOINT_TYPE_DIGIT 1 #define CODEPOINT_TYPE_DIGIT 1
#define CODEPOINT_TYPE_LETTER 2 #define CODEPOINT_TYPE_LETTER 2
@ -670,115 +13,14 @@ static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> &
#define CODEPOINT_TYPE_SYMBOL 6 #define CODEPOINT_TYPE_SYMBOL 6
#define CODEPOINT_TYPE_CONTROL 7 #define CODEPOINT_TYPE_CONTROL 7
static std::unordered_map<uint32_t, int> codepoint_type_map() { std::string unicode_cpt_to_utf8(uint32_t cp);
std::unordered_map<uint32_t, int> codepoint_types; std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
for (auto p : digit_ranges) {
for (auto i = p.first; i <= p.second; ++ i) {
codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
}
}
for (auto p : letter_ranges) {
for (auto i = p.first; i <= p.second; ++ i) {
codepoint_types[i] = CODEPOINT_TYPE_LETTER;
}
}
for (auto p : whitespace_ranges) {
for (auto i = p.first; i <= p.second; ++ i) {
codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
}
}
for (auto p : accent_mark_ranges) {
for (auto i = p.first; i <= p.second; ++ i) {
codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
}
}
for (auto p : punctuation_ranges) {
for (auto i = p.first; i <= p.second; ++ i) {
codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
}
}
for (auto p : symbol_ranges) {
for (auto i = p.first; i <= p.second; ++i) {
codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
}
}
for (auto p : control_ranges) {
for (auto i = p.first; i <= p.second; ++ i) {
codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
}
}
return codepoint_types;
}
static int codepoint_type(uint32_t cp) { std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
const auto it = codepoint_types.find(cp);
return it == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : it->second;
}
static int codepoint_type(const std::string & utf8) { int unicode_cpt_type(uint32_t cp);
if (utf8.length() == 0) { int unicode_cpt_type(const std::string & utf8);
return CODEPOINT_TYPE_UNIDENTIFIED;
}
size_t offset = 0;
return codepoint_type(codepoint_from_utf8(utf8, offset));
}
static std::unordered_map<uint8_t, std::string> bytes_to_unicode_map_bpe() { std::string unicode_byte_to_utf8(uint8_t byte);
std::unordered_map<uint8_t, std::string> map; uint8_t unicode_utf8_to_byte(const std::string & utf8);
for (int ch = u'!'; ch <= u'~'; ++ch) {
assert(0 <= ch && ch < 256);
map[ch] = codepoint_to_utf8(ch);
}
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
assert(0 <= ch && ch < 256);
map[ch] = codepoint_to_utf8(ch);
}
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
assert(0 <= ch && ch < 256);
map[ch] = codepoint_to_utf8(ch);
}
auto n = 0;
for (int ch = 0; ch < 256; ++ch) {
if (map.find(ch) == map.end()) {
map[ch] = codepoint_to_utf8(256 + n);
++n;
}
}
return map;
}
static std::string bytes_to_unicode_bpe(uint8_t byte) {
static std::unordered_map<uint8_t, std::string> map = bytes_to_unicode_map_bpe();
return map.at(byte);
}
static std::unordered_map<std::string, uint8_t> unicode_to_bytes_map_bpe() {
std::unordered_map<std::string, uint8_t> map;
for (int ch = u'!'; ch <= u'~'; ++ch) {
assert(0 <= ch && ch < 256);
map[codepoint_to_utf8(ch)] = ch;
}
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
assert(0 <= ch && ch < 256);
map[codepoint_to_utf8(ch)] = ch;
}
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
assert(0 <= ch && ch < 256);
map[codepoint_to_utf8(ch)] = ch;
}
auto n = 0;
for (int ch = 0; ch < 256; ++ch) {
if (map.find(codepoint_to_utf8(ch)) == map.end()) {
map[codepoint_to_utf8(256 + n)] = ch;
++n;
}
}
return map;
}
static uint8_t unicode_to_bytes_bpe(const std::string & utf8) {
static std::unordered_map<std::string, uint8_t> map = unicode_to_bytes_map_bpe();
return map.at(utf8);
}