Merge remote-tracking branch 'origin/master' into grammar-reps

commit 1fb778750f
Author: ochafik
Date:   2024-04-15 18:57:43 +01:00

43 changed files with 6044 additions and 6094 deletions

@@ -91,6 +91,12 @@ jobs:
             echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
           fi
+      - name: Downcase github.repository_owner
+        run: |
+          echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
+        env:
+          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
+
       - name: Build and push Docker image (versioned)
         if: github.event_name == 'push'
         uses: docker/build-push-action@v4
@@ -98,7 +104,7 @@ jobs:
           context: .
           push: true
           platforms: ${{ matrix.config.platforms }}
-          tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
+          tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
          file: ${{ matrix.config.dockerfile }}

       - name: Build and push Docker image (tagged)
@@ -107,5 +113,5 @@ jobs:
           context: .
           push: ${{ github.event_name == 'push' }}
           platforms: ${{ matrix.config.platforms }}
-          tags: "ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ github.repository_owner }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
+          tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
          file: ${{ matrix.config.dockerfile }}
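Note: `${GITHUB_REPOSITORY_OWNER@L}` in the new step is bash's lowercase parameter expansion (bash 5.1+), used here because ghcr.io rejects image names containing uppercase characters. A minimal sketch of the behaviour (the `owner` variable is illustrative):

```sh
owner="SomeOrg"
echo "${owner@L}"                           # -> someorg (bash >= 5.1)
echo "$owner" | tr '[:upper:]' '[:lower:]'  # portable equivalent
```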

@@ -688,7 +688,7 @@ llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml
	$(CXX) $(CXXFLAGS) -c $< -o $@

 COMMON_H_DEPS = common/common.h common/sampling.h common/log.h
-COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o
+COMMON_DEPS = common.o sampling.o grammar-parser.o build-info.o json-schema-to-grammar.o

 common.o: common/common.cpp $(COMMON_H_DEPS)
	$(CXX) $(CXXFLAGS) -c $< -o $@
@@ -756,7 +756,7 @@ batched: examples/batched/batched.cpp ggml.o llama.o $(C
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o common.o $(OBJS)
+batched-bench: examples/batched-bench/batched-bench.cpp build-info.o ggml.o llama.o $(COMMON_DEPS) $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
@@ -788,7 +788,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp json-schema-to-grammar.o common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+server: examples/server/server.cpp examples/server/utils.hpp examples/server/httplib.h common/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
	$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
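Note: with `json-schema-to-grammar.o` folded into `$(COMMON_DEPS)`, targets that previously listed `common.o` directly now pick it up transitively. A clean rebuild avoids linking stale objects from the old layout (a sketch, assuming a checkout at the repo root):

```sh
make clean
make -j server   # json-schema-to-grammar.o is now linked via $(COMMON_DEPS)
```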

@@ -2,6 +2,44 @@

 import PackageDescription

+var sources = [
+    "ggml.c",
+    "llama.cpp",
+    "unicode.cpp",
+    "unicode-data.cpp",
+    "ggml-alloc.c",
+    "ggml-backend.c",
+    "ggml-quants.c",
+]
+
+var resources: [Resource] = []
+var linkerSettings: [LinkerSetting] = []
+var cSettings: [CSetting] = [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    // NOTE: NEW_LAPACK will required iOS version 16.4+
+    // We should consider add this in the future when we drop support for iOS 14
+    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+]
+
+#if canImport(Darwin)
+sources.append("ggml-metal.m")
+resources.append(.process("ggml-metal.metal"))
+linkerSettings.append(.linkedFramework("Accelerate"))
+cSettings.append(
+    contentsOf: [
+        .define("GGML_USE_ACCELERATE"),
+        .define("GGML_USE_METAL")
+    ]
+)
+#endif
+
+#if os(Linux)
+    cSettings.append(.define("_GNU_SOURCE"))
+#endif
+
 let package = Package(
     name: "llama",
     platforms: [
@@ -28,34 +66,11 @@ let package = Package(
                 "ggml-cuda.h",
                 "Makefile"
             ],
-            sources: [
-                "ggml.c",
-                "llama.cpp",
-                "unicode.cpp",
-                "unicode-data.cpp",
-                "ggml-alloc.c",
-                "ggml-backend.c",
-                "ggml-quants.c",
-                "ggml-metal.m",
-            ],
-            resources: [
-                .process("ggml-metal.metal")
-            ],
+            sources: sources,
+            resources: resources,
             publicHeadersPath: "spm-headers",
-            cSettings: [
-                .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
-                .define("GGML_USE_ACCELERATE"),
-                .unsafeFlags(["-fno-objc-arc"]),
-                .define("GGML_USE_METAL"),
-                // NOTE: NEW_LAPACK will required iOS version 16.4+
-                // We should consider add this in the future when we drop support for iOS 14
-                // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
-                // .define("ACCELERATE_NEW_LAPACK"),
-                // .define("ACCELERATE_LAPACK_ILP64")
-            ],
-            linkerSettings: [
-                .linkedFramework("Accelerate")
-            ]
+            cSettings: cSettings,
+            linkerSettings: linkerSettings
         )
     ],
     cxxLanguageStandard: .cxx11

@@ -68,7 +68,7 @@ It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS,
 | Intel GPU                     | Status  | Verified Model                        |
 |-------------------------------|---------|---------------------------------------|
-| Intel Data Center Max Series  | Support | Max 1550                              |
+| Intel Data Center Max Series  | Support | Max 1550, 1100                        |
 | Intel Data Center Flex Series | Support | Flex 170                              |
 | Intel Arc Series              | Support | Arc 770, 730M                         |
 | Intel built-in Arc GPU        | Support | built-in Arc GPU in Meteor Lake       |
@@ -84,8 +84,7 @@ It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS,
 - **Execution Unit (EU)**
   - If the iGPU has less than 80 EUs, the inference speed will likely be too slow for practical use.

-### Nvidia GPU
-The BLAS acceleration on Nvidia GPU through oneAPI can be obtained using the Nvidia plugins for oneAPI and the cuBLAS backend of the upstream oneMKL library. Details and instructions on how to setup the runtime and library can be found in [this section](#i-setup-environment)
+### Other Vendor GPU

 **Verified devices**
@@ -94,14 +93,9 @@ The BLAS acceleration on Nvidia GPU through oneAPI can be obtained using the Nvi
 | Ampere Series            | Support | A100, A4000   |
 | Ampere Series *(Mobile)* | Support | RTX 40 Series |

-*Notes:*
-  - Support for Nvidia targets through oneAPI is currently limited to Linux platforms.
-  - Please make sure the native oneAPI MKL *(dedicated to intel CPUs and GPUs)* is not "visible" at this stage to properly setup and use the built-from-source oneMKL with cuBLAS backend in llama.cpp for Nvidia GPUs.
-
 ## Docker

 The docker build option is currently limited to *intel GPU* targets.

 ### Build image
 ```sh
 # Using FP16
@@ -168,29 +162,10 @@ Platform #0: Intel(R) OpenCL HD Graphics
 - **Nvidia GPU**

 In order to target Nvidia GPUs through SYCL, please make sure the CUDA/CUBLAS native requirements *-found [here](README.md#cuda)-* are installed.

-Installation can be verified by running the following:
-```sh
-nvidia-smi
-```
-Please make sure at least one CUDA device is available, which can be displayed like this *(here an A100-40GB Nvidia GPU)*:
-```
-+---------------------------------------------------------------------------------------+
-| NVIDIA-SMI 535.54.03 Driver Version: 535.54.03 CUDA Version: 12.2 |
-|-----------------------------------------+----------------------+----------------------+
-| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
-| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
-| | | MIG M. |
-|=========================================+======================+======================|
-| 0 NVIDIA A100-PCIE-40GB On | 00000000:8D:00.0 Off | 0 |
-| N/A 36C P0 57W / 250W | 4MiB / 40960MiB | 0% Default |
-| | | Disabled |
-+-----------------------------------------+----------------------+----------------------+
-```
-
 2. **Install Intel® oneAPI Base toolkit**

-- **Base installation**
+- **For Intel GPU**

 The base toolkit can be obtained from the official [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) page.
@@ -202,10 +177,10 @@ Upon a successful installation, SYCL is enabled for the available intel devices,

 - **Adding support to Nvidia GPUs**

-**oneAPI**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup.
+**oneAPI Plugin**: In order to enable SYCL support on Nvidia GPUs, please install the [Codeplay oneAPI Plugin for Nvidia GPUs](https://developer.codeplay.com/products/oneapi/nvidia/download). User should also make sure the plugin version matches the installed base toolkit one *(previous step)* for a seamless "oneAPI on Nvidia GPU" setup.

-**oneMKL**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs.
+**oneMKL for cuBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* do not contain the cuBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *cuBLAS* backend enabled is thus required to run it on Nvidia GPUs.

 ```sh
 git clone https://github.com/oneapi-src/oneMKL
@@ -237,7 +212,7 @@ When targeting an intel GPU, the user should expect one or more level-zero devic
 - **Nvidia GPU**

-Similarly, user targetting Nvidia GPUs should expect at least one SYCL-CUDA device [`ext_oneapi_cuda:gpu`] as bellow:
+Similarly, user targeting Nvidia GPUs should expect at least one SYCL-CUDA device [`ext_oneapi_cuda:gpu`] as bellow:
 ```
 [opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.12.0.12_195853.xmain-hotfix]
 [opencl:cpu:1] Intel(R) OpenCL, Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz OpenCL 3.0 (Build 0) [2023.16.12.0.12_195853.xmain-hotfix]
@@ -260,6 +235,9 @@ cmake --build .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icp

 # Option 2: Use FP32 by default
 cmake --build .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+#build all binary
+cmake --build . --config Release -j -v
 ```

 #### Nvidia GPU
@@ -278,6 +256,10 @@ cmake --build .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=i

 # Option 2: Use FP32 by default
 cmake --build .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+#build all binary
+cmake --build . --config Release -j -v
 ```

 ### III. Run the inference
@@ -357,7 +339,6 @@ Otherwise, you can run the script:

 *Notes:*

-- By default, `mmap` is used to read the model file. In some cases, it causes runtime hang issues. Please disable it by passing `--no-mmap` to the `/bin/main` if faced with the issue.
 - Upon execution, verify the selected device(s) ID(s) in the output log, which can for instance be displayed as follow:

 ```sh
@@ -438,7 +419,7 @@
 cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON

-make
+make -j
 ```

 Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions:
@@ -525,7 +506,6 @@ Otherwise, run the following wrapper script:

 Note:

-- By default, `mmap` is used to read the model file. In some cases, it causes runtime hang issues. Please disable it by passing `--no-mmap` to the `main.exe` if faced with the issue.
 - Upon execution, verify the selected device(s) ID(s) in the output log, which can for instance be displayed as follow:

 ```sh
@@ -557,12 +537,6 @@ use 1 SYCL GPUs: [0] with Max compute units:512

 ## Known Issues

-- Hanging during startup
-
-  llama.cpp uses *mmap* as the default mode for reading the model file and copying it to the GPU. In some systems, `memcpy` might behave abnormally and therefore hang.
-
-  - **Solution**: add `--no-mmap` or `--mmap 0` flag to the `main` executable.
-
 - `Split-mode:[row]` is not supported.

 ## Q&A
@@ -574,7 +548,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 - General compiler error:

-  - Remove build folder or try a clean-build.
+  - Remove **build** folder or try a clean-build.

 - I can **not** see `[ext_oneapi_level_zero:gpu]` afer installing the GPU driver on Linux.

@@ -94,6 +94,7 @@ Typically finetunes of the base models below are supported as well.
 - [x] LLaMA 2 🦙🦙
 - [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1)
 - [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral)
+- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct)
 - [X] Falcon
 - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
 - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)

@@ -128,14 +128,14 @@ pub fn build(b: *std.build.Builder) !void {
     const clip = make.obj("clip", "examples/llava/clip.cpp");
     const llava = make.obj("llava", "examples/llava/llava.cpp");

-    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, sampling, console, grammar_parser });
-    _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo });
-    _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo });
-    _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo });
-    _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, train });
-    _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, train });
+    _ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, console, grammar_parser });
+    _ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
+    _ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
+    _ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
+    _ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });
+    _ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });

-    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, buildinfo, sampling, grammar_parser, json_schema_to_grammar, clip, llava });
+    const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, grammar_parser, clip, llava });
     if (server.target.isWindows()) {
         server.linkSystemLibrary("ws2_32");
     }

@@ -153,6 +153,52 @@ function gg_sum_ctest_release {
     gg_printf '```\n'
 }

+# test_scripts_debug
+
+function gg_run_test_scripts_debug {
+    cd ${SRC}
+
+    set -e
+
+    (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+
+    set +e
+}
+
+function gg_sum_test_scripts_debug {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs test scripts in debug mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
+    gg_printf '```\n'
+    gg_printf '\n'
+}
+
+# test_scripts_release
+
+function gg_run_test_scripts_release {
+    cd ${SRC}
+
+    set -e
+
+    (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
+
+    set +e
+}
+
+function gg_sum_test_scripts_release {
+    gg_printf '### %s\n\n' "${ci}"
+
+    gg_printf 'Runs test scripts in release mode\n'
+    gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
+    gg_printf '```\n'
+    gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
+    gg_printf '```\n'
+    gg_printf '\n'
+}
+
 function gg_get_model {
     local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf"
     local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
@@ -642,6 +688,9 @@ test $ret -eq 0 && gg_run ctest_release

 if [ -z ${GG_BUILD_LOW_PERF} ]; then
     test $ret -eq 0 && gg_run embd_bge_small

+    test $ret -eq 0 && gg_run test_scripts_debug
+    test $ret -eq 0 && gg_run test_scripts_release
+
     if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
         if [ -z ${GG_BUILD_CUDA} ]; then
             test $ret -eq 0 && gg_run open_llama_3b_v2
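Note: the new `test_scripts_debug`/`test_scripts_release` stages run inside the usual `ci/run.sh` invocation, which takes an output directory and a mount directory; a sketch (paths illustrative, assuming the script's `$OUT`/`$MNT` conventions):

```sh
mkdir -p tmp/results tmp/mnt
bash ./ci/run.sh ./tmp/results ./tmp/mnt
```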

@@ -47,9 +47,6 @@ if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()

-set(TARGET json-schema-to-grammar)
-add_library(${TARGET} OBJECT json-schema-to-grammar.cpp json-schema-to-grammar.h)
-
 set(TARGET common)

 add_library(${TARGET} STATIC
@@ -63,6 +60,7 @@ add_library(${TARGET} STATIC
     grammar-parser.h
     grammar-parser.cpp
     json.hpp
+    json-schema-to-grammar.cpp
     train.h
     train.cpp
     ngram-cache.h

@@ -1,4 +1,6 @@
 #include "common.h"
+#include "json.hpp"
+#include "json-schema-to-grammar.h"
 #include "llama.h"

 #include <algorithm>
@@ -68,6 +70,8 @@
 #define LLAMA_CURL_MAX_HEADER_LENGTH 256
 #endif // LLAMA_USE_CURL

+using json = nlohmann::ordered_json;
+
 int32_t get_num_physical_cores() {
 #ifdef __linux__
     // enumerate the set of thread siblings, num entries is num cores
@@ -1148,6 +1152,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         );
         return true;
     }
+    if (arg == "-j" || arg == "--json-schema") {
+        if (++i >= argc) {
+            invalid_param = true;
+            return true;
+        }
+        sparams.grammar = json_schema_to_grammar(json::parse(argv[i]));
+        return true;
+    }
     if (arg == "--override-kv") {
         if (++i >= argc) {
             invalid_param = true;
@@ -1353,6 +1365,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("                        or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
     printf("  --grammar GRAMMAR     BNF-like grammar to constrain generations (see samples in grammars/ dir)\n");
     printf("  --grammar-file FNAME  file to read grammar from\n");
+    printf("  -j SCHEMA, --json-schema SCHEMA\n");
+    printf("                        JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object.\n");
+    printf("                        For schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead\n");
     printf("  --cfg-negative-prompt PROMPT\n");
     printf("                        negative prompt to use for guidance. (default: empty)\n");
     printf("  --cfg-negative-prompt-file FNAME\n");
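Note: the new `-j` flag converts the given JSON schema to a GBNF grammar at argument-parse time and installs it as the sampling grammar. A minimal sketch of its use (model path and prompt illustrative):

```sh
./main -m models/7B/ggml-model-q4_0.gguf \
    -p 'JSON with name and age for: "John is 30 years old"' \
    -j '{"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, "required": ["name", "age"]}'
```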

@@ -11,35 +11,101 @@

 using json = nlohmann::ordered_json;

+template <typename Iterator>
+static std::string join(Iterator begin, Iterator end, const std::string & separator);
+
+static std::string repeat(const std::string & str, size_t n);
+
+static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "", bool item_rule_is_literal = false) {
+    if (separator_rule.empty()) {
+        if (min_items == 0 && max_items == 1) {
+            return item_rule + "?";
+        } else if (min_items == 1 && max_items == std::numeric_limits<int>::max()) {
+            return item_rule + "+";
+        }
+    }
+
+    std::string result;
+    if (min_items > 0) {
+        if (item_rule_is_literal && separator_rule.empty()) {
+            result = "\"" + repeat(std::string(item_rule.begin() + 1, item_rule.end() - 1), min_items) + "\"";
+        } else {
+            std::vector<std::string> items(min_items, item_rule);
+            result = join(items.begin(), items.end(), separator_rule.empty() ? " " : " " + separator_rule + " ");
+        }
+    }
+
+    std::function<std::string(int, bool)> opt_repetitions = [&](int up_to_n, bool prefix_with_sep) -> std::string {
+        auto content = prefix_with_sep && !separator_rule.empty() ? separator_rule + " " + item_rule : item_rule;
+
+        if (up_to_n == 0) {
+            return "";
+        } else if (up_to_n == 1) {
+            return "(" + content + ")?";
+        } else if (!separator_rule.empty() && !prefix_with_sep) {
+            return "(" + content + " " + opt_repetitions(up_to_n - 1, true) + ")?";
+        } else {
+            std::string res = repeat("(" + content + " ", up_to_n);
+            // strip trailing space
+            res = res.substr(0, res.length() - 1);
+            res += repeat(")?", up_to_n);
+            return res;
+        }
+    };
+
+    if (min_items > 0 && max_items != min_items) {
+        result += " ";
+    }
+
+    if (max_items != std::numeric_limits<int>::max()) {
+        result += opt_repetitions(max_items - min_items, min_items > 0);
+    } else {
+        std::string item_operator = "(" + (separator_rule.empty() ? "" : separator_rule + " ") + item_rule + ")";
+
+        if (min_items == 0 && !separator_rule.empty()) {
+            result = "(" + item_rule + " " + item_operator + "*)?";
+        } else {
+            result += item_operator + "*";
+        }
+    }
+
+    return result;
+}
+
 const std::string SPACE_RULE = "\" \"?";

-std::unordered_map<std::string, std::string> PRIMITIVE_RULES = {
-    {"boolean", "(\"true\" | \"false\") space"},
-    {"number", "(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space"},
-    {"integer", "(\"-\"? ([0-9] | [1-9] [0-9]*)) space"},
-    {"value", "object | array | string | number | boolean"},
-    {"object", "\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space"},
-    {"array", "\"[\" space ( value (\",\" space value)* )? \"]\" space"},
-    {"uuid", "\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
-             "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
-             "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
-             "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
-             "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space"},
-    {"string", " \"\\\"\" (\n"
-               "   [^\"\\\\] |\n"
-               "   \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n"
-               " )* \"\\\"\" space"},
-    {"null", "\"null\" space"}
-};
-
-std::vector<std::string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null", "value"};
-
-std::unordered_map<std::string, std::string> DATE_RULES = {
-    {"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )"},
-    {"time", "([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )"},
-    {"date-time", "date \"T\" time"},
-    {"date-string", "\"\\\"\" date \"\\\"\" space"},
-    {"time-string", "\"\\\"\" time \"\\\"\" space"},
-    {"date-time-string", "\"\\\"\" date-time \"\\\"\" space"}
-};
+struct BuiltinRule {
+    std::string content;
+    std::vector<std::string> deps;
+};
+
+const std::string _up_to_15_digits = build_repetition("[0-9]", 0, 15);
+
+std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
+    {"boolean", {"(\"true\" | \"false\") space", {}}},
+    {"decimal-part", {"[0-9] " + _up_to_15_digits, {}}},
+    {"integral-part", {"[0-9] | [1-9] " + _up_to_15_digits, {}}},
+    {"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}},
+    {"integer", {"(\"-\"? integral-part) space", {"integral-part"}}},
+    {"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
+    {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
+    {"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
+    {"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
-              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
+              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
+              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
+              "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}},
+    {"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])", {}}},
+    {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}},
+    {"null", {"\"null\" space", {}}},
+};
+
+std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
+    {"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
+    {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
+    {"date-time", {"date \"T\" time", {"date", "time"}}},
+    {"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
+    {"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
+    {"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}}
+};

 static bool is_reserved_name(const std::string & name) {
@@ -47,7 +113,7 @@ static bool is_reserved_name(const std::string & name) {
     if (RESERVED_NAMES.empty()) {
         RESERVED_NAMES.insert("root");
         for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first);
-        for (const auto &p : DATE_RULES) RESERVED_NAMES.insert(p.first);
+        for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first);
     }
     return RESERVED_NAMES.find(name) != RESERVED_NAMES.end();
 }
@@ -192,7 +258,7 @@ private:
         if (_dotall) {
             rule = "[\\U00000000-\\U0010FFFF]";
         } else {
-            rule = "[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]";
+            rule = "[^\\x0A\\x0D]";
         }
         return _add_rule("dot", rule);
     };
@@ -308,13 +374,6 @@ private:
             auto &sub = last.first;
             auto sub_is_literal = last.second;

-            if (min_times == 0 && max_times == std::numeric_limits<int>::max()) {
-                sub += "*";
-            } else if (min_times == 0 && max_times == 1) {
-                sub += "?";
-            } else if (min_times == 1 && max_times == std::numeric_limits<int>::max()) {
-                sub += "+";
-            } else {
             if (!sub_is_literal) {
                 std::string & sub_id = sub_rule_ids[sub];
                 if (sub_id.empty()) {
@@ -322,33 +381,14 @@ private:
                 }
                 sub = sub_id;
             }
-            std::string result;
-            if (sub_is_literal && min_times > 0) {
-                result = "\"" + repeat(sub.substr(1, sub.length() - 2), min_times) + "\"";
-            } else {
-                for (int j = 0; j < min_times; j++) {
-                    if (j > 0) {
-                        result += " ";
-                    }
-                    result += sub;
-                }
-            }
-            if (min_times > 0 && min_times < max_times) {
-                result += " ";
-            }
-            if (max_times == std::numeric_limits<int>::max()) {
-                result += sub + "*";
-            } else {
-                for (int j = min_times; j < max_times; j++) {
-                    if (j > min_times) {
-                        result += " ";
-                    }
-                    result += sub + "?";
-                }
-            }
-            seq.back().first = result;
+            seq.back().first = build_repetition(
+                sub_is_literal ? "\"" + sub + "\"" : sub,
+                min_times,
+                max_times,
+                "",
+                sub_is_literal
+            );
             seq.back().second = false;
-            }
         } else {
             std::string literal;
             auto is_non_literal = [&](char c) {
@@ -424,7 +464,7 @@ private:
         if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
             std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
             std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
-            std::string kv_rule = _add_rule(sub_name + "-kv", _add_rule("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
+            std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
             prop_kv_rule_names["*"] = kv_rule;
             optional_props.push_back("*");
         }
@@ -486,6 +526,25 @@ private:
         return rule;
     }

+    std::string _add_primitive(const std::string & name, const BuiltinRule & rule) {
+        auto n = _add_rule(name, rule.content);
+        for (const auto & dep : rule.deps) {
+            BuiltinRule dep_rule;
+            auto it = PRIMITIVE_RULES.find(dep);
+            if (it == PRIMITIVE_RULES.end()) {
+                it = STRING_FORMAT_RULES.find(dep);
+                if (it == STRING_FORMAT_RULES.end()) {
+                    _errors.push_back("Rule " + dep + " not known");
+                    continue;
+                }
+            }
+            if (_rules.find(dep) == _rules.end()) {
+                _add_primitive(dep, it->second);
+            }
+        }
+        return n;
+    }
+
 public:
     SchemaConverter(
         const std::function<json(const std::string &)> & fetch_json,
@@ -647,49 +706,33 @@ public:
             return _add_rule(rule_name, rule);
         } else {
             std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item");
-            std::string list_item_operator = "( \",\" space " + item_rule_name + " )";
-            std::string successive_items;
             int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0;
             json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
-            int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : -1;
-            if (min_items > 0) {
-                successive_items += repeat(list_item_operator, min_items - 1);
-                min_items--;
-            }
-            if (max_items >= 0 && max_items > min_items) {
-                successive_items += repeat(list_item_operator + "?", max_items - min_items - 1);
-            } else {
-                successive_items += list_item_operator + "*";
-            }
-            std::string rule;
-            if (min_items == 0) {
-                rule = "\"[\" space ( " + item_rule_name + " " + successive_items + " )? \"]\" space";
-            } else {
-                rule = "\"[\" space " + item_rule_name + " " + successive_items + " \"]\" space";
-            }
-            return _add_rule(rule_name, rule);
+            int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max();
+
+            return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space");
         }
     } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
         return _visit_pattern(schema["pattern"], rule_name);
     } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) {
-        return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
+        return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid"));
-    } else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) {
-        for (const auto & kv : DATE_RULES) {
-            _add_rule(kv.first, kv.second);
-        }
-        return schema_format + "-string";
+    } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) {
+        auto prim_name = schema_format + "-string";
+        return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name)));
+    } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) {
+        std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
+        int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
+        int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
+        return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
     } else if (schema.empty() || schema_type == "object") {
-        for (const auto & n : OBJECT_RULE_NAMES) {
-            _add_rule(n, PRIMITIVE_RULES.at(n));
-        }
-        return _add_rule(rule_name, "object");
+        return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
     } else {
         if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) {
             _errors.push_back("Unrecognized schema: " + schema.dump());
             return "";
         }
         // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
-        return _add_rule(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
+        return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>()));
     }
 }
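Note: for bounded repetitions (`minItems`/`maxItems`, `minLength`/`maxLength`) the converter now nests optional groups instead of emitting flat `a? a? ...` sequences. A sketch of the expected shape of the output, using the Python sibling script (file path illustrative):

```sh
echo '{"type": "string", "minLength": 1, "maxLength": 4}' > /tmp/schema.json
python examples/json_schema_to_grammar.py /tmp/schema.json
# root rule should look roughly like:
#   root ::= "\"" char (char (char (char)?)?)? "\"" space
```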

@@ -43,17 +43,18 @@ AnyModel = TypeVar("AnyModel", bound="type[Model]")
 class Model(ABC):
     _model_classes: dict[str, type[Model]] = {}

-    def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian: bool):
+    def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian: bool, use_temp_file: bool):
         self.dir_model = dir_model
         self.ftype = ftype
         self.fname_out = fname_out
         self.is_big_endian = is_big_endian
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
+        self.use_temp_file = use_temp_file
         self.is_safetensors = self._is_model_safetensors()
         self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
         self.part_names = self._get_part_names()
         self.hparams = Model.load_hparams(self.dir_model)
-        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=False)
+        self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])

     @property
@@ -1427,6 +1428,102 @@ class GrokModel(Model):
             self.gguf_writer.add_tensor(new_name, data)

+
+@Model.register("DbrxForCausalLM")
+class DbrxModel(Model):
+    model_arch = gguf.MODEL_ARCH.DBRX
+
+    def set_gguf_parameters(self):
+        ffn_config = self.hparams["ffn_config"]
+        attn_config = self.hparams["attn_config"]
+        self.gguf_writer.add_name(self.hparams["model_type"])
+        self.gguf_writer.add_block_count(self.hparams["n_layers"])
+
+        self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
+        self.gguf_writer.add_embedding_length(self.hparams["d_model"])
+        self.gguf_writer.add_feed_forward_length(ffn_config["ffn_hidden_size"])
+
+        self.gguf_writer.add_head_count(self.hparams["n_heads"])
+        self.gguf_writer.add_head_count_kv(attn_config["kv_n_heads"])
+
+        self.gguf_writer.add_rope_freq_base(attn_config["rope_theta"])
+
+        self.gguf_writer.add_clamp_kqv(attn_config["clip_qkv"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+        self.gguf_writer.add_expert_count(ffn_config["moe_num_experts"])
+        self.gguf_writer.add_expert_used_count(ffn_config["moe_top_k"])
+
+        self.gguf_writer.add_layer_norm_eps(1e-5)
+
+        self.gguf_writer.add_file_type(self.ftype)
+        print(f"gguf: file type = {self.ftype}")
+
+    def write_tensors(self):
+        block_count = self.hparams.get("n_layers")
+        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
+        for name, data_torch in self.get_tensors():
+            n_expert = self.hparams["ffn_config"]["moe_num_experts"]
+            n_ff = self.hparams["ffn_config"]["ffn_hidden_size"]
+            n_embd = self.hparams["d_model"]
+
+            # Specific behavior for experts tensors: suffix .weight, view as 3D and transpose
+            # original implementation expects (n_expert, n_ff, n_embd) for all experts weights
+            # But llama.cpp moe graph works differently
+            # AND the dimensions in ggml are typically in the reverse order of the pytorch dimensions
+            # so (n_expert, n_ff, n_embd) in pytorch is {n_embd, n_ff, n_expert} in ggml_tensor
+            exp_tensor_names = {"ffn.experts.mlp.w1": None,       # LLM_TENSOR_FFN_GATE_EXPS ggml_tensor->ne{n_embd, n_ff, n_expert}
+                                "ffn.experts.mlp.w2": (0, 2, 1),  # LLM_TENSOR_FFN_DOWN_EXPS ggml_tensor->ne{n_ff, n_embd, n_expert}
+                                "ffn.experts.mlp.v1": None}       # LLM_TENSOR_FFN_UP_EXPS   ggml_tensor->ne{n_embd, n_ff, n_expert}
+            experts = False
+            for exp_tensor_name in exp_tensor_names.keys():
+                if name.find(exp_tensor_name) != -1 and name.find(".weight") == -1:
+                    experts = True
+                    data_torch = data_torch.view(n_expert, n_ff, n_embd)
+                    if (permute_tensor := exp_tensor_names[exp_tensor_name]) is not None:
+                        data_torch = data_torch.permute(*permute_tensor)
+                    break
+
+            old_dtype = data_torch.dtype
+
+            # convert any unsupported data types to float32
+            if data_torch.dtype not in (torch.float16, torch.float32):
+                data_torch = data_torch.to(torch.float32)
+
+            data = data_torch.squeeze().numpy()
+
+            # map tensor names
+            # In MoE models the ffn tensors are typically most of the model weights,
+            # and need to be quantizable. Quantize expects tensor names to be suffixed by .weight.
+            # Every other model has the weight names ending in .weight,
+            # let's assume that is the convention which is not the case for dbrx:
+            # https://huggingface.co/databricks/dbrx-instruct/blob/main/model.safetensors.index.json#L15
+            new_name = tensor_map.get_name(name if not experts else name + ".weight", try_suffixes=(".weight",))
+            if new_name is None:
+                print(f"Can not map tensor {name!r}")
+                sys.exit()
+
+            n_dims = len(data.shape)
+            data_dtype = data.dtype
+
+            # Most of the codebase that takes in 1D tensors only handles F32 tensors
+            # and most of the outputs tensors are F32.
+            if data_dtype != np.float32 and n_dims == 1:
+                print(f"Can not map tensor {name!r}: all 1D tensors must be F32")
+                sys.exit()
+
+            # if f32 desired, convert any float16 to float32
+            if self.ftype == 0 and data_dtype == np.float16:
+                data = data.astype(np.float32)
+
+            # if f16 desired, convert any float32 2-dim weight tensors to float16
+            if self.ftype == 1 and data_dtype == np.float32 and n_dims > 1:
+                data = data.astype(np.float16)
+
+            print(f"{new_name}, n_dims = {n_dims}, shape = {data.shape}, {old_dtype} --> {data.dtype}")
+
+            self.gguf_writer.add_tensor(new_name, data)
+
+
 @Model.register("MiniCPMForCausalLM")
 class MiniCPMModel(Model):
     model_arch = gguf.MODEL_ARCH.MINICPM
@@ -2363,6 +2460,7 @@ def parse_args() -> argparse.Namespace:
         "model", type=Path,
         help="directory containing model file",
     )
+    parser.add_argument("--use-temp-file", action="store_true", help="use the tempfile library while processing (helpful when running out of memory, process killed)")

     return parser.parse_args()
@@ -2406,7 +2504,7 @@ def main() -> None:

     with torch.inference_mode():
         model_class = Model.from_model_architecture(hparams["architectures"][0])
-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian)
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file)

     print("Set model parameters")
     model_instance.set_gguf_parameters()
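Note: `--use-temp-file` restores the tempfile-backed GGUF writer path for conversions whose in-memory buffering gets OOM-killed. A sketch of its use (model directory illustrative):

```sh
python convert-hf-to-gguf.py ./models/dbrx-instruct --outtype f16 --use-temp-file
```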

@@ -28,14 +28,27 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
 }

 static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
+    GGML_ASSERT(n > 0);
     float sum = 0;
     for (int64_t i3 = 0; i3 < ne[3]; i3++) {
         printf(" [\n");
-        for (int64_t i2 = 0; i2 < ne[2] && i2 < n; i2++) {
+        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
+            if (i2 == n && ne[2] > 2*n) {
+                printf(" ..., \n");
+                i2 = ne[2] - n;
+            }
             printf(" [\n");
-            for (int64_t i1 = 0; i1 < ne[1] && i1 < n; i1++) {
+            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
+                if (i1 == n && ne[1] > 2*n) {
+                    printf(" ..., \n");
+                    i1 = ne[1] - n;
+                }
                 printf(" [");
-                for (int64_t i0 = 0; i0 < ne[0] && i0 < n; i0++) {
+                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
+                    if (i0 == n && ne[0] > 2*n) {
+                        printf("..., ");
+                        i0 = ne[0] - n;
+                    }
                     size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
                     float v;
                     if (type == GGML_TYPE_F16) {
@@ -51,17 +64,14 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne
                     } else {
                         GGML_ASSERT(false);
                     }
-                    printf("%8.4f", v);
+                    printf("%12.4f", v);
                     sum += v;
-                    if (i0 < ne[0] - 1 && i0 < n - 1) printf(", ");
+                    if (i0 < ne[0] - 1) printf(", ");
                 }
-                if (ne[0] > n) printf(", ...");
                 printf("],\n");
             }
-            if (ne[1] > n) printf(" ...\n");
             printf(" ],\n");
         }
-        if (ne[2] > n) printf(" ...\n");
         printf(" ]\n");
     printf(" sum = %f\n", sum);
 }

@@ -5,5 +5,6 @@ CLI to split / merge GGUF files.
 **Command line options:**

 - `--split`: split GGUF to multiple GGUF, default operation.
+- `--split-max-size`: max size per split in `M` or `G`, f.ex. `500M` or `2G`.
 - `--split-max-tensors`: maximum tensors in each split: default(128)
 - `--merge`: merge multiple GGUF to a single GGUF.
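For instance (file names illustrative; shard suffixes are assigned by the tool):

```sh
./gguf-split --split --split-max-size 2G ggml-model.gguf ggml-model-split
./gguf-split --merge ggml-model-split-00001-of-00002.gguf ggml-model-merged.gguf
```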

@@ -59,10 +59,10 @@ static size_t split_str_to_n_bytes(std::string str) {
     int n;
     if (str.back() == 'M') {
         sscanf(str.c_str(), "%d", &n);
-        n_bytes = n * 1024 * 1024; // megabytes
+        n_bytes = (size_t)n * 1024 * 1024; // megabytes
     } else if (str.back() == 'G') {
         sscanf(str.c_str(), "%d", &n);
-        n_bytes = n * 1024 * 1024 * 1024; // gigabytes
+        n_bytes = (size_t)n * 1024 * 1024 * 1024; // gigabytes
     } else {
         throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back()));
     }

@@ -0,0 +1,89 @@
+#!/bin/bash
+
+set -eu
+
+if [ $# -lt 1 ]
+then
+    echo "usage:   $0 path_to_build_binary [path_to_temp_folder]"
+    echo "example: $0 ../../build/bin ../../tmp"
+    exit 1
+fi
+
+if [ $# -gt 1 ]
+then
+    TMP_DIR=$2
+else
+    TMP_DIR=/tmp
+fi
+
+set -x
+
+SPLIT=$1/gguf-split
+MAIN=$1/main
+WORK_PATH=$TMP_DIR/gguf-split
+CUR_DIR=$(pwd)
+
+mkdir -p "$WORK_PATH"
+
+# Clean up in case of previously failed test
+rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf
+
+# 1. Get a model
+(
+  cd $WORK_PATH
+  "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
+)
+echo PASS
+
+# 2. Split with max tensors strategy
+$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
+echo PASS
+echo
+
+# 2b. Test the sharded model is loading properly
+$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# 3. Merge
+$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf
+echo PASS
+echo
+
+# 3b. Test the merged model is loading properly
+$MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# 4. Split with no tensor in metadata
+#$SPLIT --split-max-tensors 32 --no-tensor-in-metadata $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors
+#echo PASS
+#echo
+
+# 4b. Test the sharded model is loading properly
+#$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf --random-prompt --n-predict 32
+#echo PASS
+#echo
+
+# 5. Merge
+#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf
+#echo PASS
+#echo
+
+# 5b. Test the merged model is loading properly
+#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --random-prompt --n-predict 32
+#echo PASS
+#echo
+
+# 6. Split with size strategy
+$SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G
+echo PASS
+echo
+
+# 6b. Test the sharded model is loading properly
+$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32
+echo PASS
+echo
+
+# Clean up
+rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf

@@ -6,37 +6,94 @@ import re
 import sys
 from typing import Any, Dict, List, Set, Tuple, Union

+
+def _build_repetition(item_rule, min_items, max_items, separator_rule=None, item_rule_is_literal=False):
+    if not separator_rule:
+        if min_items == 0 and max_items == 1:
+            return f'{item_rule}?'
+        elif min_items == 1 and max_items is None:
+            return f'{item_rule}+'
+
+    result = ''
+
+    if min_items > 0:
+        if item_rule_is_literal and separator_rule is None:
+            result = '"' + (item_rule[1:-1] * min_items) + '"'
+        else:
+            result = (f' {separator_rule} ' if separator_rule else ' ').join([item_rule] * min_items)
+
+    def opt_repetitions(up_to_n, prefix_with_sep=False):
+        '''
+            - n=4, no sep:             '(a (a (a (a)?)?)?)?'
+            - n=4, sep=',', prefix:    '("," a ("," a ("," a ("," a)?)?)?)?'
+            - n=4, sep=',', no prefix: '(a ("," a ("," a ("," a)?)?)?)?'
+        '''
+
+        content = f'{separator_rule} {item_rule}' if prefix_with_sep and separator_rule else item_rule
+        if up_to_n == 0:
+            return ''
+        elif up_to_n == 1:
+            return f'({content})?'
+        elif separator_rule and not prefix_with_sep:
+            return f'({content} {opt_repetitions(up_to_n - 1, prefix_with_sep=True)})?'
+        else:
+            return (f'({content} ' * up_to_n).rstrip() + (')?' * up_to_n)
+
+    if min_items > 0 and max_items != min_items:
+        result += ' '
+
+    if max_items is not None:
+        result += opt_repetitions(max_items - min_items, prefix_with_sep=min_items > 0)
+    else:
+        item_operator = f'({separator_rule + " " if separator_rule else ""}{item_rule})'
+
+        if min_items == 0 and separator_rule:
+            result = f'({item_rule} {item_operator}*)?'
+        else:
+            result += f'{item_operator}*'
+
+    return result
+
+
+class BuiltinRule:
+    def __init__(self, content: str, deps: list = None):
+        self.content = content
+        self.deps = deps or []
+
+
+_up_to_15_digits = _build_repetition('[0-9]', 0, 15)
+
 # whitespace is constrained to a single space char to prevent model "running away" in
 # whitespace. Also maybe improves generation quality?
 SPACE_RULE = '" "?'

 PRIMITIVE_RULES = {
-    'boolean': '("true" | "false") space',
-    'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
-    'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
-    'value' : 'object | array | string | number | boolean',
-    'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
-    'array' : '"[" space ( value ("," space value)* )? "]" space',
-    'uuid' : '"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space',
-    'string': r''' "\"" (
-        [^"\\] |
-        "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
-      )* "\"" space''',
-    'null': '"null" space',
+    'boolean'      : BuiltinRule('("true" | "false") space', []),
+    'decimal-part' : BuiltinRule('[0-9] ' + _up_to_15_digits, []),
+    'integral-part': BuiltinRule('[0-9] | [1-9] ' + _up_to_15_digits, []),
+    'number'       : BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
+    'integer'      : BuiltinRule('("-"? integral-part) space', ['integral-part']),
+    'value'        : BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
+    'object'       : BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
+    'array'        : BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
+    'uuid'         : BuiltinRule(r'"\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + r' "\"" space', []),
+    'char'         : BuiltinRule(r'[^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])', []),
+    'string'       : BuiltinRule(r'"\"" char* "\"" space', ['char']),
+    'null'         : BuiltinRule('"null" space', []),
 }

-OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value']
-
 # TODO: support "uri", "email" string formats
-DATE_RULES = {
-    'date'   : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
-    'time'   : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
-    'date-time': 'date "T" time',
-    'date-string': '"\\"" date "\\"" space',
-    'time-string': '"\\"" time "\\"" space',
-    'date-time-string': '"\\"" date-time "\\"" space',
+STRING_FORMAT_RULES = {
+    'date'            : BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
+    'time'            : BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
+    'date-time'       : BuiltinRule('date "T" time', ['date', 'time']),
+    'date-string'     : BuiltinRule('"\\"" date "\\"" space', ['date']),
+    'time-string'     : BuiltinRule('"\\"" time "\\"" space', ['time']),
+    'date-time-string': BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
 }

-RESERVED_NAMES = set(["root", *PRIMITIVE_RULES.keys(), *DATE_RULES.keys()])
+DOTALL = '[\\U00000000-\\U0010FFFF]'
+DOT = '[^\\x0A\\x0D]'
+
+RESERVED_NAMES = set(["root", "dot", *PRIMITIVE_RULES.keys(), *STRING_FORMAT_RULES.keys()])

 INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
 GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
@ -46,8 +103,6 @@ GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']'
NON_LITERAL_SET = set('|.()[]{}*+?')
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
class SchemaConverter:
    def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
@ -55,7 +110,9 @@ class SchemaConverter:
        self._allow_fetch = allow_fetch
        self._dotall = dotall
        self._raw_pattern = raw_pattern
        self._rules = {
            'space': SPACE_RULE,
        }
        self._refs = {}
        self._refs_being_resolved = set()

@ -65,6 +122,29 @@
        )
        return f'"{escaped}"'
def not_literal(self, literal: str, dotall: bool = True, maybe_escaped_underscores = False) -> str:
'''
not_literal('a') -> '[^a]'
not_literal('abc') -> '([^a] | "a" ([^b] | "b" ([^c])?)?)?'
'''
assert len(literal) > 0, 'Empty literal not supported'
def recurse(i: int):
c = literal[i]
if maybe_escaped_underscores and c == '_':
yield f'[^{c}\\\\]'
yield ' | '
yield f'"\\\\"? "{c}"'
else:
yield f'[^{c}]'
if i < len(literal) - 1:
yield ' | '
yield self._format_literal(c)
yield ' ('
yield from recurse(i + 1)
yield ')?'
return ''.join(('(', *recurse(0), ')'))
    def _add_rule(self, name, rule):
        esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
        if esc_name not in self._rules or self._rules[esc_name] == rule:
@ -169,10 +249,10 @@ class SchemaConverter:
        def get_dot():
            if self._dotall:
                rule = DOTALL
            else:
                # Accept any character... except \n and \r line break chars (\x0A and \x0D)
                rule = DOT
            return self._add_rule(f'dot', rule)

        def join_seq():
@ -246,13 +326,6 @@ class SchemaConverter:
            (sub, sub_is_literal) = seq[-1]

            if not sub_is_literal:
                id = sub_rule_ids.get(sub)
                if id is None:
@ -260,12 +333,7 @@ class SchemaConverter:
                    sub_rule_ids[sub] = id
                sub = id

            seq[-1] = (_build_repetition(f'"{sub}"' if sub_is_literal else sub, min_times, max_times, item_rule_is_literal=sub_is_literal), False)
        else:
            literal = ''
            while i < length:
@ -373,49 +441,47 @@ class SchemaConverter:
' "]" space') ' "]" space')
else: else:
item_rule_name = self.visit(items, f'{name}{"-" if name else ""}item') item_rule_name = self.visit(items, f'{name}{"-" if name else ""}item')
list_item_operator = f'( "," space {item_rule_name} )'
successive_items = ""
min_items = schema.get("minItems", 0) min_items = schema.get("minItems", 0)
max_items = schema.get("maxItems") max_items = schema.get("maxItems")
if min_items > 0: return self._add_rule(rule_name, '"[" space ' + _build_repetition(item_rule_name, min_items, max_items, separator_rule='"," space') + ' "]" space')
successive_items = list_item_operator * (min_items - 1)
min_items -= 1
if max_items is not None and max_items > min_items:
successive_items += (list_item_operator + "?") * (max_items - min_items - 1)
else:
successive_items += list_item_operator + "*"
if min_items == 0:
rule = f'"[" space ( {item_rule_name} {successive_items} )? "]" space'
else:
rule = f'"[" space {item_rule_name} {successive_items} "]" space'
return self._add_rule(rule_name, rule)
        elif schema_type in (None, 'string') and 'pattern' in schema:
            return self._visit_pattern(schema['pattern'], rule_name)

        elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
            return self._add_primitive(
                'root' if rule_name == 'root' else schema_format,
                PRIMITIVE_RULES['uuid']
            )

        elif schema_type in (None, 'string') and f'{schema_format}-string' in STRING_FORMAT_RULES:
            prim_name = f'{schema_format}-string'
            return self._add_rule(rule_name, self._add_primitive(prim_name, STRING_FORMAT_RULES[prim_name]))
elif schema_type == 'string' and ('minLength' in schema or 'maxLength' in schema):
char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])
min_len = schema.get('minLength', 0)
max_len = schema.get('maxLength')
return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space')
        elif (schema_type == 'object') or (len(schema) == 0):
            return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))

        else:
            assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
            # TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
            return self._add_primitive('root' if rule_name == 'root' else schema_type, PRIMITIVE_RULES[schema_type])

    def _add_primitive(self, name: str, rule: BuiltinRule):
        n = self._add_rule(name, rule.content)
        for dep in rule.deps:
            dep_rule = PRIMITIVE_RULES.get(dep) or STRING_FORMAT_RULES.get(dep)
            assert dep_rule, f'Rule {dep} not known'
            if dep not in self._rules:
                self._add_primitive(dep, dep_rule)
        return n
    def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
        prop_order = self._prop_order
@ -437,7 +503,7 @@ class SchemaConverter:
            value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
            prop_kv_rule_names["*"] = self._add_rule(
                f'{sub_name}-kv',
                self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
            )
            optional_props.append("*")

View file

@ -304,10 +304,12 @@ These options help improve the performance and memory usage of the LLaMA models.
- `--prompt-cache FNAME`: Specify a file to cache the model state after the initial prompt. This can significantly speed up the startup time when you're using longer prompts. The file is created during the first run and is reused and updated in subsequent runs. **Note**: Restoring a cached prompt does not imply restoring the exact state of the session at the point it was saved. So even when specifying a specific seed, you are not guaranteed to get the same sequence of tokens as the original generation.
### Grammars & JSON schemas
- `--grammar GRAMMAR`, `--grammar-file FILE`: Specify a grammar (defined inline or in a file) to constrain model output to a specific format. For example, you could force the model to output JSON or to speak only in emojis. See the [GBNF guide](../../grammars/README.md) for details on the syntax.
- `--json-schema SCHEMA`: Specify a [JSON schema](https://json-schema.org/) to constrain model output to (e.g. `{}` for any JSON object, or `{"items": {"type": "string", "minLength": 10, "maxLength": 100}, "minItems": 10}` for a JSON array of strings with size constraints). If a schema uses external `$ref`s, you should use `--grammar "$( python examples/json_schema_to_grammar.py myschema.json )"` instead.
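
The converter behind `--json-schema` can also be driven directly from Python. A minimal sketch (assumes the `examples/` directory is on `sys.path` so the module imports; the schema and option values are illustrative):

```
# Sketch: convert a JSON schema to a GBNF grammar in-process.
from json_schema_to_grammar import SchemaConverter

converter = SchemaConverter(prop_order={}, allow_fetch=False, dotall=False, raw_pattern=False)
converter.visit({"items": {"type": "string", "minLength": 10}, "minItems": 2}, '')  # '' names the root rule
print(converter.format_grammar())  # emits rules such as root, item, char and space
```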
### Quantization

For information about 4-bit quantization, which can significantly improve performance and reduce memory usage, please refer to llama.cpp's primary [README](../../README.md#prepare-and-quantize).

View file

@ -8,7 +8,7 @@ print(subprocess.check_output(
"python", "python",
os.path.join( os.path.join(
os.path.dirname(os.path.realpath(__file__)), os.path.dirname(os.path.realpath(__file__)),
"json-schema-to-grammar.py"), "json_schema_to_grammar.py"),
*rest, *rest,
"-", "-",
"--raw-pattern", "--raw-pattern",

View file

@ -11,7 +11,7 @@ install(TARGETS ${TARGET} RUNTIME)
target_compile_definitions(${TARGET} PRIVATE
    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
)
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
if (LLAMA_SERVER_SSL)
    find_package(OpenSSL REQUIRED)
    target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)

View file

@ -11,6 +11,7 @@ Set of LLM REST APIs and a simple web front end to interact with llama.cpp.
* Continuous batching
* Multimodal (wip)
* Monitoring endpoints
* Schema-constrained JSON response format

The project is under active development, and we are [looking for feedback and contributors](https://github.com/ggerganov/llama.cpp/issues/4216).
@ -250,6 +251,8 @@ node index.js
`grammar`: Set grammar for grammar-based sampling. Default: no grammar
`json_schema`: Set a JSON schema for grammar-based sampling (e.g. `{"items": {"type": "string"}, "minItems": 10, "maxItems": 100}` of a list of strings, or `{}` for any JSON). See [tests](../../tests/test-json-schema-to-grammar.cpp) for supported features. Default: no JSON schema.
`seed`: Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.

`ignore_eos`: Ignore end of stream token and continue generating. Default: `false`
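
The `json_schema` option above can be exercised with a plain HTTP request; a minimal sketch (host, port, prompt and schema values are illustrative):

```
import requests

# Sketch: constrain /completion output to a JSON array of strings.
response = requests.post(
    "http://localhost:8080/completion",
    json={
        "prompt": "List some fruits:",
        "json_schema": {"items": {"type": "string"}, "minItems": 3, "maxItems": 5},
    },
)
print(response.json()["content"])  # the schema-constrained generation
```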
@ -365,6 +368,8 @@ Notice that each `probs` is an array of length `n_probs`.
See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such as `mirostat` are supported.
The `response_format` parameter supports both plain JSON output (e.g. `{"type": "json_object"}`) and schema-constrained JSON (e.g. `{"type": "json_object", "schema": {"type": "string", "minLength": 10, "maxLength": 100}}`), similar to other OpenAI-inspired API providers.
*Examples:*

You can use either Python `openai` library with appropriate checkpoints:
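
For instance, a schema-constrained sketch (the base URL, API key and schema values are illustrative; adjust to your deployment):

```
import openai

client = openai.OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key-required")
completion = client.chat.completions.create(
    model="local-model",  # name is illustrative; the server answers with whichever model it was started with
    messages=[{"role": "user", "content": "Describe the weather in one short sentence."}],
    response_format={
        "type": "json_object",
        "schema": {"type": "string", "minLength": 10, "maxLength": 100},
    },
)
print(completion.choices[0].message.content)
```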

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@ -51,26 +51,6 @@
        margin-bottom: 0.5em;
      }

      #write form {
        margin: 1em 0 0 0;
        display: flex;
@ -587,7 +567,7 @@
            runCompletion();
          }
          return html`
            <div>
              <button onclick=${submit} type="button" disabled=${generating.value}>Start</button>
              <button onclick=${stop} disabled=${!generating.value}>Stop</button>
              <button onclick=${reset}>Reset</button>

View file

@ -1,33 +1,95 @@
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
const SPACE_RULE = '" "?';

function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
  const separatorRule = opts.separatorRule ?? '';
  const itemRuleIsLiteral = opts.itemRuleIsLiteral ?? false;

  if (separatorRule === '') {
    if (minItems === 0 && maxItems === 1) {
      return `${itemRule}?`;
    } else if (minItems === 1 && maxItems === undefined) {
      return `${itemRule}+`;
    }
  }

  let result = '';
  if (minItems > 0) {
    if (itemRuleIsLiteral && separatorRule === '') {
      result = `"${itemRule.slice(1, -1).repeat(minItems)}"`;
    } else {
      result = Array.from({ length: minItems }, () => itemRule)
        .join(separatorRule !== '' ? ` ${separatorRule} ` : ' ');
    }
  }

  const optRepetitions = (upToN, prefixWithSep=false) => {
    const content = separatorRule !== '' && prefixWithSep ? `${separatorRule} ${itemRule}` : itemRule;
    if (upToN === 0) {
      return '';
    } else if (upToN === 1) {
      return `(${content})?`;
    } else if (separatorRule !== '' && !prefixWithSep) {
      return `(${content} ${optRepetitions(upToN - 1, true)})?`;
    } else {
      return Array.from({ length: upToN }, () => `(${content}`).join(' ').trim() + Array.from({ length: upToN }, () => ')?').join('');
    }
  };

  if (minItems > 0 && maxItems !== minItems) {
    result += ' ';
  }

  if (maxItems !== undefined) {
    result += optRepetitions(maxItems - minItems, minItems > 0);
  } else {
    const itemOperator = `(${separatorRule !== '' ? separatorRule + ' ' : ''}${itemRule})`;

    if (minItems === 0 && separatorRule !== '') {
      result = `(${itemRule} ${itemOperator}*)?`;
    } else {
      result += `${itemOperator}*`;
    }
  }

  return result;
}
class BuiltinRule {
constructor(content, deps) {
this.content = content;
this.deps = deps || [];
}
}
const UP_TO_15_DIGITS = _buildRepetition('[0-9]', 0, 15);
const PRIMITIVE_RULES = {
boolean : new BuiltinRule('("true" | "false") space', []),
'decimal-part' : new BuiltinRule('[0-9] ' + UP_TO_15_DIGITS, []),
'integral-part': new BuiltinRule('[0-9] | [1-9] ' + UP_TO_15_DIGITS, []),
number : new BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
integer : new BuiltinRule('("-"? integral-part) space', ['integral-part']),
value : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
object : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
array : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
uuid : new BuiltinRule('"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space', []),
char : new BuiltinRule(`[^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])`, []),
string : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']),
null : new BuiltinRule('"null" space', []),
};

// TODO: support "uri", "email" string formats
const STRING_FORMAT_RULES = {
  'date'            : new BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
  'time'            : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
  'date-time'       : new BuiltinRule('date "T" time', ['date', 'time']),
  'date-string'     : new BuiltinRule('"\\"" date "\\"" space', ['date']),
  'time-string'     : new BuiltinRule('"\\"" time "\\"" space', ['time']),
  'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']),
}

const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES};

const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
@ -158,7 +220,7 @@ export class SchemaConverter {
        rule = '[\\U00000000-\\U0010FFFF]';
      } else {
        // Accept any character... except \n and \r line break chars (\x0A and \x0D)
        rule = '[^\\x0A\\x0D]';
      }
      return this._addRule('dot', rule);
    };
@ -259,13 +321,6 @@ export class SchemaConverter {
      let [sub, subIsLiteral] = seq[seq.length - 1];

      if (!subIsLiteral) {
        let id = subRuleIds[sub];
        if (id === undefined) {
@ -275,10 +330,10 @@ export class SchemaConverter {
          sub = id;
        }

        seq[seq.length - 1] = [
          _buildRepetition(subIsLiteral ? `"${sub}"` : sub, minTimes, maxTimes, {itemRuleIsLiteral: subIsLiteral}),
          false
        ];
      } else {
        let literal = '';
        while (i < length) {
@ -394,49 +449,50 @@ export class SchemaConverter {
        );
      } else {
        const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
        const minItems = schema.minItems || 0;
        const maxItems = schema.maxItems;
        return this._addRule(ruleName, '"[" space ' + _buildRepetition(itemRuleName, minItems, maxItems, {separatorRule: '"," space'}) + ' "]" space');
      }
    } else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
      return this._visitPattern(schema.pattern, ruleName);
    } else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
      return this._addPrimitive(
        ruleName === 'root' ? 'root' : schemaFormat,
        PRIMITIVE_RULES['uuid']
      );
    } else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) {
      const primName = `${schema.format}-string`;
      return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName]));
    } else if (schemaType === 'string' && ('minLength' in schema || 'maxLength' in schema)) {
      const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
      const minLen = schema.minLength || 0;
      const maxLen = schema.maxLength;
      return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
    } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
      return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
    } else {
      if (!(schemaType in PRIMITIVE_RULES)) {
        throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
      }
      // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
      return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
    }
  }
_addPrimitive(name, rule) {
let n = this._addRule(name, rule.content);
for (const dep of rule.deps) {
const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep];
if (!depRule) {
throw new Error(`Rule ${dep} not known`);
}
if (!(dep in this._rules)) {
this._addPrimitive(dep, depRule);
}
}
return n;
}
  _buildObjectRule(properties, required, name, additionalProperties) {
    const propOrder = this._propOrder;
    // sort by position in prop_order (if specified) then by original order
@ -462,7 +518,7 @@ export class SchemaConverter {
      const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
      propKvRuleNames['*'] = this._addRule(
        `${subName}-kv`,
        `${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
      optionalProps.push('*');
    }

View file

@ -859,7 +859,7 @@ struct server_context {
        slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);

        // process "json_schema" and "grammar"
        if (data.contains("json_schema") && !data["json_schema"].is_null() && data.contains("grammar") && !data["grammar"].is_null()) {
            send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST);
            return false;
        } else if (data.contains("json_schema") && !data.contains("grammar")) {

View file

@ -20,4 +20,4 @@ cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
#cmake --build . --config Release --target llama-bench
#build all binary
cmake --build . --config Release -j -v

View file

@ -12,6 +12,7 @@ if [ $# -gt 0 ]; then
    GGML_SYCL_SINGLE_GPU=1
else
    GGML_SYCL_DEVICE=0
    GGML_SYCL_SINGLE_GPU=0
fi

#export GGML_SYCL_DEBUG=1

View file

@ -1,7 +1,7 @@
#!/bin/bash
#
# ./examples/ts-type-to-grammar.sh "{a:string,b:string,c?:string}"
# python examples/json_schema_to_grammar.py https://json.schemastore.org/tsconfig.json
#
set -euo pipefail
@ -25,4 +25,4 @@ npx ts-json-schema-generator --unstable --no-top-ref --path "$DTS_FILE" --type M
# https://github.com/YousefED/typescript-json-schema
# npx typescript-json-schema --defaultProps --required "$DTS_FILE" MyType | tee "$SCHEMA_FILE" >&2

./examples/json_schema_to_grammar.py "$SCHEMA_FILE"

flake.lock generated
View file

@ -20,11 +20,11 @@
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1712791164,
        "narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5",
        "type": "github"
      },
      "original": {

View file

@ -1946,7 +1946,7 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
    } else if (!split && !fp16_performance_good && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) {
        // KQV single-batch
        ggml_cuda_mul_mat_vec_nc(ctx, src0, src1, dst);
    } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || fp16_performance_good) && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) {
        // KQ + KQV multi-batch
        ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst);
    } else if (use_dequantize_mul_mat_vec) {

View file

@ -37,6 +37,7 @@ enum ggml_metal_kernel_type {
    GGML_METAL_KERNEL_TYPE_DIV_ROW,
    GGML_METAL_KERNEL_TYPE_SCALE,
    GGML_METAL_KERNEL_TYPE_SCALE_4,
    GGML_METAL_KERNEL_TYPE_CLAMP,
    GGML_METAL_KERNEL_TYPE_TANH,
    GGML_METAL_KERNEL_TYPE_RELU,
    GGML_METAL_KERNEL_TYPE_GELU,
@ -468,6 +469,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_DIV_ROW, div_row, true);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SCALE, scale, true);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SCALE_4, scale_4, true);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_CLAMP, clamp, true);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_TANH, tanh, true);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RELU, relu, true);
        GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GELU, gelu, true);
@ -713,6 +715,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const
        case GGML_OP_MUL:
        case GGML_OP_DIV:
        case GGML_OP_SCALE:
        case GGML_OP_CLAMP:
        case GGML_OP_SQR:
        case GGML_OP_SUM_ROWS:
            return true;
@ -1152,6 +1155,25 @@ static enum ggml_status ggml_metal_graph_compute(
                [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
                [encoder setBytes:&scale length:sizeof(scale) atIndex:2];
[encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
} break;
case GGML_OP_CLAMP:
{
id<MTLComputePipelineState> pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_CLAMP].pipeline;
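                        // the clamp bounds arrive packed as two consecutive floats in dst->op_params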
float min;
float max;
memcpy(&min, ((int32_t *) dst->op_params) + 0, sizeof(float));
memcpy(&max, ((int32_t *) dst->op_params) + 1, sizeof(float));
[encoder setComputePipelineState:pipeline];
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
[encoder setBuffer:id_dst offset:offs_dst atIndex:1];
[encoder setBytes:&min length:sizeof(min) atIndex:2];
[encoder setBytes:&max length:sizeof(max) atIndex:3];
const int64_t n = ggml_nelements(dst);
                [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
            } break;
        case GGML_OP_UNARY:
@ -1926,7 +1948,12 @@ static enum ggml_status ggml_metal_graph_compute(
                    {
                        nth0 = 4;
                        nth1 = 16;
#if QK_K == 64
pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_NL_F32].pipeline;
#else
                        pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_ID_IQ4_XS_F32].pipeline;
#endif
                    } break;
                default:
                    {

File diff suppressed because it is too large

View file

@ -3154,7 +3154,6 @@ typedef float (*vec_dot_q_mul_mat_sycl_t)(
#define SYCL_SCALE_BLOCK_SIZE 256
#define SYCL_CLAMP_BLOCK_SIZE 256
#define SYCL_ROPE_BLOCK_SIZE 256
#define SYCL_ALIBI_BLOCK_SIZE 32
#define SYCL_DIAG_MASK_INF_BLOCK_SIZE 32
#define SYCL_QUANTIZE_BLOCK_SIZE 256
@ -13080,11 +13079,13 @@ static void soft_max_f32_sycl(const float * x, const float * mask, const float *
                              const int nrows_y, const float scale, const float max_bias,
                              dpct::queue_ptr stream) {
    int nth = WARP_SIZE;
    int max_block_size = g_work_group_size;
    while (nth < ncols_x && nth < max_block_size) nth *= 2;
    if (nth>max_block_size) nth = max_block_size;
    const sycl::range<3> block_dims(1, 1, nth);
    const sycl::range<3> block_nums(1, 1, nrows_x);
    const size_t n_local_scratch = (GGML_PAD(ncols_x, WARP_SIZE) + WARP_SIZE);

    const uint32_t n_head_kv   = nrows_x/nrows_y;
    const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head_kv));
@ -13094,6 +13095,12 @@ static void soft_max_f32_sycl(const float * x, const float * mask, const float *
    const size_t local_mem_size = stream->get_device().get_info<sycl::info::device::local_mem_size>();
    if (n_local_scratch*sizeof(float) < local_mem_size) {
if (ncols_x > max_block_size) {
soft_max_f32_submitter<true, 0, 0>(x, mask, pos, dst, ncols_x, nrows_y, scale,
max_bias, m0, m1, n_head_log2, block_nums,
block_dims, n_local_scratch, stream);
return;
}
        switch (ncols_x) {
            case 32:
                soft_max_f32_submitter<true, 32, 32>(x, mask, pos, dst, ncols_x, nrows_y, scale,
@ -15989,73 +15996,76 @@ static void ggml_sycl_mul_mat_id_sycl(ggml_tensor * dst) {
static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
                                 const ggml_tensor *src1,
                                 ggml_tensor *dst) try {
    GGML_ASSERT(src0->backend != GGML_BACKEND_TYPE_GPU_SPLIT &&
                "mul_mat_id does not support split buffers");
    const ggml_tensor *ids = dst->src[2];
    const dpct::queue_ptr stream = g_syclStreams[g_main_device][0];

    const size_t nb11 = src1->nb[1];
    const size_t nb1 = dst->nb[1];

    const int32_t id = ((int32_t *)dst->op_params)[0];
    const int32_t n_as = src0->ne[2];

    std::vector<char> ids_host(ggml_nbytes(ids));
    const char *ids_dev = (const char *)ids->data;

    SYCL_CHECK(CHECK_TRY_ERROR(
        stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
    SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));

    const ggml_tensor_extra_gpu *src0_extra =
        (const ggml_tensor_extra_gpu *)src0->extra;
    const ggml_tensor_extra_gpu *src1_extra =
        (const ggml_tensor_extra_gpu *)src1->extra;
    const ggml_tensor_extra_gpu *dst_extra =
        (const ggml_tensor_extra_gpu *)dst->extra;

    ggml_tensor_extra_gpu src0_row_extra;
    ggml_tensor_extra_gpu src1_row_extra;
    ggml_tensor_extra_gpu dst_row_extra;

    ggml_tensor src0_row = *src0;
    ggml_tensor src1_row = *src1;
    ggml_tensor dst_row = *dst;

    src1_row.backend = GGML_BACKEND_TYPE_GPU;
    dst_row.backend = GGML_BACKEND_TYPE_GPU;

    src0_row.extra = &src0_row_extra;
    src1_row.extra = &src1_row_extra;
    dst_row.extra = &dst_row_extra;

    char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
                              ? (char *)src0->data
                              : (char *)src0_extra->data_device[g_main_device];
    char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
                              ? (char *)src1->data
                              : (char *)src1_extra->data_device[g_main_device];
    char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
                             ? (char *)dst->data
                             : (char *)dst_extra->data_device[g_main_device];

    src0_row.ne[2] = 1;
    src0_row.ne[3] = 1;
    src0_row.nb[3] = src0->nb[2];

    if (src1->ne[1] == 1) {
        for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
            const int32_t row_id =
                *(const int32_t *)(ids_host.data() + i01 * ids->nb[1] +
                                   id * ids->nb[0]);

            GGML_ASSERT(row_id >= 0 && row_id < n_as);

            src0_row_extra.data_device[g_main_device] =
                src0_original + row_id * src0->nb[2];
            src1_row_extra.data_device[g_main_device] =
                src1_original + i01 * src1->nb[1];
            dst_row_extra.data_device[g_main_device] =
                dst_original + i01 * dst->nb[1];

            ggml_sycl_mul_mat(&src0_row, &src1_row, &dst_row);
        }
    } else {
        sycl_pool_alloc<char> src1_contiguous(sizeof(float)*ggml_nelements(src1));
@ -16065,8 +16075,6 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
        dst_row_extra.data_device[g_main_device] = dst_contiguous.get();

        for (int32_t row_id = 0; row_id < n_as; ++row_id) {
            int64_t num_src1_rows = 0;
            for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
                const int32_t row_id_i = *(const int32_t *) (ids_host.data() + i01*ids->nb[1] + id*ids->nb[0]);
@ -16079,7 +16087,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
                SYCL_CHECK(CHECK_TRY_ERROR(
                    stream->memcpy(src1_contiguous.get() + num_src1_rows * nb11,
                                   src1_original + i01 * nb11, nb11)));
                num_src1_rows++;
            }
@ -16087,6 +16095,9 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
                continue;
            }
src0_row_extra.data_device[g_main_device] =
src0_original + row_id * src0->nb[2];
            src1_row.ne[1] = num_src1_rows;
            dst_row.ne[1] = num_src1_rows;
@ -16098,7 +16109,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
            dst_row.nb[2] = num_src1_rows*nb1;
            dst_row.nb[3] = num_src1_rows*nb1;

            ggml_sycl_mul_mat(&src0_row, &src1_row, &dst_row);

            num_src1_rows = 0;
            for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
@ -16112,7 +16123,7 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
                SYCL_CHECK(CHECK_TRY_ERROR(stream->memcpy(
                    dst_original + i01 * nb1,
                    dst_contiguous.get() + num_src1_rows * nb1, nb1)));
                num_src1_rows++;
            }
        }
@ -16814,11 +16825,13 @@ static void ggml_backend_sycl_buffer_set_tensor(ggml_backend_buffer_t buffer,
    const dpct::queue_ptr stream = g_syclStreams[ctx->device][0];
    SYCL_CHECK(
        CHECK_TRY_ERROR(dpct::dev_mgr::instance().get_device(ctx->device).queues_wait_and_throw()));
    char* host_buf = (char*)malloc(size);
    memcpy(host_buf, data, size);
    SYCL_CHECK(
        CHECK_TRY_ERROR((*stream)
                            .memcpy((char *)tensor->data + offset, host_buf, size)
                            .wait()));
    free(host_buf);
}
catch (sycl::exception const &exc) {
    std::cerr << exc.what() << "Exception caught at file:" << __FILE__

ggml.c
View file

@ -11012,7 +11012,6 @@ static void ggml_compute_forward_mul_mat_id(
    }

    // initialize matrix_row_counts
    memset(matrix_row_counts, 0, n_as*sizeof(int64_t));

    // group rows by src0 matrix

View file

@ -126,6 +126,7 @@ class MODEL_ARCH(IntEnum):
    MAMBA = auto()
    XVERSE = auto()
    COMMAND_R = auto()
    DBRX = auto()


class MODEL_TENSOR(IntEnum):
@ -195,6 +196,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.MAMBA:     "mamba",
    MODEL_ARCH.XVERSE:    "xverse",
    MODEL_ARCH.COMMAND_R: "command-r",
    MODEL_ARCH.DBRX:      "dbrx",
}
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@ -642,6 +644,19 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.ATTN_K_NORM,
        MODEL_TENSOR.ATTN_Q_NORM,
    ],
MODEL_ARCH.DBRX: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_OUT_NORM,
MODEL_TENSOR.FFN_GATE_INP,
MODEL_TENSOR.FFN_GATE_EXP,
MODEL_TENSOR.FFN_DOWN_EXP,
MODEL_TENSOR.FFN_UP_EXP,
],
    # TODO
}

View file

@ -10,7 +10,7 @@ class TensorNameMap:
        # Token embeddings
        MODEL_TENSOR.TOKEN_EMBD: (
            "gpt_neox.embed_in",           # gptneox
            "transformer.wte",             # gpt2 gpt-j mpt refact qwen dbrx
            "transformer.word_embeddings", # falcon
            "word_embeddings",             # bloom
            "model.embed_tokens",          # llama-hf
@ -48,7 +48,7 @@ class TensorNameMap:
        # Output
        MODEL_TENSOR.OUTPUT: (
            "embed_out",                # gptneox
            "lm_head",                  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
            "output",                   # llama-pth bloom internlm2
            "word_embeddings_for_head", # persimmon
            "lm_head.linear",           # phi2
@ -60,7 +60,7 @@ class TensorNameMap:
"transformer.ln_f", # gpt2 gpt-j falcon "transformer.ln_f", # gpt2 gpt-j falcon
"model.norm", # llama-hf baichuan internlm2 "model.norm", # llama-hf baichuan internlm2
"norm", # llama-pth "norm", # llama-pth
"transformer.norm_f", # mpt "transformer.norm_f", # mpt dbrx
"ln_f", # refact bloom qwen gpt2 "ln_f", # refact bloom qwen gpt2
"language_model.encoder.final_layernorm", # persimmon "language_model.encoder.final_layernorm", # persimmon
"model.final_layernorm", # persimmon "model.final_layernorm", # persimmon
@ -96,6 +96,7 @@ class TensorNameMap:
"model.layers.{bid}.norm", # mamba-qbert "model.layers.{bid}.norm", # mamba-qbert
"backbone.layers.{bid}.norm", # mamba "backbone.layers.{bid}.norm", # mamba
"transformer.decoder_layer.{bid}.rms_norm", # Grok "transformer.decoder_layer.{bid}.rms_norm", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
), ),
# Attention norm 2 # Attention norm 2
@ -108,6 +109,7 @@ class TensorNameMap:
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox "gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
"transformer.h.{bid}.attn.c_attn", # gpt2 qwen "transformer.h.{bid}.attn.c_attn", # gpt2 qwen
"transformer.blocks.{bid}.attn.Wqkv", # mpt "transformer.blocks.{bid}.attn.Wqkv", # mpt
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
"transformer.h.{bid}.self_attention.query_key_value", # falcon "transformer.h.{bid}.self_attention.query_key_value", # falcon
"h.{bid}.self_attention.query_key_value", # bloom "h.{bid}.self_attention.query_key_value", # bloom
"language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon
@ -168,7 +170,8 @@ class TensorNameMap:
"model.layers.layers.{bid}.self_attn.o_proj", # plamo "model.layers.layers.{bid}.self_attn.o_proj", # plamo
"model.layers.{bid}.attention.wo", # internlm2 "model.layers.{bid}.attention.wo", # internlm2
"encoder.layers.{bid}.attn.out_proj", # nomic-bert "encoder.layers.{bid}.attn.out_proj", # nomic-bert
"transformer.decoder_layer.{bid}.multi_head_attention.linear"# Grok "transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
), ),
# Attention output norm # Attention output norm
@ -176,6 +179,7 @@ class TensorNameMap:
"encoder.layer.{bid}.attention.output.LayerNorm", # bert "encoder.layer.{bid}.attention.output.LayerNorm", # bert
"encoder.layers.{bid}.norm1", # nomic-bert "encoder.layers.{bid}.norm1", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_1", # Grok "transformer.decoder_layer.{bid}.rms_norm_1", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
), ),
# Rotary embeddings # Rotary embeddings
@ -204,7 +208,8 @@ class TensorNameMap:
        MODEL_TENSOR.FFN_GATE_INP: (
            "layers.{bid}.feed_forward.gate",            # mixtral
            "model.layers.{bid}.block_sparse_moe.gate",  # mixtral
            "transformer.decoder_layer.{bid}.router",    # Grok
            "transformer.blocks.{bid}.ffn.router.layer", # dbrx
        ),

        # Feed-forward up
@ -233,6 +238,7 @@ class TensorNameMap:
        MODEL_TENSOR.FFN_UP_EXP: (
            "layers.{bid}.feed_forward.experts.w3",         # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.v1",  # dbrx
        ),

        # AWQ-activation gate
@ -252,7 +258,8 @@ class TensorNameMap:
        MODEL_TENSOR.FFN_GATE_EXP: (
            "layers.{bid}.feed_forward.experts.w1",         # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear",   # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w1",  # dbrx
        ),

        # Feed-forward down
@ -280,6 +287,7 @@ class TensorNameMap:
        MODEL_TENSOR.FFN_DOWN_EXP: (
            "layers.{bid}.feed_forward.experts.w2",         # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w2",  # dbrx
        ),

        MODEL_TENSOR.ATTN_Q_NORM: (

View file

@ -93,3 +93,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory
```
./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt'
```
## Troubleshooting
Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218).
### Efficient optional repetitions
A common pattern is to allow repetitions of a pattern `x` up to N times.
While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting), as sketched below.
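
The nesting can be generated mechanically. A minimal Python sketch (the helper name is illustrative; the schema converters in this repo use the same construction in `_build_repetition`):

```
def nest_optional(x: str, n: int) -> str:
    # Builds '(x (x (x ... (x)?...)?)?)?' with n levels of nesting.
    return (f'({x} ' * n).rstrip() + ')?' * n

print(nest_optional('x', 4))  # -> (x (x (x (x)?)?)?)?
```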

llama.cpp
View file

@ -105,7 +105,7 @@
#endif

#define LLAMA_MAX_NODES   8192
#define LLAMA_MAX_EXPERTS 16

//
@ -220,6 +220,7 @@ enum llm_arch {
    LLM_ARCH_MAMBA,
    LLM_ARCH_XVERSE,
    LLM_ARCH_COMMAND_R,
    LLM_ARCH_DBRX,
    LLM_ARCH_UNKNOWN,
};
@ -252,6 +253,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
    { LLM_ARCH_MAMBA,           "mamba"     },
    { LLM_ARCH_XVERSE,          "xverse"    },
    { LLM_ARCH_COMMAND_R,       "command-r" },
    { LLM_ARCH_DBRX,            "dbrx"      },
    { LLM_ARCH_UNKNOWN,         "(unknown)" },
};
@ -934,6 +936,22 @@ static const std::map<llm_arch, std::map<llm_tensor, std::string>> LLM_TENSOR_NA
            { LLM_TENSOR_ATTN_K_NORM,    "blk.%d.attn_k_norm" },
        },
    },
{
LLM_ARCH_DBRX,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_ATTN_OUT_NORM, "blk.%d.attn_output_norm" },
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
},
},
    {
        LLM_ARCH_UNKNOWN,
        {
@ -1707,6 +1725,7 @@ enum e_model {
    MODEL_XL,
    MODEL_8x7B,
    MODEL_8x22B,
    MODEL_16x12B,
};

static const size_t kiB = 1024;
@ -3562,6 +3581,7 @@ static const char * llama_model_type_name(e_model type) {
        case MODEL_XL:     return "1.5B";
        case MODEL_8x7B:   return "8x7B";
        case MODEL_8x22B:  return "8x22B";
        case MODEL_16x12B: return "16x12B";
        default:           return "?B";
    }
}
@ -3983,6 +4003,16 @@ static void llm_load_hparams(
default: model.type = e_model::MODEL_UNKNOWN; default: model.type = e_model::MODEL_UNKNOWN;
} }
} break; } break;
case LLM_ARCH_DBRX:
{
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
ml.get_key(LLM_KV_ATTENTION_CLAMP_KQV, hparams.f_clamp_kqv);
switch (hparams.n_layer) {
case 40: model.type = e_model::MODEL_16x12B; break;
default: model.type = e_model::MODEL_UNKNOWN;
}
} break;
default: (void)0; default: (void)0;
} }
@ -4671,6 +4701,39 @@ static bool llm_load_tensors(
layer.layer_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd}); layer.layer_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd});
} }
} break; } break;
case LLM_ARCH_DBRX:
{
if (n_expert == 0) {
throw std::runtime_error("DBRX model cannot have zero experts");
}
model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
// output
{
model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
model.output = ml.create_tensor(ctx_output_split, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab});
}
for (int i = 0; i < n_layer; ++i) {
ggml_context * ctx_layer = ctx_for_layer(i);
ggml_context * ctx_split = ctx_for_layer_split(i);
auto & layer = model.layers[i];
layer.attn_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd});
layer.wqkv = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa});
layer.wo = ml.create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd});
layer.attn_out_norm = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd});
layer.ffn_gate_inp = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert});
layer.ffn_gate_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff, n_expert});
layer.ffn_down_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff, n_embd, n_expert});
layer.ffn_up_exps = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff, n_expert});
}
} break;
case LLM_ARCH_BAICHUAN: case LLM_ARCH_BAICHUAN:
{ {
model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}); model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
@ -6433,6 +6496,40 @@ struct llm_build_context {
LLM_NORM_RMS, cb, il); LLM_NORM_RMS, cb, il);
cb(cur, "ffn_norm", il); cb(cur, "ffn_norm", il);
cur = build_moe_ffn(cur, n_tokens, LLM_FFN_SILU, il);
}
cur = ggml_add(ctx0, cur, ffn_inp);
cb(cur, "ffn_out", il);
ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
if (layer_dir != nullptr) {
cur = ggml_add(ctx0, cur, layer_dir);
}
cb(cur, "l_out", il);
// input for next layer
inpL = cur;
}
cur = inpL;
cur = llm_build_norm(ctx0, cur, hparams,
model.output_norm, NULL,
LLM_NORM_RMS, cb, -1);
cb(cur, "result_norm", -1);
// lm_head
cur = ggml_mul_mat(ctx0, model.output, cur);
cb(cur, "result_output", -1);
ggml_build_forward_expand(gf, cur);
return gf;
}
// REVIEW: will be replaced by https://github.com/ggerganov/llama.cpp/pull/6505
ggml_tensor * build_moe_ffn(ggml_tensor * cur, int32_t n_tokens, llm_ffn_op_type type_op, int il) {
ggml_tensor * logits = ggml_mul_mat(ctx0, model.layers[il].ffn_gate_inp, cur); // [n_tokens, num_experts] ggml_tensor * logits = ggml_mul_mat(ctx0, model.layers[il].ffn_gate_inp, cur); // [n_tokens, num_experts]
cb(logits, "ffn_moe_logits", il); cb(logits, "ffn_moe_logits", il);
@ -6464,13 +6561,25 @@ struct llm_build_context {
ggml_tensor * cur_up = ggml_mul_mat_id(ctx0, model.layers[il].ffn_up_exps, selected_experts, i, cur); ggml_tensor * cur_up = ggml_mul_mat_id(ctx0, model.layers[il].ffn_up_exps, selected_experts, i, cur);
cb(cur_up, "ffn_moe_up", il); cb(cur_up, "ffn_moe_up", il);
ggml_tensor * cur_gate = ggml_mul_mat_id(ctx0, model.layers[il].ffn_gate_exps, selected_experts, i, cur); ggml_tensor * gate = ggml_mul_mat_id(ctx0, model.layers[il].ffn_gate_exps, selected_experts, i, cur);
cb(cur_gate, "ffn_moe_gate", il); cb(gate, "ffn_moe_gate", il);
cur_gate = ggml_silu(ctx0, cur_gate); switch (type_op) {
cb(cur_gate, "ffn_moe_silu", il); case LLM_FFN_SILU:
{
gate = ggml_silu(ctx0, gate);
cb(gate, "ffn_moe_silu", il);
} break;
case LLM_FFN_GELU:
{
gate = ggml_gelu(ctx0, gate);
cb(gate, "ffn_moe_gelu", il);
} break;
default:
GGML_ASSERT(false);
}
cur_expert = ggml_mul(ctx0, cur_up, cur_gate); cur_expert = ggml_mul(ctx0, cur_up, gate);
cb(cur_expert, "ffn_moe_gate_par", il); cb(cur_expert, "ffn_moe_gate_par", il);
cur_expert = ggml_mul_mat_id(ctx0, model.layers[il].ffn_down_exps, selected_experts, i, cur_expert); // [n_tokens, n_embd] cur_expert = ggml_mul_mat_id(ctx0, model.layers[il].ffn_down_exps, selected_experts, i, cur_expert); // [n_tokens, n_embd]
@ -6488,36 +6597,7 @@ struct llm_build_context {
} }
} }
cur = moe_out; return moe_out;
}
cur = ggml_add(ctx0, cur, ffn_inp);
cb(cur, "ffn_out", il);
ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
if (layer_dir != nullptr) {
cur = ggml_add(ctx0, cur, layer_dir);
}
cb(cur, "l_out", il);
// input for next layer
inpL = cur;
}
cur = inpL;
cur = llm_build_norm(ctx0, cur, hparams,
model.output_norm, NULL,
LLM_NORM_RMS, cb, -1);
cb(cur, "result_norm", -1);
// lm_head
cur = ggml_mul_mat(ctx0, model.output, cur);
cb(cur, "result_output", -1);
ggml_build_forward_expand(gf, cur);
return gf;
} }
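
For reference, the routing math shared by these MoE graphs (softmax over the gate logits, top-k expert selection, then renormalizing the selected weights) can be sanity-checked with a plain C++ sketch. This is illustrative only, not ggml; the function name and containers are assumptions:

```cpp
// Plain-C++ sketch of the routing in build_moe_ffn: softmax over per-token
// expert logits (ffn_moe_probs), top-k selection (ffn_moe_argsort), and
// renormalized weights (ffn_moe_weights_norm).
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

static void route_experts(const std::vector<float> & logits, int k,
                          std::vector<int> & experts, std::vector<float> & weights) {
    experts.resize(logits.size());
    std::iota(experts.begin(), experts.end(), 0);
    // top-k by logit; softmax is monotonic, so this matches top-k by probability
    std::partial_sort(experts.begin(), experts.begin() + k, experts.end(),
        [&](int a, int b) { return logits[a] > logits[b]; });
    experts.resize(k);
    // softmax restricted to the selected experts equals the full softmax
    // followed by division by the selected weights' sum (ggml_div by
    // ffn_moe_weights_sum), since the shared normalizer cancels out
    const float mx = logits[experts[0]];
    weights.resize(k);
    float sum = 0.0f;
    for (int i = 0; i < k; ++i) {
        weights[i] = std::exp(logits[experts[i]] - mx);
        sum += weights[i];
    }
    for (float & w : weights) { w /= sum; }
}

int main() {
    std::vector<int> e; std::vector<float> w;
    route_experts({0.1f, 2.0f, -1.0f, 0.5f}, /*k=*/2, e, w); // e = {1, 3}
    return 0;
}
```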
struct ggml_cgraph * build_baichuan() { struct ggml_cgraph * build_baichuan() {
@ -6967,63 +7047,7 @@ struct llm_build_context {
LLM_NORM_RMS, cb, il); LLM_NORM_RMS, cb, il);
cb(cur, "ffn_norm", il); cb(cur, "ffn_norm", il);
ggml_tensor * logits = ggml_mul_mat(ctx0, model.layers[il].ffn_gate_inp, cur); // [n_tokens, num_experts] cur = build_moe_ffn(cur, n_tokens, LLM_FFN_GELU, il);
cb(logits, "ffn_moe_logits", il);
ggml_tensor * probs = ggml_soft_max(ctx0, logits); // [n_tokens, num_experts]
cb(probs, "ffn_moe_probs", il);
// select experts
ggml_tensor * selected_experts = ggml_top_k(ctx0, probs, n_expert_used); // [n_tokens, num_experts_per_tok]
cb(selected_experts->src[0], "ffn_moe_argsort", il);
ggml_tensor * weights = ggml_get_rows(ctx0,
ggml_reshape_3d(ctx0, probs, 1, n_expert, n_tokens), selected_experts);
cb(weights, "ffn_moe_weights", il);
weights = ggml_reshape_2d(ctx0, weights, n_expert_used, n_tokens); // [n_tokens, num_experts_per_tok]
ggml_tensor * weights_sum = ggml_sum_rows(ctx0, weights);
cb(weights_sum, "ffn_moe_weights_sum", il);
weights = ggml_div(ctx0, weights, weights_sum); // [n_tokens, num_experts_per_tok]
cb(weights, "ffn_moe_weights_norm", il);
// compute expert outputs
ggml_tensor * moe_out = nullptr;
for (int i = 0; i < n_expert_used; ++i) {
ggml_tensor * cur_expert;
ggml_tensor * cur_up = ggml_mul_mat_id(ctx0, model.layers[il].ffn_up_exps, selected_experts, i, cur);
cb(cur_up, "ffn_moe_up", il);
ggml_tensor * cur_gate = ggml_mul_mat_id(ctx0, model.layers[il].ffn_gate_exps, selected_experts, i, cur);
cb(cur_gate, "ffn_moe_gate", il);
//GeLU
cur_gate = ggml_gelu(ctx0, cur_gate);
cb(cur_gate, "ffn_moe_gelu", il);
cur_expert = ggml_mul(ctx0, cur_up, cur_gate);
cb(cur_expert, "ffn_moe_gate_par", il);
cur_expert = ggml_mul_mat_id(ctx0, model.layers[il].ffn_down_exps, selected_experts, i, cur_expert); // [n_tokens, n_embd]
cb(cur_expert, "ffn_moe_down", il);
cur_expert = ggml_mul(ctx0, cur_expert,
ggml_view_2d(ctx0, weights, 1, n_tokens, weights->nb[1], i*weights->nb[0]));
cb(cur_expert, "ffn_moe_weighted", il);
if (i == 0) {
moe_out = cur_expert;
} else {
moe_out = ggml_add(ctx0, moe_out, cur_expert);
cb(moe_out, "ffn_moe_out", il);
}
}
cur = moe_out;
// Grok // Grok
// if layer_out_norm is present then apply it before adding the input // if layer_out_norm is present then apply it before adding the input
@ -7071,6 +7095,126 @@ struct llm_build_context {
return gf; return gf;
} }
struct ggml_cgraph * build_dbrx() {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
// mutable variable, needed during the last layer of the computation to skip unused tokens
int32_t n_tokens = this->n_tokens;
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_head == hparams.n_rot);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
// inp_pos - contains the positions
struct ggml_tensor * inp_pos = build_inp_pos();
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
for (int il = 0; il < n_layer; ++il) {
struct ggml_tensor * inpSA = inpL;
// norm
cur = llm_build_norm(ctx0, inpL, hparams,
model.layers[il].attn_norm, NULL,
LLM_NORM, cb, il);
cb(cur, "attn_norm", il);
// self-attention
{
struct ggml_tensor * Qcur = nullptr;
struct ggml_tensor * Kcur = nullptr;
struct ggml_tensor * Vcur = nullptr;
cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur);
cb(cur, "wqkv", il);
cur = ggml_clamp(ctx0, cur, -hparams.f_clamp_kqv, hparams.f_clamp_kqv);
cb(cur, "wqkv_clamped", il);
Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd)));
Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd)));
Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa)));
cb(Qcur, "Qcur", il);
cb(Kcur, "Kcur", il);
cb(Vcur, "Vcur", il);
Qcur = ggml_rope_custom(
ctx0, ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens), inp_pos,
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
ext_factor, attn_factor, beta_fast, beta_slow
);
cb(Qcur, "Qcur", il);
Kcur = ggml_rope_custom(
ctx0, ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens), inp_pos,
n_rot, rope_type, 0, n_orig_ctx, freq_base, freq_scale,
ext_factor, attn_factor, beta_fast, beta_slow
);
cb(Kcur, "Kcur", il);
cur = llm_build_kv(ctx0, model, hparams, kv_self, gf,
model.layers[il].wo, NULL,
Kcur, Vcur, Qcur, KQ_mask, nullptr, n_ctx, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);
}
if (il == n_layer - 1) {
// skip computing output for unused tokens
struct ggml_tensor * inp_out_ids = build_inp_out_ids();
n_tokens = n_outputs;
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
}
struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
cb(ffn_inp, "ffn_inp", il);
// feed-forward network
// MoE branch
cur = llm_build_norm(ctx0, ffn_inp, hparams,
model.layers[il].attn_out_norm, NULL,
LLM_NORM, cb, il);
cb(cur, "attn_out_norm", il);
cur = build_moe_ffn(cur, n_tokens, LLM_FFN_SILU, il);
cur = ggml_add(ctx0, cur, ffn_inp);
cb(cur, "ffn_out", il);
ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
if (layer_dir != nullptr) {
cur = ggml_add(ctx0, cur, layer_dir);
}
cb(cur, "l_out", il);
// input for next layer
inpL = cur;
}
cur = inpL;
cur = llm_build_norm(ctx0, cur, hparams,
model.output_norm, NULL,
LLM_NORM, cb, -1);
cb(cur, "result_norm", -1);
// lm_head
cur = ggml_mul_mat(ctx0, model.output, cur);
cb(cur, "result_output", -1);
ggml_build_forward_expand(gf, cur);
return gf;
}
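
A quick standalone check of the fused-QKV view offsets used in `build_dbrx` above, with assumed DBRX-like head dimensions (the real values come from the GGUF hparams):

```cpp
// Sanity check (assumed dbrx-like dims) of the element offsets used by the
// ggml_view_2d calls that split one fused wqkv output row into Q, K and V.
#include <cassert>

int main() {
    const int n_embd_head = 128;               // assumed head size
    const int n_head = 48, n_head_kv = 8;      // assumed head counts
    const int n_embd     = n_embd_head * n_head;    // width of the Q slice
    const int n_embd_gqa = n_embd_head * n_head_kv; // width of the K and V slices
    const int off_q = 0;
    const int off_k = n_embd;              // byte offset 1*sizeof(float)*(n_embd)
    const int off_v = n_embd + n_embd_gqa; // byte offset 1*sizeof(float)*(n_embd + n_embd_gqa)
    assert(off_q == 0 && off_k == n_embd);
    // the three slices exactly tile the wqkv row of width n_embd + 2*n_embd_gqa
    assert(off_v + n_embd_gqa == n_embd + 2*n_embd_gqa);
    return 0;
}
```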
struct ggml_cgraph * build_starcoder() { struct ggml_cgraph * build_starcoder() {
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false); struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, LLAMA_MAX_NODES, false);
@ -9785,6 +9929,10 @@ static struct ggml_cgraph * llama_build_graph(
{ {
result = llm.build_command_r(); result = llm.build_command_r();
} break; } break;
case LLM_ARCH_DBRX:
{
result = llm.build_dbrx();
} break;
default: default:
GGML_ASSERT(false); GGML_ASSERT(false);
} }
@ -12915,6 +13063,11 @@ struct llama_beam_search_data {
} }
llama_logit_info logit_info(ctx); llama_logit_info logit_info(ctx);
std::vector<llama_token_data> next_tokens = logit_info.top_k(n_beams); std::vector<llama_token_data> next_tokens = logit_info.top_k(n_beams);
// Clear the kv slot so that other beams may try different tokens at this position. The llama_decode()
// call in loop() will conclusively fill in the kv slot once the beams converge at this position.
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
size_t i=0; size_t i=0;
if (next_beams.size() < n_beams) { if (next_beams.size() < n_beams) {
for (; next_beams.size() < n_beams ; ++i) { for (; next_beams.size() < n_beams ; ++i) {
@ -14638,6 +14791,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
// the pairs of head values are offset by n_rot/2 // the pairs of head values are offset by n_rot/2
case LLM_ARCH_FALCON: case LLM_ARCH_FALCON:
case LLM_ARCH_GROK: case LLM_ARCH_GROK:
case LLM_ARCH_DBRX:
case LLM_ARCH_PERSIMMON: case LLM_ARCH_PERSIMMON:
case LLM_ARCH_BERT: case LLM_ARCH_BERT:
case LLM_ARCH_NOMIC_BERT: case LLM_ARCH_NOMIC_BERT:
@ -15324,6 +15478,8 @@ size_t llama_state_set_data(struct llama_context * ctx, const uint8_t * src) {
GGML_ASSERT((uint32_t) id < ctx->cparams.n_batch); GGML_ASSERT((uint32_t) id < ctx->cparams.n_batch);
ctx->output_ids[id] = i; ctx->output_ids[id] = i;
} }
ctx->n_outputs = n_outputs;
} }
} }
@ -16476,6 +16632,21 @@ static int32_t llama_chat_apply_template_internal(
if (add_ass) { if (add_ass) {
ss << "### Response:\n"; ss << "### Response:\n";
} }
} else if (tmpl == "command-r" || (tmpl.find("<|START_OF_TURN_TOKEN|>") != std::string::npos && tmpl.find("<|USER_TOKEN|>") != std::string::npos)) {
// CohereForAI/c4ai-command-r-plus
for (auto message : chat) {
std::string role(message->role);
if (role == "system") {
ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
} else if (role == "user") {
ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
} else if (role == "assistant") {
ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
}
}
if (add_ass) {
ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
}
} else { } else {
// template not supported // template not supported
return -1; return -1;
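
The new Command R branch can be exercised through the public `llama_chat_apply_template` API by passing the template name directly, in which case no model is needed (as the updated test-chat-template.cpp below also does). A minimal sketch, with an arbitrary buffer size:

```cpp
// Minimal usage sketch for the "command-r" template handler added above.
#include "llama.h"
#include <cstdio>
#include <vector>

int main() {
    llama_chat_message chat[] = {
        { "system", "You are a helpful assistant" },
        { "user",   "Hello"                       },
    };
    std::vector<char> buf(1024); // arbitrary; grow if the call returns more
    const int32_t n = llama_chat_apply_template(
        /*model=*/nullptr, "command-r", chat, 2, /*add_ass=*/true,
        buf.data(), (int32_t) buf.size());
    if (n > 0 && n <= (int32_t) buf.size()) {
        printf("%.*s\n", n, buf.data()); // ends with <|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
    }
    return 0;
}
```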

View file

@ -1,10 +1,11 @@
#!/bin/bash #!/bin/bash
wget https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip wget https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
unzip wikitext-2-raw-v1.zip
echo "Usage:" echo "Usage:"
echo "" echo ""
echo " ./perplexity -m model.gguf -f wiki.test.raw [other params]" echo " ./perplexity -m model.gguf -f wikitext-2-raw/wiki.test.raw [other params]"
echo "" echo ""
exit 0 exit 0

View file

@ -25,7 +25,7 @@ function(llama_test source)
add_executable(${TEST_TARGET} ${source} get-model.cpp) add_executable(${TEST_TARGET} ${source} get-model.cpp)
install(TARGETS ${TEST_TARGET} RUNTIME) install(TARGETS ${TEST_TARGET} RUNTIME)
target_link_libraries(${TEST_TARGET} PRIVATE common json-schema-to-grammar) target_link_libraries(${TEST_TARGET} PRIVATE common)
add_test( add_test(
NAME ${TEST_TARGET} NAME ${TEST_TARGET}
WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY} WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}

View file

@ -2014,6 +2014,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
for (int n_mats : {2, 4, 8}) { for (int n_mats : {2, 4, 8}) {
for (int id = 0; id < n_mats; id++) { for (int id = 0; id < n_mats; id++) {
for (bool v : {false, true}) { for (bool v : {false, true}) {
test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, id, 16, 1, 256, v));
test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, id, 16, 16, 256, v)); test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, id, 16, 16, 256, v));
} }
} }

View file

@ -45,6 +45,8 @@ int main(void) {
// Orca-Vicuna // Orca-Vicuna
// No template included in tokenizer_config.json, so this template likely needs to be manually set. // No template included in tokenizer_config.json, so this template likely needs to be manually set.
"{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}", "{%- for message in messages %}{%- if message['role'] == 'system' -%}{{-'SYSTEM: ' + message['content'] + '\n' -}}{%- else -%}{%- if message['role'] == 'user' -%}{{-'USER: ' + message['content'] + '\n'-}}{%- else -%}{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{-'ASSISTANT:'-}}{%- endif -%}",
// CohereForAI/c4ai-command-r-plus
"{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
}; };
std::vector<std::string> expected_output = { std::vector<std::string> expected_output = {
// teknium/OpenHermes-2.5-Mistral-7B // teknium/OpenHermes-2.5-Mistral-7B
@ -69,6 +71,8 @@ int main(void) {
"You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT: I am an assistant </s>\nUSER: Another question\nASSISTANT:", "You are a helpful assistant\n\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT: I am an assistant </s>\nUSER: Another question\nASSISTANT:",
// Orca-Vicuna // Orca-Vicuna
"SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT: I am an assistant </s>\nUSER: Another question\nASSISTANT:", "SYSTEM: You are a helpful assistant\nUSER: Hello\nASSISTANT: Hi there</s>\nUSER: Who are you\nASSISTANT: I am an assistant </s>\nUSER: Another question\nASSISTANT:",
// CohereForAI/c4ai-command-r-plus
"<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a helpful assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Who are you<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>I am an assistant<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Another question<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
}; };
std::vector<char> formatted_chat(1024); std::vector<char> formatted_chat(1024);
int32_t res; int32_t res;

View file

@ -104,16 +104,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""( R"""(
array ::= "[" space ( value ("," space value)* )? "]" space array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space boolean ::= ("true" | "false") space
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
null ::= "null" space null ::= "null" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object root ::= object
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] | value ::= object | array | string | number | boolean | null
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
value ::= object | array | string | number | boolean
)""" )"""
}); });
@ -133,10 +133,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
date-string ::= "\"" date "\"" space date-string ::= "\"" date "\"" space
date-time ::= date "T" time date-time ::= date "T" time
date-time-string ::= "\"" date-time "\"" space date-time-string ::= "\"" date-time "\"" space
root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space
space ::= " "? space ::= " "?
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ) time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
time-string ::= "\"" time "\"" space time-string ::= "\"" time "\"" space
tuple-0 ::= date-string
tuple-2 ::= time-string
tuple-3 ::= date-time-string
uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
)""" )"""
}); });
@ -148,10 +151,65 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"type": "string" "type": "string"
})""", })""",
R"""( R"""(
root ::= "\"" ( char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
[^"\\] | root ::= "\"" char* "\"" space
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) space ::= " "?
)* "\"" space )"""
});
test({
SUCCESS,
"string w/ min length 1",
R"""({
"type": "string",
"minLength": 1
})""",
R"""(
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "\"" char+ "\"" space
space ::= " "?
)"""
});
test({
SUCCESS,
"string w/ min length 3",
R"""({
"type": "string",
"minLength": 3
})""",
R"""(
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "\"" char char char (char)* "\"" space
space ::= " "?
)"""
});
test({
SUCCESS,
"string w/ max length",
R"""({
"type": "string",
"maxLength": 3
})""",
R"""(
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "\"" (char (char (char)?)?)? "\"" space
space ::= " "?
)"""
});
test({
SUCCESS,
"string w/ min & max length",
R"""({
"type": "string",
"minLength": 1,
"maxLength": 4
})""",
R"""(
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "\"" char (char (char (char)?)?)? "\"" space
space ::= " "? space ::= " "?
)""" )"""
}); });
@ -175,7 +233,8 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"type": "integer" "type": "integer"
})""", })""",
R"""( R"""(
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
root ::= ("-"? integral-part) space
space ::= " "? space ::= " "?
)""" )"""
}); });
@ -223,12 +282,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"prefixItems": [{ "type": "string" }] "prefixItems": [{ "type": "string" }]
})""", })""",
R"""( R"""(
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "[" space string "]" space root ::= "[" space string "]" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -239,13 +296,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"prefixItems": [{ "type": "string" }, { "type": "number" }] "prefixItems": [{ "type": "string" }, { "type": "number" }]
})""", })""",
R"""( R"""(
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "[" space string "," space number "]" space root ::= "[" space string "," space number "]" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -256,7 +313,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"type": "number" "type": "number"
})""", })""",
R"""( R"""(
root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
space ::= " "? space ::= " "?
)""" )"""
}); });
@ -272,7 +331,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""", })""",
R"""( R"""(
boolean ::= ("true" | "false") space boolean ::= ("true" | "false") space
root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space root ::= "[" space boolean "," space boolean ("," space boolean)* "]" space
space ::= " "? space ::= " "?
)""" )"""
}); });
@ -320,10 +379,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"maxItems": 5 "maxItems": 5
})""", })""",
R"""( R"""(
integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integer ::= ("-"? integral-part) space
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
item ::= number | integer item ::= number | integer
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space root ::= "[" space item "," space item "," space item ("," space item ("," space item)?)? "]" space
space ::= " "? space ::= " "?
)""" )"""
}); });
@ -372,11 +433,11 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"regexp", "regexp",
R"""({ R"""({
"type": "string", "type": "string",
"pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$" "pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} a{3,5}nd...$"
})""", })""",
R"""( R"""(
dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF] dot ::= [^\x0A\x0D]
root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space root ::= "\"" ("(" root-1 (root-1 (root-1)?)? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " " "aaa" ("a" ("a")?)? "nd" dot dot dot "\"" space
root-1 ::= [0-9] root-1 ::= [0-9]
space ::= " "? space ::= " "?
)""" )"""
@ -404,12 +465,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
a-kv ::= "\"a\"" space ":" space string a-kv ::= "\"a\"" space ":" space string
b-kv ::= "\"b\"" space ":" space string b-kv ::= "\"b\"" space ":" space string
c-kv ::= "\"c\"" space ":" space string c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -426,12 +485,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""", })""",
R"""( R"""(
a-kv ::= "\"a\"" space ":" space string a-kv ::= "\"a\"" space ":" space string
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "{" space (a-kv )? "}" space root ::= "{" space (a-kv )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -452,12 +509,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
b-kv ::= "\"b\"" space ":" space string b-kv ::= "\"b\"" space ":" space string
b-rest ::= ( "," space c-kv )? b-rest ::= ( "," space c-kv )?
c-kv ::= "\"c\"" space ":" space string c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -478,14 +533,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
a-kv ::= "\"a\"" space ":" space string a-kv ::= "\"a\"" space ":" space string
b-kv ::= "\"b\"" space ":" space string b-kv ::= "\"b\"" space ":" space string
c-kv ::= "\"c\"" space ":" space string c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
d-kv ::= "\"d\"" space ":" space string d-kv ::= "\"d\"" space ":" space string
d-rest ::= ( "," space c-kv )? d-rest ::= ( "," space c-kv )?
root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -500,13 +553,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
additional-kv ::= string ":" space additional-value additional-kv ::= string ":" space additional-value
additional-kvs ::= additional-kv ( "," space additional-kv )* additional-kvs ::= additional-kv ( "," space additional-kv )*
additional-value ::= "[" space (number ("," space number)*)? "]" space additional-value ::= "[" space (number ("," space number)*)? "]" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space (additional-kvs )? "}" space root ::= "{" space (additional-kvs )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -520,16 +573,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""( R"""(
array ::= "[" space ( value ("," space value)* )? "]" space array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space boolean ::= ("true" | "false") space
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
null ::= "null" space null ::= "null" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object root ::= object
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] | value ::= object | array | string | number | boolean | null
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
value ::= object | array | string | number | boolean
)""" )"""
}); });
@ -542,16 +595,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""( R"""(
array ::= "[" space ( value ("," space value)* )? "]" space array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space boolean ::= ("true" | "false") space
char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
null ::= "null" space null ::= "null" space
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object root ::= object
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] | value ::= object | array | string | number | boolean | null
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
value ::= object | array | string | number | boolean
)""" )"""
}); });
@ -583,13 +636,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
a-kv ::= "\"a\"" space ":" space number a-kv ::= "\"a\"" space ":" space number
additional-kv ::= string ":" space string additional-kv ::= string ":" space string
additional-kvs ::= additional-kv ( "," space additional-kv )* additional-kvs ::= additional-kv ( "," space additional-kv )*
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -608,13 +661,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
a-rest ::= additional-kvs a-rest ::= additional-kvs
additional-kv ::= string ":" space number additional-kv ::= string ":" space number
additional-kvs ::= additional-kv ( "," space additional-kv )* additional-kvs ::= additional-kv ( "," space additional-kv )*
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -636,13 +689,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
additional-kvs ::= additional-kv ( "," space additional-kv )* additional-kvs ::= additional-kv ( "," space additional-kv )*
b-kv ::= "\"b\"" space ":" space number b-kv ::= "\"b\"" space ":" space number
b-rest ::= additional-kvs b-rest ::= additional-kvs
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -650,9 +703,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
SUCCESS, SUCCESS,
"top-level $ref", "top-level $ref",
R"""({ R"""({
"$ref": "#/definitions/MyType", "$ref": "#/definitions/foo",
"definitions": { "definitions": {
"MyType": { "foo": {
"type": "object", "type": "object",
"properties": { "properties": {
"a": { "a": {
@ -667,14 +720,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
} }
})""", })""",
R"""( R"""(
MyType ::= "{" space MyType-a-kv "}" space char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
MyType-a-kv ::= "\"a\"" space ":" space string foo ::= "{" space foo-a-kv "}" space
root ::= MyType foo-a-kv ::= "\"a\"" space ":" space string
root ::= foo
space ::= " "? space ::= " "?
string ::= "\"" ( string ::= "\"" char* "\"" space
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space
)""" )"""
}); });
@ -701,9 +752,11 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
alternative-1 ::= bar alternative-1 ::= bar
bar ::= "{" space (bar-b-kv )? "}" space bar ::= "{" space (bar-b-kv )? "}" space
bar-b-kv ::= "\"b\"" space ":" space number bar-b-kv ::= "\"b\"" space ":" space number
decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
foo ::= "{" space (foo-a-kv )? "}" space foo ::= "{" space (foo-a-kv )? "}" space
foo-a-kv ::= "\"a\"" space ":" space number foo-a-kv ::= "\"a\"" space ":" space number
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= alternative-0 | alternative-1 root ::= alternative-0 | alternative-1
space ::= " "? space ::= " "?
)""" )"""
@ -745,7 +798,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
c-kv ::= "\"c\"" space ":" space number c-kv ::= "\"c\"" space ":" space number
d-kv ::= "\"d\"" space ":" space number d-kv ::= "\"d\"" space ":" space number
d-rest ::= ( "," space c-kv )? d-rest ::= ( "," space c-kv )?
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
space ::= " "? space ::= " "?
)""" )"""
@ -786,7 +841,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"definitions": {} "definitions": {}
})""", })""",
R"""( R"""(
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)?
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
number- ::= "{" space number-number-kv "}" space number- ::= "{" space number-number-kv "}" space
number-kv ::= "\"number\"" space ":" space number- number-kv ::= "\"number\"" space ":" space number-
number-number ::= "{" space number-number-root-kv "}" space number-number ::= "{" space number-number-root-kv "}" space
@ -816,7 +873,7 @@ int main() {
test_all("Python", [](const TestCase & tc) { test_all("Python", [](const TestCase & tc) {
write("test-json-schema-input.tmp", tc.schema); write("test-json-schema-input.tmp", tc.schema);
tc.verify_status(std::system( tc.verify_status(std::system(
"python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE); "python ./examples/json_schema_to_grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
tc.verify(read("test-grammar-output.tmp")); tc.verify(read("test-grammar-output.tmp"));
}); });
} else { } else {